1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 /*      Copyright (c) 1987, 1988 Microsoft Corporation  */
  31 /*        All Rights Reserved   */
  32 
  33 #pragma ident   "%Z%%M% %I%     %E% SMI"
  34 
  35 /*
  36  * fgrep -- print all lines containing any of a set of keywords
  37  *
  38  *      status returns:
  39  *              0 - ok, and some matches
  40  *              1 - ok, but no matches
  41  *              2 - some error
  42  */
  43 
#include <stdio.h>
#include <ctype.h>
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <locale.h>
#include <libintl.h>
#include <euc.h>
#include <sys/stat.h>
#include <fcntl.h>

#include <getwidth.h>
  56 
  57 eucwidth_t WW;
  58 #define WIDTH1  WW._eucw1
  59 #define WIDTH2  WW._eucw2
  60 #define WIDTH3  WW._eucw3
  61 #define MULTI_BYTE      WW._multibyte
  62 #define GETONE(lc, p) \
  63         cw = ISASCII(lc = (unsigned char)*p++) ? 1 :     \
  64                 (ISSET2(lc) ? WIDTH2 :                       \
  65                 (ISSET3(lc) ? WIDTH3 : WIDTH1));             \
  66         if (--cw > --ccount) {                           \
  67                 cw -= ccount;                                \
  68                 while (ccount--)                             \
  69                         lc = (lc << 7) | ((*p++) & 0177);        \
  70                         if (p >= &buf[fw_lBufsiz + BUFSIZ]) {    \
  71                         if (nlp == buf) {                        \
  72                                 /* Increase the buffer size */       \
  73                                 fw_lBufsiz += BUFSIZ;                \
  74                                 if ((buf = realloc(buf,              \
  75                                         fw_lBufsiz + BUFSIZ)) == NULL) { \
  76                                         exit(2); /* out of memory */     \
  77                                 }                                    \
  78                                 nlp = buf;                           \
  79                                 p = &buf[fw_lBufsiz];                \
  80                         } else {                                 \
  81                                 /* shift the buffer contents down */ \
  82                                 (void) memmove(buf, nlp,             \
  83                                         &buf[fw_lBufsiz + BUFSIZ] - nlp);\
  84                                 p -= nlp - buf;                      \
  85                                 nlp = buf;                           \
  86                         }                                        \
  87                 }                                            \
  88                 if (p > &buf[fw_lBufsiz]) {                  \
  89                         if ((ccount = fread(p, sizeof (char),    \
  90                             &buf[fw_lBufsiz + BUFSIZ] - p, fptr))\
  91                                 <= 0) break;                         \
  92                 } else if ((ccount = fread(p,                \
  93                         sizeof (char),  BUFSIZ, fptr)) <= 0)     \
  94                         break;                                   \
  95                 blkno += (long long)ccount;                  \
  96         }                                                \
  97         ccount -= cw;                                    \
  98         while (cw--)                                     \
  99                 lc = (lc << 7) | ((*p++) & 0177)
 100 
 101 /*
 102  * The same() macro and letter() function were inserted to allow for
 103  * the -i option work for the multi-byte environment.
 104  */
 105 wchar_t letter();
 106 #define same(a, b) \
 107         (a == b || iflag && (!MULTI_BYTE || ISASCII(a)) && (a ^ b) == ' ' && \
 108         letter(a) == letter(b))
 109 
 110 #define STDIN_FILENAME gettext("(standard input)")
 111 
 112 #define QSIZE 400
 113 struct words {
 114         wchar_t inp;
 115         char    out;
 116         struct  words *nst;
 117         struct  words *link;
 118         struct  words *fail;
 119 } *w = NULL, *smax, *q;
 120 
 121 FILE *fptr;
 122 long long lnum;
 123 int     bflag, cflag, lflag, fflag, nflag, vflag, xflag, eflag, qflag;
 124 int     Hflag, hflag, iflag;
 125 int     retcode = 0;
 126 int     nfile;
 127 long long blkno;
 128 int     nsucc;
 129 long long tln;
 130 FILE    *wordf;
 131 char    *argptr;
 132 off_t input_size = 0;
 133 
 134 void    execute(char *);
 135 void    cgotofn(void);
 136 void    overflo(void);
 137 void    cfail(void);
 138 
 139 static long fw_lBufsiz = 0;
 140 
 141 int
 142 main(int argc, char **argv)
 143 {
 144         int c;
 145         int errflg = 0;
 146         struct stat file_stat;
 147 
 148         (void) setlocale(LC_ALL, "");
 149 #if !defined(TEXT_DOMAIN)       /* Should be defined by cc -D */
 150 #define TEXT_DOMAIN "SYS_TEST"  /* Use this only if it weren't */
 151 #endif
 152         (void) textdomain(TEXT_DOMAIN);
 153 
 154         while ((c = getopt(argc, argv, "Hhybcie:f:lnvxqs")) != EOF)
 155                 switch (c) {
 156 
 157                 case 'q':
 158                 case 's': /* Solaris: legacy option */
 159                         qflag++;
 160                         continue;
 161                 case 'H':
 162                         Hflag++;
 163                         hflag = 0;
 164                         continue;
 165                 case 'h':
 166                         hflag++;
 167                         Hflag = 0;
 168                         continue;
 169                 case 'b':
 170                         bflag++;
 171                         continue;
 172 
 173                 case 'i':
 174                 case 'y':
 175                         iflag++;
 176                         continue;
 177 
 178                 case 'c':
 179                         cflag++;
 180                         continue;
 181 
 182                 case 'e':
 183                         eflag++;
 184                         argptr = optarg;
 185                         input_size = strlen(argptr);
 186                         continue;
 187 
 188                 case 'f':
 189                         fflag++;
 190                         wordf = fopen(optarg, "r");
 191                         if (wordf == NULL) {
 192                                 (void) fprintf(stderr,
 193                                         gettext("fgrep: can't open %s\n"),
 194                                         optarg);
 195                                 exit(2);
 196                         }
 197 
 198                         if (fstat(fileno(wordf), &file_stat) == 0) {
 199                             input_size = file_stat.st_size;
 200                         } else {
 201                                 (void) fprintf(stderr,
 202                                         gettext("fgrep: can't fstat %s\n"),
 203                                         optarg);
 204                                 exit(2);
 205                         }
 206 
 207                         continue;
 208 
 209                 case 'l':
 210                         lflag++;
 211                         continue;
 212 
 213                 case 'n':
 214                         nflag++;
 215                         continue;
 216 
 217                 case 'v':
 218                         vflag++;
 219                         continue;
 220 
 221                 case 'x':
 222                         xflag++;
 223                         continue;
 224 
 225                 case '?':
 226                         errflg++;
 227         }
 228 
 229         argc -= optind;
 230         if (errflg || ((argc <= 0) && !fflag && !eflag)) {
 231                 (void) printf(gettext("usage: fgrep [ -bcHhilnqsvx ] "
 232                         "[ -e exp ] [ -f file ] [ strings ] [ file ] ...\n"));
 233                 exit(2);
 234         }
 235         if (!eflag && !fflag) {
 236                 argptr = argv[optind];
 237                 input_size = strlen(argptr);
 238                 input_size++;
 239                 optind++;
 240                 argc--;
 241         }
 242 
 243 /*
 244  * Normally we need one struct words for each letter in the pattern
 245  * plus one terminating struct words with outp = 1, but when -x option
 246  * is specified we require one more struct words for `\n` character so we
 247  * calculate the input_size as below. We add extra 1 because
 248  * (input_size/2) rounds off odd numbers
 249  */
 250 
 251         if (xflag) {
 252                 input_size = input_size + (input_size/2) + 1;
 253         }
 254 
 255         input_size++;
 256 
 257         w = (struct words *)calloc(input_size, sizeof (struct words));
 258         if (w == NULL) {
 259                 (void) fprintf(stderr,
 260                         gettext("fgrep: could not allocate "
 261                                 "memory for wordlist\n"));
 262                 exit(2);
 263         }
 264 
 265         getwidth(&WW);
 266         if ((WIDTH1 == 0) && (WIDTH2 == 0) &&
 267                 (WIDTH3 == 0)) {
 268                 /*
 269                  * If non EUC-based locale,
 270                  * assume WIDTH1 is 1.
 271                  */
 272                 WIDTH1 = 1;
 273         }
 274         WIDTH2++;
 275         WIDTH3++;
 276 
 277         cgotofn();
 278         cfail();
 279         nfile = argc;
 280         argv = &argv[optind];
 281         if (argc <= 0) {
 282                 execute((char *)NULL);
 283         } else
 284                 while (--argc >= 0) {
 285                         execute(*argv);
 286                         argv++;
 287                 }
 288 
 289         if (w != NULL) {
 290                 free(w);
 291         }
 292 
 293         return (retcode != 0 ? retcode : nsucc == 0);
 294 }
 295 
 296 void
 297 execute(char *file)
 298 {
 299         char *p;
 300         struct words *c;
 301         int ccount;
 302         static char *buf = NULL;
 303         int failed;
 304         char *nlp;
 305         wchar_t lc;
 306         int cw;
 307 
 308         if (buf == NULL) {
 309                 fw_lBufsiz = BUFSIZ;
 310                 if ((buf = malloc(fw_lBufsiz + BUFSIZ)) == NULL) {
 311                         exit(2); /* out of memory */
 312                 }
 313         }
 314 
 315         if (file) {
 316                 if ((fptr = fopen(file, "r")) == NULL) {
 317                         (void) fprintf(stderr,
 318                                 gettext("fgrep: can't open %s\n"), file);
 319                         retcode = 2;
 320                         return;
 321                 }
 322         } else {
 323                 fptr = stdin;
 324                 file = STDIN_FILENAME;
 325         }
 326         ccount = 0;
 327         failed = 0;
 328         lnum = 1;
 329         tln = 0;
 330         blkno = 0;
 331         p = buf;
 332         nlp = p;
 333         c = w;
 334         for (;;) {
 335                 if (c == 0)
 336                         break;
 337                 if (ccount <= 0) {
 338                         if (p >= &buf[fw_lBufsiz + BUFSIZ]) {
 339                                 if (nlp == buf) {
 340                                         /* increase the buffer size */
 341                                         fw_lBufsiz += BUFSIZ;
 342                                         if ((buf = realloc(buf,
 343                                                 fw_lBufsiz + BUFSIZ)) == NULL) {
 344                                                 exit(2); /* out of memory */
 345                                         }
 346                                         nlp = buf;
 347                                         p = &buf[fw_lBufsiz];
 348                                 } else {
 349                                         /* shift the buffer down */
 350                                         (void) memmove(buf, nlp,
 351                                                 &buf[fw_lBufsiz + BUFSIZ]
 352                                                 - nlp);
 353                                         p -= nlp - buf;
 354                                         nlp = buf;
 355                                 }
 356 
 357                         }
 358                         if (p > &buf[fw_lBufsiz]) {
 359                                 if ((ccount = fread(p, sizeof (char),
 360                                         &buf[fw_lBufsiz + BUFSIZ] - p, fptr))
 361                                         <= 0)
 362                                         break;
 363                         } else if ((ccount = fread(p, sizeof (char),
 364                                 BUFSIZ, fptr)) <= 0)
 365                                 break;
 366                         blkno += (long long)ccount;
 367                 }
 368                 GETONE(lc, p);
 369 nstate:
 370                 if (same(c->inp, lc)) {
 371                         c = c->nst;
 372                 } else if (c->link != 0) {
 373                         c = c->link;
 374                         goto nstate;
 375                 } else {
 376                         c = c->fail;
 377                         failed = 1;
 378                         if (c == 0) {
 379                                 c = w;
 380 istate:
 381                                 if (same(c->inp, lc)) {
 382                                         c = c->nst;
 383                                 } else if (c->link != 0) {
 384                                         c = c->link;
 385                                         goto istate;
 386                                 }
 387                         } else
 388                                 goto nstate;
 389                 }
 390 
 391                 if (c == 0)
 392                         break;
 393 
 394                 if (c->out) {
 395                         while (lc != '\n') {
 396                                 if (ccount <= 0) {
 397 if (p == &buf[fw_lBufsiz + BUFSIZ]) {
 398         if (nlp == buf) {
 399                 /* increase buffer size */
 400                 fw_lBufsiz += BUFSIZ;
 401                 if ((buf = realloc(buf, fw_lBufsiz + BUFSIZ)) == NULL) {
 402                         exit(2); /* out of memory */
 403                 }
 404                 nlp = buf;
 405                 p = &buf[fw_lBufsiz];
 406         } else {
 407                 /* shift buffer down */
 408                 (void) memmove(buf, nlp, &buf[fw_lBufsiz + BUFSIZ] - nlp);
 409                 p -= nlp - buf;
 410                 nlp = buf;
 411         }
 412 }
 413 if (p > &buf[fw_lBufsiz]) {
 414         if ((ccount = fread(p, sizeof (char),
 415                 &buf[fw_lBufsiz + BUFSIZ] - p, fptr)) <= 0) break;
 416         } else if ((ccount = fread(p, sizeof (char), BUFSIZ,
 417                 fptr)) <= 0) break;
 418                 blkno += (long long)ccount;
 419         }
 420         GETONE(lc, p);
 421 }
 422                         if ((vflag && (failed == 0 || xflag == 0)) ||
 423                                 (vflag == 0 && xflag && failed))
 424                                 goto nomatch;
 425 succeed:
 426                         nsucc = 1;
 427                         if (lflag || qflag) {
 428                                 if (!qflag)
 429                                         (void) printf("%s\n", file);
 430                                 (void) fclose(fptr);
 431                                 return;
 432                         }
 433                         if (cflag) {
 434                                 tln++;
 435                         } else {
 436                                 if (Hflag || (nfile > 1 && !hflag))
 437                                         (void) printf("%s:", file);
 438                                 if (bflag)
 439                                         (void) printf("%lld:",
 440                                                 (blkno - (long long)(ccount-1))
 441                                                 / BUFSIZ);
 442                                 if (nflag)
 443                                         (void) printf("%lld:", lnum);
 444                                 if (p <= nlp) {
 445                                         while (nlp < &buf[fw_lBufsiz + BUFSIZ])
 446                                                 (void) putchar(*nlp++);
 447                                         nlp = buf;
 448                                 }
 449                                 while (nlp < p)
 450                                         (void) putchar(*nlp++);
 451                         }
 452 nomatch:
 453                         lnum++;
 454                         nlp = p;
 455                         c = w;
 456                         failed = 0;
 457                         continue;
 458                 }
 459                 if (lc == '\n')
 460                         if (vflag)
 461                                 goto succeed;
 462                         else {
 463                                 lnum++;
 464                                 nlp = p;
 465                                 c = w;
 466                                 failed = 0;
 467                         }
 468         }
 469         (void) fclose(fptr);
 470         if (cflag && !qflag) {
 471                 if (Hflag || (nfile > 1 && !hflag))
 472                         (void) printf("%s:", file);
 473                 (void) printf("%lld\n", tln);
 474         }
 475 }
 476 
 477 
 478 wchar_t
 479 getargc(void)
 480 {
 481         /* appends a newline to shell quoted argument list so */
 482         /* the list looks like it came from an ed style file  */
 483         wchar_t c;
 484         int cw;
 485         int b;
 486         static int endflg;
 487 
 488 
 489         if (wordf) {
 490                 if ((b = getc(wordf)) == EOF)
 491                         return (EOF);
 492                 cw = ISASCII(c = (wchar_t)b) ? 1 :
 493                         (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
 494                 while (--cw) {
 495                         if ((b = getc(wordf)) == EOF)
 496                                 return (EOF);
 497                         c = (c << 7) | (b & 0177);
 498                 }
 499                 return (iflag ? letter(c) : c);
 500         }
 501 
 502         if (endflg)
 503                 return (EOF);
 504 
 505         {
 506                 cw = ISASCII(c = (unsigned char)*argptr++) ? 1 :
 507                         (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
 508 
 509                 while (--cw)
 510                         c = (c << 7) | ((*argptr++) & 0177);
 511                 if (c == '\0') {
 512                         endflg++;
 513                         return ('\n');
 514                 }
 515         }
 516         return (iflag ? letter(c) : c);
 517 
 518 
 519 }
 520 
 521 void
 522 cgotofn(void)
 523 {
 524         int c;
 525         struct words *s;
 526 
 527         s = smax = w;
 528 nword:
 529         for (;;) {
 530                 c = getargc();
 531                 if (c == EOF)
 532                         return;
 533                 if (c == 0)
 534                         goto enter;
 535                 if (c == '\n') {
 536                         if (xflag) {
 537                                 for (;;) {
 538                                         if (s->inp == c) {
 539                                                 s = s->nst;
 540                                                 break;
 541                                         }
 542                                         if (s->inp == 0)
 543                                                 goto nenter;
 544                                         if (s->link == 0) {
 545                                                 if (smax >= &w[input_size -1])
 546                                                         overflo();
 547                                                 s->link = ++smax;
 548                                                 s = smax;
 549                                                 goto nenter;
 550                                         }
 551                                         s = s->link;
 552                                 }
 553                         }
 554                         s->out = 1;
 555                         s = w;
 556                 } else {
 557 loop:
 558                         if (s->inp == c) {
 559                                 s = s->nst;
 560                                 continue;
 561                         }
 562                         if (s->inp == 0)
 563                                 goto enter;
 564                         if (s->link == 0) {
 565                                 if (smax >= &w[input_size -1])
 566                                         overflo();
 567                                 s->link = ++smax;
 568                                 s = smax;
 569                                 goto enter;
 570                         }
 571                         s = s->link;
 572                         goto loop;
 573                 }
 574         }
 575 
 576 enter:
 577         do {
 578                 s->inp = c;
 579                 if (smax >= &w[input_size -1])
 580                         overflo();
 581                 s->nst = ++smax;
 582                 s = smax;
 583         } while ((c = getargc()) != '\n' && c != EOF);
 584         if (xflag) {
 585 nenter:
 586                 s->inp = '\n';
 587                 if (smax >= &w[input_size -1])
 588                         overflo();
 589                 s->nst = ++smax;
 590         }
 591         smax->out = 1;
 592         s = w;
 593         if (c != EOF)
 594                 goto nword;
 595 }
 596 
 597 /*
 598  * This function is an unexpected condition, since input_size should have been
 599  * calculated correctly before hand.
 600  */
 601 
 602 void
 603 overflo(void)
 604 {
 605         (void) fprintf(stderr, gettext("fgrep: wordlist too large\n"));
 606         exit(2);
 607 }
 608 
 609 void
 610 cfail(void)
 611 {
 612         int qsize = QSIZE;
 613         struct words **queue = NULL;
 614 
 615         /*
 616          * front and rear are pointers used to traverse the global words
 617          * structure "w" which contains the data of input pattern file
 618          */
 619         struct words **front, **rear;
 620         struct words *state;
 621         unsigned long frontoffset = 0, rearoffset = 0;
 622         char c;
 623         struct words *s;
 624         s = w;
 625         if ((queue = (struct words **)calloc(qsize, sizeof (struct words *)))
 626                                 == NULL) {
 627                 perror("fgrep");
 628                 exit(2);
 629         }
 630         front = rear = queue;
 631 init:
 632         if ((s->inp) != 0) {
 633                 *rear++ = s->nst;
 634         /*
 635          * Reallocates the queue if the number of distinct starting
 636          * character of patterns exceeds the qsize value
 637          */
 638                 if (rear >= &queue[qsize - 1]) {
 639                         frontoffset = front - queue;
 640                         rearoffset = rear - queue;
 641                         qsize += QSIZE;
 642                         if ((queue = (struct words **)realloc(queue,
 643                                 qsize * sizeof (struct words *))) == NULL) {
 644                                 perror("fgrep");
 645                                 exit(2);
 646                         }
 647                         front = queue + frontoffset;
 648                         rear = queue + rearoffset;
 649                 }
 650         }
 651         if ((s = s->link) != 0) {
 652                 goto init;
 653         }
 654 
 655         while (rear != front) {
 656                 s = *front++;
 657 cloop:
 658                 if ((c = s->inp) != 0) {
 659                         *rear++ = (q = s->nst);
 660                 /*
 661                  * Reallocate the queue if the rear pointer reaches the end
 662                  * queue
 663                  */
 664                         if (rear >= &queue[qsize - 1]) {
 665                                 frontoffset = front - queue;
 666                                 rearoffset = rear - queue;
 667                                 qsize += QSIZE;
 668                                 if ((queue = (struct words **)realloc(queue,
 669                                     qsize * sizeof (struct words *))) == NULL) {
 670                                         perror("fgrep");
 671                                         exit(2);
 672                                 }
 673                                 front = queue + frontoffset;
 674                                 rear = queue + rearoffset;
 675                         }
 676                         state = s->fail;
 677 floop:
 678                         if (state == 0)
 679                                 state = w;
 680                         if (state->inp == c) {
 681 qloop:
 682                                 q->fail = state->nst;
 683                                 if ((state->nst)->out == 1)
 684                                         q->out = 1;
 685                                 if ((q = q->link) != 0)
 686                                         goto qloop;
 687                         } else if ((state = state->link) != 0)
 688                                 goto floop;
 689                 }
 690                 if ((s = s->link) != 0)
 691                         goto cloop;
 692         }
 693 }
 694 
 695 wchar_t
 696 letter(wchar_t c)
 697 {
 698         if (c >= 'a' && c <= 'z')
 699                 return (c);
 700         if (c >= 'A' && c <= 'Z')
 701                 return (c + 'a' - 'A');
 702         return (c);
 703 }