1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 /*      Copyright (c) 1987, 1988 Microsoft Corporation  */
  31 /*        All Rights Reserved   */
  32 
  33 /*
  34  * Copyright 2013 Damian Bogel. All rights reserved.
  35  */
  36 
  37 /*
  38  * fgrep -- print all lines containing any of a set of keywords
  39  *
  40  *      status returns:
  41  *              0 - ok, and some matches
  42  *              1 - ok, but no matches
  43  *              2 - some error
  44  */
  45 
  46 #include <stdio.h>
  47 #include <ctype.h>
  48 #include <sys/types.h>
  49 #include <stdlib.h>
  50 #include <string.h>
  51 #include <locale.h>
  52 #include <libintl.h>
  53 #include <euc.h>
  54 #include <sys/stat.h>
  55 #include <fcntl.h>
  56 
  57 #include <getwidth.h>
  58 
  59 eucwidth_t WW;
  60 #define WIDTH1  WW._eucw1
  61 #define WIDTH2  WW._eucw2
  62 #define WIDTH3  WW._eucw3
  63 #define MULTI_BYTE      WW._multibyte
  64 #define GETONE(lc, p) \
  65         cw = ISASCII(lc = (unsigned char)*p++) ? 1 :     \
  66                 (ISSET2(lc) ? WIDTH2 :                       \
  67                 (ISSET3(lc) ? WIDTH3 : WIDTH1));             \
  68         if (--cw > --ccount) {                           \
  69                 cw -= ccount;                                \
  70                 while (ccount--)                             \
  71                         lc = (lc << 7) | ((*p++) & 0177);        \
  72                         if (p >= &buf[fw_lBufsiz + BUFSIZ]) {    \
  73                         if (nlp == buf) {                        \
  74                                 /* Increase the buffer size */       \
  75                                 fw_lBufsiz += BUFSIZ;                \
  76                                 if ((buf = realloc(buf,              \
  77                                         fw_lBufsiz + BUFSIZ)) == NULL) { \
  78                                         exit(2); /* out of memory */     \
  79                                 }                                    \
  80                                 nlp = buf;                           \
  81                                 p = &buf[fw_lBufsiz];                \
  82                         } else {                                 \
  83                                 /* shift the buffer contents down */ \
  84                                 (void) memmove(buf, nlp,             \
  85                                         &buf[fw_lBufsiz + BUFSIZ] - nlp);\
  86                                 p -= nlp - buf;                      \
  87                                 nlp = buf;                           \
  88                         }                                        \
  89                 }                                            \
  90                 if (p > &buf[fw_lBufsiz]) {                  \
  91                         if ((ccount = fread(p, sizeof (char),    \
  92                             &buf[fw_lBufsiz + BUFSIZ] - p, fptr))\
  93                                 <= 0) break;                         \
  94                 } else if ((ccount = fread(p,                \
  95                         sizeof (char),  BUFSIZ, fptr)) <= 0)     \
  96                         break;                                   \
  97                 blkno += (long long)ccount;                  \
  98         }                                                \
  99         ccount -= cw;                                    \
 100         while (cw--)                                     \
 101                 lc = (lc << 7) | ((*p++) & 0177)
 102 
 103 /*
 104  * The same() macro and letter() function were inserted to allow for
 105  * the -i option work for the multi-byte environment.
 106  */
 107 wchar_t letter();
 108 #define same(a, b) \
 109         (a == b || iflag && (!MULTI_BYTE || ISASCII(a)) && (a ^ b) == ' ' && \
 110         letter(a) == letter(b))
 111 
 112 #define STDIN_FILENAME gettext("(standard input)")
 113 
 114 #define QSIZE 400
 115 struct words {
 116         wchar_t inp;
 117         char    out;
 118         struct  words *nst;
 119         struct  words *link;
 120         struct  words *fail;
 121 } *w = NULL, *smax, *q;
 122 
 123 FILE *fptr;
 124 long long lnum;
 125 int     bflag, cflag, lflag, fflag, nflag, vflag, xflag, eflag, qflag;
 126 int     Hflag, hflag, iflag;
 127 int     retcode = 0;
 128 int     nfile;
 129 long long blkno;
 130 int     nsucc;
 131 long long tln;
 132 FILE    *wordf;
 133 char    *argptr;
 134 off_t input_size = 0;
 135 
 136 void    execute(char *);
 137 void    cgotofn(void);
 138 void    overflo(void);
 139 void    cfail(void);
 140 
 141 static long fw_lBufsiz = 0;
 142 
 143 int
 144 main(int argc, char **argv)
 145 {
 146         int c;
 147         int errflg = 0;
 148         struct stat file_stat;
 149 
 150         (void) setlocale(LC_ALL, "");
 151 #if !defined(TEXT_DOMAIN)       /* Should be defined by cc -D */
 152 #define TEXT_DOMAIN "SYS_TEST"  /* Use this only if it weren't */
 153 #endif
 154         (void) textdomain(TEXT_DOMAIN);
 155 
 156         while ((c = getopt(argc, argv, "Hhybcie:f:lnvxqs")) != EOF)
 157                 switch (c) {
 158 
 159                 case 'q':
 160                 case 's': /* Solaris: legacy option */
 161                         qflag++;
 162                         continue;
 163                 case 'H':
 164                         Hflag++;
 165                         hflag = 0;
 166                         continue;
 167                 case 'h':
 168                         hflag++;
 169                         Hflag = 0;
 170                         continue;
 171                 case 'b':
 172                         bflag++;
 173                         continue;
 174 
 175                 case 'i':
 176                 case 'y':
 177                         iflag++;
 178                         continue;
 179 
 180                 case 'c':
 181                         cflag++;
 182                         continue;
 183 
 184                 case 'e':
 185                         eflag++;
 186                         argptr = optarg;
 187                         input_size = strlen(argptr);
 188                         continue;
 189 
 190                 case 'f':
 191                         fflag++;
 192                         wordf = fopen(optarg, "r");
 193                         if (wordf == NULL) {
 194                                 (void) fprintf(stderr,
 195                                         gettext("fgrep: can't open %s\n"),
 196                                         optarg);
 197                                 exit(2);
 198                         }
 199 
 200                         if (fstat(fileno(wordf), &file_stat) == 0) {
 201                             input_size = file_stat.st_size;
 202                         } else {
 203                                 (void) fprintf(stderr,
 204                                         gettext("fgrep: can't fstat %s\n"),
 205                                         optarg);
 206                                 exit(2);
 207                         }
 208 
 209                         continue;
 210 
 211                 case 'l':
 212                         lflag++;
 213                         continue;
 214 
 215                 case 'n':
 216                         nflag++;
 217                         continue;
 218 
 219                 case 'v':
 220                         vflag++;
 221                         continue;
 222 
 223                 case 'x':
 224                         xflag++;
 225                         continue;
 226 
 227                 case '?':
 228                         errflg++;
 229         }
 230 
 231         argc -= optind;
 232         if (errflg || ((argc <= 0) && !fflag && !eflag)) {
 233                 (void) printf(gettext("usage: fgrep [ -bcHhilnqsvx ] "
 234                         "[ -e exp ] [ -f file ] [ strings ] [ file ] ...\n"));
 235                 exit(2);
 236         }
 237         if (!eflag && !fflag) {
 238                 argptr = argv[optind];
 239                 input_size = strlen(argptr);
 240                 input_size++;
 241                 optind++;
 242                 argc--;
 243         }
 244 
 245 /*
 246  * Normally we need one struct words for each letter in the pattern
 247  * plus one terminating struct words with outp = 1, but when -x option
 248  * is specified we require one more struct words for `\n` character so we
 249  * calculate the input_size as below. We add extra 1 because
 250  * (input_size/2) rounds off odd numbers
 251  */
 252 
 253         if (xflag) {
 254                 input_size = input_size + (input_size/2) + 1;
 255         }
 256 
 257         input_size++;
 258 
 259         w = (struct words *)calloc(input_size, sizeof (struct words));
 260         if (w == NULL) {
 261                 (void) fprintf(stderr,
 262                         gettext("fgrep: could not allocate "
 263                                 "memory for wordlist\n"));
 264                 exit(2);
 265         }
 266 
 267         getwidth(&WW);
 268         if ((WIDTH1 == 0) && (WIDTH2 == 0) &&
 269                 (WIDTH3 == 0)) {
 270                 /*
 271                  * If non EUC-based locale,
 272                  * assume WIDTH1 is 1.
 273                  */
 274                 WIDTH1 = 1;
 275         }
 276         WIDTH2++;
 277         WIDTH3++;
 278 
 279         cgotofn();
 280         cfail();
 281         nfile = argc;
 282         argv = &argv[optind];
 283         if (argc <= 0) {
 284                 execute((char *)NULL);
 285         } else
 286                 while (--argc >= 0) {
 287                         execute(*argv);
 288                         argv++;
 289                 }
 290 
 291         if (w != NULL) {
 292                 free(w);
 293         }
 294 
 295         return (retcode != 0 ? retcode : nsucc == 0);
 296 }
 297 
 298 void
 299 execute(char *file)
 300 {
 301         char *p;
 302         struct words *c;
 303         int ccount;
 304         static char *buf = NULL;
 305         int failed;
 306         char *nlp;
 307         wchar_t lc;
 308         int cw;
 309 
 310         if (buf == NULL) {
 311                 fw_lBufsiz = BUFSIZ;
 312                 if ((buf = malloc(fw_lBufsiz + BUFSIZ)) == NULL) {
 313                         exit(2); /* out of memory */
 314                 }
 315         }
 316 
 317         if (file) {
 318                 if ((fptr = fopen(file, "r")) == NULL) {
 319                         (void) fprintf(stderr,
 320                                 gettext("fgrep: can't open %s\n"), file);
 321                         retcode = 2;
 322                         return;
 323                 }
 324         } else {
 325                 fptr = stdin;
 326                 file = STDIN_FILENAME;
 327         }
 328         ccount = 0;
 329         failed = 0;
 330         lnum = 1;
 331         tln = 0;
 332         blkno = 0;
 333         p = buf;
 334         nlp = p;
 335         c = w;
 336         for (;;) {
 337                 if (c == 0)
 338                         break;
 339                 if (ccount <= 0) {
 340                         if (p >= &buf[fw_lBufsiz + BUFSIZ]) {
 341                                 if (nlp == buf) {
 342                                         /* increase the buffer size */
 343                                         fw_lBufsiz += BUFSIZ;
 344                                         if ((buf = realloc(buf,
 345                                                 fw_lBufsiz + BUFSIZ)) == NULL) {
 346                                                 exit(2); /* out of memory */
 347                                         }
 348                                         nlp = buf;
 349                                         p = &buf[fw_lBufsiz];
 350                                 } else {
 351                                         /* shift the buffer down */
 352                                         (void) memmove(buf, nlp,
 353                                                 &buf[fw_lBufsiz + BUFSIZ]
 354                                                 - nlp);
 355                                         p -= nlp - buf;
 356                                         nlp = buf;
 357                                 }
 358 
 359                         }
 360                         if (p > &buf[fw_lBufsiz]) {
 361                                 if ((ccount = fread(p, sizeof (char),
 362                                         &buf[fw_lBufsiz + BUFSIZ] - p, fptr))
 363                                         <= 0)
 364                                         break;
 365                         } else if ((ccount = fread(p, sizeof (char),
 366                                 BUFSIZ, fptr)) <= 0)
 367                                 break;
 368                         blkno += (long long)ccount;
 369                 }
 370                 GETONE(lc, p);
 371 nstate:
 372                 if (same(c->inp, lc)) {
 373                         c = c->nst;
 374                 } else if (c->link != 0) {
 375                         c = c->link;
 376                         goto nstate;
 377                 } else {
 378                         c = c->fail;
 379                         failed = 1;
 380                         if (c == 0) {
 381                                 c = w;
 382 istate:
 383                                 if (same(c->inp, lc)) {
 384                                         c = c->nst;
 385                                 } else if (c->link != 0) {
 386                                         c = c->link;
 387                                         goto istate;
 388                                 }
 389                         } else
 390                                 goto nstate;
 391                 }
 392 
 393                 if (c == 0)
 394                         break;
 395 
 396                 if (c->out) {
 397                         while (lc != '\n') {
 398                                 if (ccount <= 0) {
 399 if (p == &buf[fw_lBufsiz + BUFSIZ]) {
 400         if (nlp == buf) {
 401                 /* increase buffer size */
 402                 fw_lBufsiz += BUFSIZ;
 403                 if ((buf = realloc(buf, fw_lBufsiz + BUFSIZ)) == NULL) {
 404                         exit(2); /* out of memory */
 405                 }
 406                 nlp = buf;
 407                 p = &buf[fw_lBufsiz];
 408         } else {
 409                 /* shift buffer down */
 410                 (void) memmove(buf, nlp, &buf[fw_lBufsiz + BUFSIZ] - nlp);
 411                 p -= nlp - buf;
 412                 nlp = buf;
 413         }
 414 }
 415 if (p > &buf[fw_lBufsiz]) {
 416         if ((ccount = fread(p, sizeof (char),
 417                 &buf[fw_lBufsiz + BUFSIZ] - p, fptr)) <= 0) break;
 418         } else if ((ccount = fread(p, sizeof (char), BUFSIZ,
 419                 fptr)) <= 0) break;
 420                 blkno += (long long)ccount;
 421         }
 422         GETONE(lc, p);
 423 }
 424                         if ((vflag && (failed == 0 || xflag == 0)) ||
 425                                 (vflag == 0 && xflag && failed))
 426                                 goto nomatch;
 427 succeed:
 428                         nsucc = 1;
 429                         if (lflag || qflag) {
 430                                 if (!qflag)
 431                                         (void) printf("%s\n", file);
 432                                 (void) fclose(fptr);
 433                                 return;
 434                         }
 435                         if (cflag) {
 436                                 tln++;
 437                         } else {
 438                                 if (Hflag || (nfile > 1 && !hflag))
 439                                         (void) printf("%s:", file);
 440                                 if (bflag)
 441                                         (void) printf("%lld:",
 442                                                 (blkno - (long long)(ccount-1))
 443                                                 / BUFSIZ);
 444                                 if (nflag)
 445                                         (void) printf("%lld:", lnum);
 446                                 if (p <= nlp) {
 447                                         while (nlp < &buf[fw_lBufsiz + BUFSIZ])
 448                                                 (void) putchar(*nlp++);
 449                                         nlp = buf;
 450                                 }
 451                                 while (nlp < p)
 452                                         (void) putchar(*nlp++);
 453                         }
 454 nomatch:
 455                         lnum++;
 456                         nlp = p;
 457                         c = w;
 458                         failed = 0;
 459                         continue;
 460                 }
 461                 if (lc == '\n')
 462                         if (vflag)
 463                                 goto succeed;
 464                         else {
 465                                 lnum++;
 466                                 nlp = p;
 467                                 c = w;
 468                                 failed = 0;
 469                         }
 470         }
 471         (void) fclose(fptr);
 472         if (cflag && !qflag) {
 473                 if (Hflag || (nfile > 1 && !hflag))
 474                         (void) printf("%s:", file);
 475                 (void) printf("%lld\n", tln);
 476         }
 477 }
 478 
 479 
 480 wchar_t
 481 getargc(void)
 482 {
 483         /* appends a newline to shell quoted argument list so */
 484         /* the list looks like it came from an ed style file  */
 485         wchar_t c;
 486         int cw;
 487         int b;
 488         static int endflg;
 489 
 490 
 491         if (wordf) {
 492                 if ((b = getc(wordf)) == EOF)
 493                         return (EOF);
 494                 cw = ISASCII(c = (wchar_t)b) ? 1 :
 495                         (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
 496                 while (--cw) {
 497                         if ((b = getc(wordf)) == EOF)
 498                                 return (EOF);
 499                         c = (c << 7) | (b & 0177);
 500                 }
 501                 return (iflag ? letter(c) : c);
 502         }
 503 
 504         if (endflg)
 505                 return (EOF);
 506 
 507         {
 508                 cw = ISASCII(c = (unsigned char)*argptr++) ? 1 :
 509                         (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
 510 
 511                 while (--cw)
 512                         c = (c << 7) | ((*argptr++) & 0177);
 513                 if (c == '\0') {
 514                         endflg++;
 515                         return ('\n');
 516                 }
 517         }
 518         return (iflag ? letter(c) : c);
 519 
 520 
 521 }
 522 
 523 void
 524 cgotofn(void)
 525 {
 526         int c;
 527         struct words *s;
 528 
 529         s = smax = w;
 530 nword:
 531         for (;;) {
 532                 c = getargc();
 533                 if (c == EOF)
 534                         return;
 535                 if (c == 0)
 536                         goto enter;
 537                 if (c == '\n') {
 538                         if (xflag) {
 539                                 for (;;) {
 540                                         if (s->inp == c) {
 541                                                 s = s->nst;
 542                                                 break;
 543                                         }
 544                                         if (s->inp == 0)
 545                                                 goto nenter;
 546                                         if (s->link == 0) {
 547                                                 if (smax >= &w[input_size -1])
 548                                                         overflo();
 549                                                 s->link = ++smax;
 550                                                 s = smax;
 551                                                 goto nenter;
 552                                         }
 553                                         s = s->link;
 554                                 }
 555                         }
 556                         s->out = 1;
 557                         s = w;
 558                 } else {
 559 loop:
 560                         if (s->inp == c) {
 561                                 s = s->nst;
 562                                 continue;
 563                         }
 564                         if (s->inp == 0)
 565                                 goto enter;
 566                         if (s->link == 0) {
 567                                 if (smax >= &w[input_size -1])
 568                                         overflo();
 569                                 s->link = ++smax;
 570                                 s = smax;
 571                                 goto enter;
 572                         }
 573                         s = s->link;
 574                         goto loop;
 575                 }
 576         }
 577 
 578 enter:
 579         do {
 580                 s->inp = c;
 581                 if (smax >= &w[input_size -1])
 582                         overflo();
 583                 s->nst = ++smax;
 584                 s = smax;
 585         } while ((c = getargc()) != '\n' && c != EOF);
 586         if (xflag) {
 587 nenter:
 588                 s->inp = '\n';
 589                 if (smax >= &w[input_size -1])
 590                         overflo();
 591                 s->nst = ++smax;
 592         }
 593         smax->out = 1;
 594         s = w;
 595         if (c != EOF)
 596                 goto nword;
 597 }
 598 
 599 /*
 600  * This function is an unexpected condition, since input_size should have been
 601  * calculated correctly before hand.
 602  */
 603 
 604 void
 605 overflo(void)
 606 {
 607         (void) fprintf(stderr, gettext("fgrep: wordlist too large\n"));
 608         exit(2);
 609 }
 610 
 611 void
 612 cfail(void)
 613 {
 614         int qsize = QSIZE;
 615         struct words **queue = NULL;
 616 
 617         /*
 618          * front and rear are pointers used to traverse the global words
 619          * structure "w" which contains the data of input pattern file
 620          */
 621         struct words **front, **rear;
 622         struct words *state;
 623         unsigned long frontoffset = 0, rearoffset = 0;
 624         char c;
 625         struct words *s;
 626         s = w;
 627         if ((queue = (struct words **)calloc(qsize, sizeof (struct words *)))
 628                                 == NULL) {
 629                 perror("fgrep");
 630                 exit(2);
 631         }
 632         front = rear = queue;
 633 init:
 634         if ((s->inp) != 0) {
 635                 *rear++ = s->nst;
 636         /*
 637          * Reallocates the queue if the number of distinct starting
 638          * character of patterns exceeds the qsize value
 639          */
 640                 if (rear >= &queue[qsize - 1]) {
 641                         frontoffset = front - queue;
 642                         rearoffset = rear - queue;
 643                         qsize += QSIZE;
 644                         if ((queue = (struct words **)realloc(queue,
 645                                 qsize * sizeof (struct words *))) == NULL) {
 646                                 perror("fgrep");
 647                                 exit(2);
 648                         }
 649                         front = queue + frontoffset;
 650                         rear = queue + rearoffset;
 651                 }
 652         }
 653         if ((s = s->link) != 0) {
 654                 goto init;
 655         }
 656 
 657         while (rear != front) {
 658                 s = *front++;
 659 cloop:
 660                 if ((c = s->inp) != 0) {
 661                         *rear++ = (q = s->nst);
 662                 /*
 663                  * Reallocate the queue if the rear pointer reaches the end
 664                  * queue
 665                  */
 666                         if (rear >= &queue[qsize - 1]) {
 667                                 frontoffset = front - queue;
 668                                 rearoffset = rear - queue;
 669                                 qsize += QSIZE;
 670                                 if ((queue = (struct words **)realloc(queue,
 671                                     qsize * sizeof (struct words *))) == NULL) {
 672                                         perror("fgrep");
 673                                         exit(2);
 674                                 }
 675                                 front = queue + frontoffset;
 676                                 rear = queue + rearoffset;
 677                         }
 678                         state = s->fail;
 679 floop:
 680                         if (state == 0)
 681                                 state = w;
 682                         if (state->inp == c) {
 683 qloop:
 684                                 q->fail = state->nst;
 685                                 if ((state->nst)->out == 1)
 686                                         q->out = 1;
 687                                 if ((q = q->link) != 0)
 688                                         goto qloop;
 689                         } else if ((state = state->link) != 0)
 690                                 goto floop;
 691                 }
 692                 if ((s = s->link) != 0)
 693                         goto cloop;
 694         }
 695 }
 696 
 697 wchar_t
 698 letter(wchar_t c)
 699 {
 700         if (c >= 'a' && c <= 'z')
 701                 return (c);
 702         if (c >= 'A' && c <= 'Z')
 703                 return (c + 'a' - 'A');
 704         return (c);
 705 }