1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * grep - pattern matching program - combined grep, egrep, and fgrep.
  29  *      Based on MKS grep command, with XCU & Solaris mods.
  30  */
  31 
  32 /*
  33  * Copyright 1985, 1992 by Mortice Kern Systems Inc.  All rights reserved.
  34  *
  35  */
  36 
  37 /* Copyright 2012 Nexenta Systems, Inc.  All rights reserved. */
  38 
  39 #include <string.h>
  40 #include <stdlib.h>
  41 #include <ctype.h>
  42 #include <stdarg.h>
  43 #include <regex.h>
  44 #include <limits.h>
  45 #include <sys/types.h>
  46 #include <sys/stat.h>
  47 #include <fcntl.h>
  48 #include <stdio.h>
  49 #include <locale.h>
  50 #include <wchar.h>
  51 #include <errno.h>
  52 #include <unistd.h>
  53 #include <wctype.h>
  54 #include <ftw.h>
  55 #include <sys/param.h>
  56 
  57 #define STDIN_FILENAME gettext("(standard input)")
  58 
  59 #define BSIZE           512             /* Size of block for -b */
  60 #define BUFSIZE         8192            /* Input buffer size */
  61 #define MAX_DEPTH       1000            /* how deep to recurse */
  62 
  63 #define M_CSETSIZE      256             /* singlebyte chars */
  64 static int      bmglen;                 /* length of BMG pattern */
  65 static char     *bmgpat;                /* BMG pattern */
  66 static int      bmgtab[M_CSETSIZE];     /* BMG delta1 table */
  67 
  68 typedef struct  _PATTERN        {
  69         char    *pattern;               /* original pattern */
  70         wchar_t *wpattern;              /* wide, lowercased pattern */
  71         struct  _PATTERN        *next;
  72         regex_t re;                     /* compiled pattern */
  73 } PATTERN;
  74 
  75 static PATTERN  *patterns;
  76 static char     errstr[128];            /* regerror string buffer */
  77 static int      regflags = 0;           /* regcomp options */
  78 static int      matched = 0;            /* return of the grep() */
  79 static int      errors = 0;             /* count of errors */
  80 static uchar_t  fgrep = 0;              /* Invoked as fgrep */
  81 static uchar_t  egrep = 0;              /* Invoked as egrep */
  82 static uchar_t  nvflag = 1;             /* Print matching lines */
  83 static uchar_t  cflag;                  /* Count of matches */
  84 static uchar_t  iflag;                  /* Case insensitve matching */
  85 static uchar_t  Hflag;                  /* Precede lines by file name */
  86 static uchar_t  hflag;                  /* Supress printing of filename */
  87 static uchar_t  lflag;                  /* Print file names of matches */
  88 static uchar_t  nflag;                  /* Precede lines by line number */
  89 static uchar_t  rflag;                  /* Search directories recursively */
  90 static uchar_t  bflag;                  /* Preccede matches by block number */
  91 static uchar_t  sflag;                  /* Suppress file error messages */
  92 static uchar_t  qflag;                  /* Suppress standard output */
  93 static uchar_t  wflag;                  /* Search for expression as a word */
  94 static uchar_t  xflag;                  /* Anchoring */
  95 static uchar_t  Eflag;                  /* Egrep or -E flag */
  96 static uchar_t  Fflag;                  /* Fgrep or -F flag */
  97 static uchar_t  Rflag;                  /* Like rflag, but follow symlinks */
  98 static uchar_t  outfn;                  /* Put out file name */
  99 static char     *cmdname;
 100 
 101 static int      use_wchar, use_bmg, mblocale;
 102 
 103 static size_t   outbuflen, prntbuflen;
 104 static char     *prntbuf;
 105 static wchar_t  *outline;
 106 
 107 static void     addfile(const char *fn);
 108 static void     addpattern(char *s);
 109 static void     fixpatterns(void);
 110 static void     usage(void);
 111 static int      grep(int, const char *);
 112 static void     bmgcomp(char *, int);
 113 static char     *bmgexec(char *, char *);
 114 static int      recursive(const char *, const struct stat *, int, struct FTW *);
 115 static void     process_path(const char *);
 116 static void     process_file(const char *, int);
 117 
 118 /*
 119  * mainline for grep
 120  */
 121 int
 122 main(int argc, char **argv)
 123 {
 124         char    *ap;
 125         int     c;
 126         int     fflag = 0;
 127         int     i, n_pattern = 0, n_file = 0;
 128         char    **pattern_list = NULL;
 129         char    **file_list = NULL;
 130 
 131         (void) setlocale(LC_ALL, "");
 132 #if !defined(TEXT_DOMAIN)       /* Should be defined by cc -D */
 133 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it weren't */
 134 #endif
 135         (void) textdomain(TEXT_DOMAIN);
 136 
 137         /*
 138          * true if this is running on the multibyte locale
 139          */
 140         mblocale = (MB_CUR_MAX > 1);
 141         /*
 142          * Skip leading slashes
 143          */
 144         cmdname = argv[0];
 145         if (ap = strrchr(cmdname, '/'))
 146                 cmdname = ap + 1;
 147 
 148         ap = cmdname;
 149         /*
 150          * Detect egrep/fgrep via command name, map to -E and -F options.
 151          */
 152         if (*ap == 'e' || *ap == 'E') {
 153                 regflags |= REG_EXTENDED;
 154                 egrep++;
 155         } else {
 156                 if (*ap == 'f' || *ap == 'F') {
 157                         fgrep++;
 158                 }
 159         }
 160 
 161         while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) {
 162                 switch (c) {
 163                 case 'v':       /* POSIX: negate matches */
 164                         nvflag = 0;
 165                         break;
 166 
 167                 case 'c':       /* POSIX: write count */
 168                         cflag++;
 169                         break;
 170 
 171                 case 'i':       /* POSIX: ignore case */
 172                         iflag++;
 173                         regflags |= REG_ICASE;
 174                         break;
 175 
 176                 case 'l':       /* POSIX: Write filenames only */
 177                         lflag++;
 178                         break;
 179 
 180                 case 'n':       /* POSIX: Write line numbers */
 181                         nflag++;
 182                         break;
 183 
 184                 case 'r':       /* Solaris: search recursively */
 185                         rflag++;
 186                         break;
 187 
 188                 case 'b':       /* Solaris: Write file block numbers */
 189                         bflag++;
 190                         break;
 191 
 192                 case 's':       /* POSIX: No error msgs for files */
 193                         sflag++;
 194                         break;
 195 
 196                 case 'e':       /* POSIX: pattern list */
 197                         n_pattern++;
 198                         pattern_list = realloc(pattern_list,
 199                             sizeof (char *) * n_pattern);
 200                         if (pattern_list == NULL) {
 201                                 (void) fprintf(stderr,
 202                                     gettext("%s: out of memory\n"),
 203                                     cmdname);
 204                                 exit(2);
 205                         }
 206                         *(pattern_list + n_pattern - 1) = optarg;
 207                         break;
 208 
 209                 case 'f':       /* POSIX: pattern file */
 210                         fflag = 1;
 211                         n_file++;
 212                         file_list = realloc(file_list,
 213                             sizeof (char *) * n_file);
 214                         if (file_list == NULL) {
 215                                 (void) fprintf(stderr,
 216                                     gettext("%s: out of memory\n"),
 217                                     cmdname);
 218                                 exit(2);
 219                         }
 220                         *(file_list + n_file - 1) = optarg;
 221                         break;
 222 
 223                 /* based on options order h or H is set as in GNU grep */
 224                 case 'h':       /* Solaris: supress printing of file name */
 225                         hflag = 1;
 226                         Hflag = 0;
 227                         break;
 228                 /* Solaris: precede every matching with file name */
 229                 case 'H':
 230                         Hflag = 1;
 231                         hflag = 0;
 232                         break;
 233 
 234                 case 'q':       /* POSIX: quiet: status only */
 235                         qflag++;
 236                         break;
 237 
 238                 case 'w':       /* Solaris: treat pattern as word */
 239                         wflag++;
 240                         break;
 241 
 242                 case 'x':       /* POSIX: full line matches */
 243                         xflag++;
 244                         regflags |= REG_ANCHOR;
 245                         break;
 246 
 247                 case 'E':       /* POSIX: Extended RE's */
 248                         regflags |= REG_EXTENDED;
 249                         Eflag++;
 250                         break;
 251 
 252                 case 'F':       /* POSIX: strings, not RE's */
 253                         Fflag++;
 254                         break;
 255 
 256                 case 'R':       /* Solaris: like rflag, but follow symlinks */
 257                         Rflag++;
 258                         rflag++;
 259                         break;
 260 
 261                 default:
 262                         usage();
 263                 }
 264         }
 265         /*
 266          * If we're invoked as egrep or fgrep we need to do some checks
 267          */
 268 
 269         if (egrep || fgrep) {
 270                 /*
 271                  * Use of -E or -F with egrep or fgrep is illegal
 272                  */
 273                 if (Eflag || Fflag)
 274                         usage();
 275                 /*
 276                  * Don't allow use of wflag with egrep / fgrep
 277                  */
 278                 if (wflag)
 279                         usage();
 280                 /*
 281                  * For Solaris the -s flag is equivalent to XCU -q
 282                  */
 283                 if (sflag)
 284                         qflag++;
 285                 /*
 286                  * done with above checks - set the appropriate flags
 287                  */
 288                 if (egrep)
 289                         Eflag++;
 290                 else                    /* Else fgrep */
 291                         Fflag++;
 292         }
 293 
 294         if (wflag && (Eflag || Fflag)) {
 295                 /*
 296                  * -w cannot be specified with grep -F
 297                  */
 298                 usage();
 299         }
 300 
 301         /*
 302          * -E and -F flags are mutually exclusive - check for this
 303          */
 304         if (Eflag && Fflag)
 305                 usage();
 306 
 307         /*
 308          * -l overrides -H like in GNU grep
 309          */
 310         if (lflag)
 311                 Hflag = 0;
 312 
 313         /*
 314          * -c, -l and -q flags are mutually exclusive
 315          * We have -c override -l like in Solaris.
 316          * -q overrides -l & -c programmatically in grep() function.
 317          */
 318         if (cflag && lflag)
 319                 lflag = 0;
 320 
 321         argv += optind - 1;
 322         argc -= optind - 1;
 323 
 324         /*
 325          * Now handling -e and -f option
 326          */
 327         if (pattern_list) {
 328                 for (i = 0; i < n_pattern; i++) {
 329                         addpattern(pattern_list[i]);
 330                 }
 331                 free(pattern_list);
 332         }
 333         if (file_list) {
 334                 for (i = 0; i < n_file; i++) {
 335                         addfile(file_list[i]);
 336                 }
 337                 free(file_list);
 338         }
 339 
 340         /*
 341          * No -e or -f?  Make sure there is one more arg, use it as the pattern.
 342          */
 343         if (patterns == NULL && !fflag) {
 344                 if (argc < 2)
 345                         usage();
 346                 addpattern(argv[1]);
 347                 argc--;
 348                 argv++;
 349         }
 350 
 351         /*
 352          * If -x flag is not specified or -i flag is specified
 353          * with fgrep in a multibyte locale, need to use
 354          * the wide character APIs.  Otherwise, byte-oriented
 355          * process will be done.
 356          */
 357         use_wchar = Fflag && mblocale && (!xflag || iflag);
 358 
 359         /*
 360          * Compile Patterns and also decide if BMG can be used
 361          */
 362         fixpatterns();
 363 
 364         /* Process all files: stdin, or rest of arg list */
 365         if (argc < 2) {
 366                 matched = grep(0, STDIN_FILENAME);
 367         } else {
 368                 if (Hflag || (argc > 2 && hflag == 0))
 369                         outfn = 1;      /* Print filename on match line */
 370                 for (argv++; *argv != NULL; argv++) {
 371                         process_path(*argv);
 372                 }
 373         }
 374         /*
 375          * Return() here is used instead of exit
 376          */
 377 
 378         (void) fflush(stdout);
 379 
 380         if (errors)
 381                 return (2);
 382         return (matched ? 0 : 1);
 383 }
 384 
 385 static void
 386 process_path(const char *path)
 387 {
 388         struct  stat st;
 389         int     walkflags = FTW_CHDIR;
 390         char    *buf = NULL;
 391 
 392         if (rflag) {
 393                 if (stat(path, &st) != -1 &&
 394                     (st.st_mode & S_IFMT) == S_IFDIR) {
 395                         outfn = 1; /* Print filename */
 396 
 397                         /*
 398                          * Add trailing slash if arg
 399                          * is directory, to resolve symlinks.
 400                          */
 401                         if (path[strlen(path) - 1] != '/') {
 402                                 (void) asprintf(&buf, "%s/", path);
 403                                 if (buf != NULL)
 404                                         path = buf;
 405                         }
 406 
 407                         /*
 408                          * Search through subdirs if path is directory.
 409                          * Don't follow symlinks if Rflag is not set.
 410                          */
 411                         if (!Rflag)
 412                                 walkflags |= FTW_PHYS;
 413 
 414                         if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
 415                                 if (!sflag)
 416                                         (void) fprintf(stderr,
 417                                             gettext("%s: can't open \"%s\"\n"),
 418                                             cmdname, path);
 419                                 errors = 1;
 420                         }
 421                         return;
 422                 }
 423         }
 424         process_file(path, 0);
 425 }
 426 
 427 /*
 428  * Read and process all files in directory recursively.
 429  */
 430 static int
 431 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
 432 {
 433         /*
 434          * Process files and follow symlinks if Rflag set.
 435          */
 436         if (info != FTW_F) {
 437                 /* Report broken symlinks and unreadable files */
 438                 if (!sflag &&
 439                     (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
 440                         (void) fprintf(stderr,
 441                             gettext("%s: can't open \"%s\"\n"), cmdname, name);
 442                 }
 443                 return (0);
 444         }
 445 
 446 
 447         /* Skip devices and pipes if Rflag is not set */
 448         if (!Rflag && !S_ISREG(statp->st_mode))
 449                 return (0);
 450         /* Pass offset to relative name from FTW_CHDIR */
 451         process_file(name, ftw->base);
 452         return (0);
 453 }
 454 
 455 /*
 456  * Opens file and call grep function.
 457  */
 458 static void
 459 process_file(const char *name, int base)
 460 {
 461         int fd;
 462 
 463         if ((fd = open(name + base, O_RDONLY)) == -1) {
 464                 errors = 1;
 465                 if (!sflag) /* Silent mode */
 466                         (void) fprintf(stderr, gettext(
 467                             "%s: can't open \"%s\"\n"),
 468                             cmdname, name);
 469                 return;
 470         }
 471         matched |= grep(fd, name);
 472         (void) close(fd);
 473 
 474         if (ferror(stdout)) {
 475                 (void) fprintf(stderr, gettext(
 476                     "%s: error writing to stdout\n"),
 477                     cmdname);
 478                 (void) fflush(stdout);
 479                 exit(2);
 480         }
 481 
 482 }
 483 
 484 /*
 485  * Add a file of strings to the pattern list.
 486  */
 487 static void
 488 addfile(const char *fn)
 489 {
 490         FILE    *fp;
 491         char    *inbuf;
 492         char    *bufp;
 493         size_t  bufsiz, buflen, bufused;
 494 
 495         /*
 496          * Open the pattern file
 497          */
 498         if ((fp = fopen(fn, "r")) == NULL) {
 499                 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
 500                     cmdname, fn);
 501                 exit(2);
 502         }
 503         bufsiz = BUFSIZE;
 504         if ((inbuf = malloc(bufsiz)) == NULL) {
 505                 (void) fprintf(stderr,
 506                     gettext("%s: out of memory\n"), cmdname);
 507                 exit(2);
 508         }
 509         bufp = inbuf;
 510         bufused = 0;
 511         /*
 512          * Read in the file, reallocing as we need more memory
 513          */
 514         while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
 515                 buflen = strlen(bufp);
 516                 bufused += buflen;
 517                 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
 518                         /*
 519                          * if this line does not fit to the buffer,
 520                          * realloc larger buffer
 521                          */
 522                         bufsiz += BUFSIZE;
 523                         if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
 524                                 (void) fprintf(stderr,
 525                                     gettext("%s: out of memory\n"),
 526                                     cmdname);
 527                                 exit(2);
 528                         }
 529                         bufp = inbuf + bufused;
 530                         continue;
 531                 }
 532                 if (bufp[buflen - 1] == '\n') {
 533                         bufp[--buflen] = '\0';
 534                 }
 535                 addpattern(inbuf);
 536 
 537                 bufp = inbuf;
 538                 bufused = 0;
 539         }
 540         free(inbuf);
 541         (void) fclose(fp);
 542 }
 543 
 544 /*
 545  * Add a string to the pattern list.
 546  */
 547 static void
 548 addpattern(char *s)
 549 {
 550         PATTERN *pp;
 551         char    *wordbuf;
 552         char    *np;
 553 
 554         for (; ; ) {
 555                 np = strchr(s, '\n');
 556                 if (np != NULL)
 557                         *np = '\0';
 558                 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
 559                         (void) fprintf(stderr, gettext(
 560                             "%s: out of memory\n"),
 561                             cmdname);
 562                         exit(2);
 563                 }
 564                 if (wflag) {
 565                         /*
 566                          * Solaris wflag support: Add '<' '>' to pattern to
 567                          * select it as a word. Doesn't make sense with -F
 568                          * but we're Libertarian.
 569                          */
 570                         size_t  slen, wordlen;
 571 
 572                         slen = strlen(s);
 573                         wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
 574                         if ((wordbuf = malloc(wordlen)) == NULL) {
 575                                 (void) fprintf(stderr,
 576                                     gettext("%s: out of memory\n"),
 577                                     cmdname);
 578                                 exit(2);
 579                         }
 580                         (void) strcpy(wordbuf, "\\<");
 581                         (void) strcpy(wordbuf + 2, s);
 582                         (void) strcpy(wordbuf + 2 + slen, "\\>");
 583                 } else {
 584                         if ((wordbuf = strdup(s)) == NULL) {
 585                                 (void) fprintf(stderr,
 586                                     gettext("%s: out of memory\n"),
 587                                     cmdname);
 588                                 exit(2);
 589                         }
 590                 }
 591                 pp->pattern = wordbuf;
 592                 pp->next = patterns;
 593                 patterns = pp;
 594                 if (np == NULL)
 595                         break;
 596                 s = np + 1;
 597         }
 598 }
 599 
 600 /*
 601  * Fix patterns.
 602  * Must do after all arguments read, in case later -i option.
 603  */
 604 static void
 605 fixpatterns(void)
 606 {
 607         PATTERN *pp;
 608         int     rv, fix_pattern, npatterns;
 609 
 610         /*
 611          * As REG_ANCHOR flag is not supported in the current Solaris,
 612          * need to fix the specified pattern if -x is specified with
 613          * grep or egrep
 614          */
 615         fix_pattern = !Fflag && xflag;
 616 
 617         for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
 618                 npatterns++;
 619                 if (fix_pattern) {
 620                         char    *cp, *cq;
 621                         size_t  plen, nplen;
 622 
 623                         plen = strlen(pp->pattern);
 624                         /* '^' pattern '$' */
 625                         nplen = 1 + plen + 1 + 1;
 626                         if ((cp = malloc(nplen)) == NULL) {
 627                                 (void) fprintf(stderr,
 628                                     gettext("%s: out of memory\n"),
 629                                     cmdname);
 630                                 exit(2);
 631                         }
 632                         cq = cp;
 633                         *cq++ = '^';
 634                         cq = strcpy(cq, pp->pattern) + plen;
 635                         *cq++ = '$';
 636                         *cq = '\0';
 637                         free(pp->pattern);
 638                         pp->pattern = cp;
 639                 }
 640 
 641                 if (Fflag) {
 642                         if (use_wchar) {
 643                                 /*
 644                                  * Fflag && mblocale && iflag
 645                                  * Fflag && mblocale && !xflag
 646                                  */
 647                                 size_t  n;
 648                                 n = strlen(pp->pattern) + 1;
 649                                 if ((pp->wpattern =
 650                                     malloc(sizeof (wchar_t) * n)) == NULL) {
 651                                         (void) fprintf(stderr,
 652                                             gettext("%s: out of memory\n"),
 653                                             cmdname);
 654                                         exit(2);
 655                                 }
 656                                 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
 657                                     (size_t)-1) {
 658                                         (void) fprintf(stderr,
 659                                             gettext("%s: failed to convert "
 660                                             "\"%s\" to wide-characters\n"),
 661                                             cmdname, pp->pattern);
 662                                         exit(2);
 663                                 }
 664                                 if (iflag) {
 665                                         wchar_t *wp;
 666                                         for (wp = pp->wpattern; *wp != L'\0';
 667                                             wp++) {
 668                                                 *wp = towlower((wint_t)*wp);
 669                                         }
 670                                 }
 671                                 free(pp->pattern);
 672                         } else {
 673                                 /*
 674                                  * Fflag && mblocale && !iflag
 675                                  * Fflag && !mblocale && iflag
 676                                  * Fflag && !mblocale && !iflag
 677                                  */
 678                                 if (iflag) {
 679                                         unsigned char   *cp;
 680                                         for (cp = (unsigned char *)pp->pattern;
 681                                             *cp != '\0'; cp++) {
 682                                                 *cp = tolower(*cp);
 683                                         }
 684                                 }
 685                         }
 686                         /*
 687                          * fgrep: No regular expressions.
 688                          */
 689                         continue;
 690                 }
 691 
 692                 /*
 693                  * For non-fgrep, compile the regular expression,
 694                  * give an informative error message, and exit if
 695                  * it didn't compile.
 696                  */
 697                 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
 698                         (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
 699                         (void) fprintf(stderr,
 700                             gettext("%s: RE error in %s: %s\n"),
 701                             cmdname, pp->pattern, errstr);
 702                         exit(2);
 703                 }
 704                 free(pp->pattern);
 705         }
 706 
 707         /*
 708          * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
 709          * Use the Boyer-Moore-Gosper algorithm if:
 710          * - fgrep                      (Fflag)
 711          * - singlebyte locale          (!mblocale)
 712          * - no ignoring case           (!iflag)
 713          * - no printing line numbers   (!nflag)
 714          * - no negating the output     (nvflag)
 715          * - only one pattern           (npatterns == 1)
 716          * - non zero length pattern    (strlen(patterns->pattern) != 0)
 717          *
 718          * It's guaranteed patterns->pattern is still alive
 719          * when Fflag && !mblocale.
 720          */
 721         use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
 722             (npatterns == 1) && (strlen(patterns->pattern) != 0);
 723 }
 724 
 725 /*
 726  * Search a newline from the beginning of the string
 727  */
 728 static char *
 729 find_nl(const char *ptr, size_t len)
 730 {
 731         while (len-- != 0) {
 732                 if (*ptr++ == '\n') {
 733                         return ((char *)--ptr);
 734                 }
 735         }
 736         return (NULL);
 737 }
 738 
 739 /*
 740  * Search a newline from the end of the string
 741  */
 742 static char *
 743 rfind_nl(const char *ptr, size_t len)
 744 {
 745         const char      *uptr = ptr + len;
 746         while (len--) {
 747                 if (*--uptr == '\n') {
 748                         return ((char *)uptr);
 749                 }
 750         }
 751         return (NULL);
 752 }
 753 
 754 /*
 755  * Duplicate the specified string converting each character
 756  * into a lower case.
 757  */
 758 static char *
 759 istrdup(const char *s1)
 760 {
 761         static size_t   ibuflen = 0;
 762         static char     *ibuf = NULL;
 763         size_t  slen;
 764         char    *p;
 765 
 766         slen = strlen(s1);
 767         if (slen >= ibuflen) {
 768                 /* ibuf does not fit to s1 */
 769                 ibuflen = slen + 1;
 770                 ibuf = realloc(ibuf, ibuflen);
 771                 if (ibuf == NULL) {
 772                         (void) fprintf(stderr,
 773                             gettext("%s: out of memory\n"), cmdname);
 774                         exit(2);
 775                 }
 776         }
 777         p = ibuf;
 778         do {
 779                 *p++ = tolower(*s1);
 780         } while (*s1++ != '\0');
 781         return (ibuf);
 782 }
 783 
 784 /*
 785  * Do grep on a single file.
 786  * Return true in any lines matched.
 787  *
 788  * We have two strategies:
 789  * The fast one is used when we have a single pattern with
 790  * a string known to occur in the pattern. We can then
 791  * do a BMG match on the whole buffer.
 792  * This is an order of magnitude faster.
 793  * Otherwise we split the buffer into lines,
 794  * and check for a match on each line.
 795  */
 796 static int
 797 grep(int fd, const char *fn)
 798 {
 799         PATTERN *pp;
 800         off_t   data_len;       /* length of the data chunk */
 801         off_t   line_len;       /* length of the current line */
 802         off_t   line_offset;    /* current line's offset from the beginning */
 803         long long       lineno;
 804         long long       matches = 0;    /* Number of matching lines */
 805         int     newlinep;       /* 0 if the last line of file has no newline */
 806         char    *ptr, *ptrend;
 807 
 808 
 809         if (patterns == NULL)
 810                 return (0);     /* no patterns to match -- just return */
 811 
 812         pp = patterns;
 813 
 814         if (use_bmg) {
 815                 bmgcomp(pp->pattern, strlen(pp->pattern));
 816         }
 817 
 818         if (use_wchar && outline == NULL) {
 819                 outbuflen = BUFSIZE + 1;
 820                 outline = malloc(sizeof (wchar_t) * outbuflen);
 821                 if (outline == NULL) {
 822                         (void) fprintf(stderr, gettext("%s: out of memory\n"),
 823                             cmdname);
 824                         exit(2);
 825                 }
 826         }
 827 
 828         if (prntbuf == NULL) {
 829                 prntbuflen = BUFSIZE;
 830                 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
 831                         (void) fprintf(stderr, gettext("%s: out of memory\n"),
 832                             cmdname);
 833                         exit(2);
 834                 }
 835         }
 836 
 837         line_offset = 0;
 838         lineno = 0;
 839         newlinep = 1;
 840         data_len = 0;
 841         for (; ; ) {
 842                 long    count;
 843                 off_t   offset = 0;
 844 
 845                 if (data_len == 0) {
 846                         /*
 847                          * If no data in the buffer, reset ptr
 848                          */
 849                         ptr = prntbuf;
 850                 }
 851                 if (ptr == prntbuf) {
 852                         /*
 853                          * The current data chunk starts from prntbuf.
 854                          * This means either the buffer has no data
 855                          * or the buffer has no newline.
 856                          * So, read more data from input.
 857                          */
 858                         count = read(fd, ptr + data_len, prntbuflen - data_len);
 859                         if (count < 0) {
 860                                 /* read error */
 861                                 if (cflag) {
 862                                         if (outfn && !rflag) {
 863                                                 (void) fprintf(stdout,
 864                                                     "%s:", fn);
 865                                         }
 866                                         if (!qflag && !rflag) {
 867                                                 (void) fprintf(stdout, "%lld\n",
 868                                                     matches);
 869                                         }
 870                                 }
 871                                 return (0);
 872                         } else if (count == 0) {
 873                                 /* no new data */
 874                                 if (data_len == 0) {
 875                                         /* end of file already reached */
 876                                         break;
 877                                 }
 878                                 /* last line of file has no newline */
 879                                 ptrend = ptr + data_len;
 880                                 newlinep = 0;
 881                                 goto L_start_process;
 882                         }
 883                         offset = data_len;
 884                         data_len += count;
 885                 }
 886 
 887                 /*
 888                  * Look for newline in the chunk
 889                  * between ptr + offset and ptr + data_len - offset.
 890                  */
 891                 ptrend = find_nl(ptr + offset, data_len - offset);
 892                 if (ptrend == NULL) {
 893                         /* no newline found in this chunk */
 894                         if (ptr > prntbuf) {
 895                                 /*
 896                                  * Move remaining data to the beginning
 897                                  * of the buffer.
 898                                  * Remaining data lie from ptr for
 899                                  * data_len bytes.
 900                                  */
 901                                 (void) memmove(prntbuf, ptr, data_len);
 902                         }
 903                         if (data_len == prntbuflen) {
 904                                 /*
 905                                  * No enough room in the buffer
 906                                  */
 907                                 prntbuflen += BUFSIZE;
 908                                 prntbuf = realloc(prntbuf, prntbuflen + 1);
 909                                 if (prntbuf == NULL) {
 910                                         (void) fprintf(stderr,
 911                                             gettext("%s: out of memory\n"),
 912                                             cmdname);
 913                                         exit(2);
 914                                 }
 915                         }
 916                         ptr = prntbuf;
 917                         /* read the next input */
 918                         continue;
 919                 }
 920 L_start_process:
 921 
 922                 /*
 923                  * Beginning of the chunk:      ptr
 924                  * End of the chunk:            ptr + data_len
 925                  * Beginning of the line:       ptr
 926                  * End of the line:             ptrend
 927                  */
 928 
 929                 if (use_bmg) {
 930                         /*
 931                          * Use Boyer-Moore-Gosper algorithm to find out if
 932                          * this chunk (not this line) contains the specified
 933                          * pattern.  If not, restart from the last line
 934                          * of this chunk.
 935                          */
 936                         char    *bline;
 937                         bline = bmgexec(ptr, ptr + data_len);
 938                         if (bline == NULL) {
 939                                 /*
 940                                  * No pattern found in this chunk.
 941                                  * Need to find the last line
 942                                  * in this chunk.
 943                                  */
 944                                 ptrend = rfind_nl(ptr, data_len);
 945 
 946                                 /*
 947                                  * When this chunk does not contain newline,
 948                                  * ptrend becomes NULL, which should happen
 949                                  * when the last line of file does not end
 950                                  * with a newline.  At such a point,
 951                                  * newlinep should have been set to 0.
 952                                  * Therefore, just after jumping to
 953                                  * L_skip_line, the main for-loop quits,
 954                                  * and the line_len value won't be
 955                                  * used.
 956                                  */
 957                                 line_len = ptrend - ptr;
 958                                 goto L_skip_line;
 959                         }
 960                         if (bline > ptrend) {
 961                                 /*
 962                                  * Pattern found not in the first line
 963                                  * of this chunk.
 964                                  * Discard the first line.
 965                                  */
 966                                 line_len = ptrend - ptr;
 967                                 goto L_skip_line;
 968                         }
 969                         /*
 970                          * Pattern found in the first line of this chunk.
 971                          * Using this result.
 972                          */
 973                         *ptrend = '\0';
 974                         line_len = ptrend - ptr;
 975 
 976                         /*
 977                          * before jumping to L_next_line,
 978                          * need to handle xflag if specified
 979                          */
 980                         if (xflag && (line_len != bmglen ||
 981                             strcmp(bmgpat, ptr) != 0)) {
 982                                 /* didn't match */
 983                                 pp = NULL;
 984                         } else {
 985                                 pp = patterns; /* to make it happen */
 986                         }
 987                         goto L_next_line;
 988                 }
 989                 lineno++;
 990                 /*
 991                  * Line starts from ptr and ends at ptrend.
 992                  * line_len will be the length of the line.
 993                  */
 994                 *ptrend = '\0';
 995                 line_len = ptrend - ptr;
 996 
 997                 /*
 998                  * From now, the process will be performed based
 999                  * on the line from ptr to ptrend.
1000                  */
1001                 if (use_wchar) {
1002                         size_t  len;
1003 
1004                         if (line_len >= outbuflen) {
1005                                 outbuflen = line_len + 1;
1006                                 outline = realloc(outline,
1007                                     sizeof (wchar_t) * outbuflen);
1008                                 if (outline == NULL) {
1009                                         (void) fprintf(stderr,
1010                                             gettext("%s: out of memory\n"),
1011                                             cmdname);
1012                                         exit(2);
1013                                 }
1014                         }
1015 
1016                         len = mbstowcs(outline, ptr, line_len);
1017                         if (len == (size_t)-1) {
1018                                 (void) fprintf(stderr, gettext(
1019         "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1020                                     cmdname, fn, lineno);
1021                                 /* never match a line with invalid sequence */
1022                                 goto L_skip_line;
1023                         }
1024                         outline[len] = L'\0';
1025 
1026                         if (iflag) {
1027                                 wchar_t *cp;
1028                                 for (cp = outline; *cp != '\0'; cp++) {
1029                                         *cp = towlower((wint_t)*cp);
1030                                 }
1031                         }
1032 
1033                         if (xflag) {
1034                                 for (pp = patterns; pp; pp = pp->next) {
1035                                         if (outline[0] == pp->wpattern[0] &&
1036                                             wcscmp(outline,
1037                                             pp->wpattern) == 0) {
1038                                                 /* matched */
1039                                                 break;
1040                                         }
1041                                 }
1042                         } else {
1043                                 for (pp = patterns; pp; pp = pp->next) {
1044                                         if (wcswcs(outline, pp->wpattern)
1045                                             != NULL) {
1046                                                 /* matched */
1047                                                 break;
1048                                         }
1049                                 }
1050                         }
1051                 } else if (Fflag) {
1052                         /* fgrep in byte-oriented handling */
1053                         char    *fptr;
1054                         if (iflag) {
1055                                 fptr = istrdup(ptr);
1056                         } else {
1057                                 fptr = ptr;
1058                         }
1059                         if (xflag) {
1060                                 /* fgrep -x */
1061                                 for (pp = patterns; pp; pp = pp->next) {
1062                                         if (fptr[0] == pp->pattern[0] &&
1063                                             strcmp(fptr, pp->pattern) == 0) {
1064                                                 /* matched */
1065                                                 break;
1066                                         }
1067                                 }
1068                         } else {
1069                                 for (pp = patterns; pp; pp = pp->next) {
1070                                         if (strstr(fptr, pp->pattern) != NULL) {
1071                                                 /* matched */
1072                                                 break;
1073                                         }
1074                                 }
1075                         }
1076                 } else {
1077                         /* grep or egrep */
1078                         for (pp = patterns; pp; pp = pp->next) {
1079                                 int     rv;
1080 
1081                                 rv = regexec(&pp->re, ptr, 0, NULL, 0);
1082                                 if (rv == REG_OK) {
1083                                         /* matched */
1084                                         break;
1085                                 }
1086 
1087                                 switch (rv) {
1088                                 case REG_NOMATCH:
1089                                         break;
1090                                 case REG_ECHAR:
1091                                         (void) fprintf(stderr, gettext(
1092             "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1093                                             cmdname, fn, lineno);
1094                                         break;
1095                                 default:
1096                                         (void) regerror(rv, &pp->re, errstr,
1097                                             sizeof (errstr));
1098                                         (void) fprintf(stderr, gettext(
1099             "%s: input file \"%s\": line %lld: %s\n"),
1100                                             cmdname, fn, lineno, errstr);
1101                                         exit(2);
1102                                 }
1103                         }
1104                 }
1105 
1106 L_next_line:
1107                 /*
1108                  * Here, if pp points to non-NULL, something has been matched
1109                  * to the pattern.
1110                  */
1111                 if (nvflag == (pp != NULL)) {
1112                         matches++;
1113                         /*
1114                          * Handle q, l, and c flags.
1115                          */
1116                         if (qflag) {
1117                                 /* no need to continue */
1118                                 /*
1119                                  * End of this line is ptrend.
1120                                  * We have read up to ptr + data_len.
1121                                  */
1122                                 off_t   pos;
1123                                 pos = ptr + data_len - (ptrend + 1);
1124                                 (void) lseek(fd, -pos, SEEK_CUR);
1125                                 exit(0);
1126                         }
1127                         if (lflag) {
1128                                 (void) printf("%s\n", fn);
1129                                 break;
1130                         }
1131                         if (!cflag) {
1132                                 if (Hflag || outfn) {
1133                                         (void) printf("%s:", fn);
1134                                 }
1135                                 if (bflag) {
1136                                         (void) printf("%lld:", (offset_t)
1137                                             (line_offset / BSIZE));
1138                                 }
1139                                 if (nflag) {
1140                                         (void) printf("%lld:", lineno);
1141                                 }
1142                                 *ptrend = '\n';
1143                                 (void) fwrite(ptr, 1, line_len + 1, stdout);
1144                         }
1145                         if (ferror(stdout)) {
1146                                 return (0);
1147                         }
1148                 }
1149 L_skip_line:
1150                 if (!newlinep)
1151                         break;
1152 
1153                 data_len -= line_len + 1;
1154                 line_offset += line_len + 1;
1155                 ptr = ptrend + 1;
1156         }
1157 
1158         if (cflag) {
1159                 if (Hflag || outfn) {
1160                         (void) printf("%s:", fn);
1161                 }
1162                 if (!qflag) {
1163                         (void) printf("%lld\n", matches);
1164                 }
1165         }
1166         return (matches != 0);
1167 }
1168 
1169 /*
1170  * usage message for grep
1171  */
1172 static void
1173 usage(void)
1174 {
1175         if (egrep || fgrep) {
1176                 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1177                 (void) fprintf(stderr,
1178                     gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1179                     "pattern_list [file ...]\n"));
1180 
1181                 (void) fprintf(stderr, "\t%s", cmdname);
1182                 (void) fprintf(stderr,
1183                     gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1184                     "[-e pattern_list]... "
1185                     "[-f pattern_file]... [file...]\n"));
1186         } else {
1187                 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1188                 (void) fprintf(stderr,
1189                     gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1190                     "pattern_list [file ...]\n"));
1191 
1192                 (void) fprintf(stderr, "\t%s", cmdname);
1193                 (void) fprintf(stderr,
1194                     gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1195                     "[-e pattern_list]... "
1196                     "[-f pattern_file]... [file...]\n"));
1197 
1198                 (void) fprintf(stderr, "\t%s", cmdname);
1199                 (void) fprintf(stderr,
1200                     gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1201                     "pattern_list [file ...]\n"));
1202 
1203                 (void) fprintf(stderr, "\t%s", cmdname);
1204                 (void) fprintf(stderr,
1205                     gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1206                     "[-e pattern_list]... "
1207                     "[-f pattern_file]... [file...]\n"));
1208 
1209                 (void) fprintf(stderr, "\t%s", cmdname);
1210                 (void) fprintf(stderr,
1211                     gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1212                     "pattern_list [file ...]\n"));
1213 
1214                 (void) fprintf(stderr, "\t%s", cmdname);
1215                 (void) fprintf(stderr,
1216                     gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
1217                     "[-f pattern_file]... [file...]\n"));
1218         }
1219         exit(2);
1220         /* NOTREACHED */
1221 }
1222 
1223 /*
1224  * Compile literal pattern into BMG tables
1225  */
1226 static void
1227 bmgcomp(char *pat, int len)
1228 {
1229         int     i;
1230         int     tlen;
1231         unsigned char   *uc = (unsigned char *)pat;
1232 
1233         bmglen = len;
1234         bmgpat = pat;
1235 
1236         for (i = 0; i < M_CSETSIZE; i++) {
1237                 bmgtab[i] = len;
1238         }
1239 
1240         len--;
1241         for (tlen = len, i = 0; i <= len; i++, tlen--) {
1242                 bmgtab[*uc++] = tlen;
1243         }
1244 }
1245 
1246 /*
1247  * BMG search.
1248  */
1249 static char *
1250 bmgexec(char *str, char *end)
1251 {
1252         int     t;
1253         char    *k, *s, *p;
1254 
1255         k = str + bmglen - 1;
1256         if (bmglen == 1) {
1257                 return (memchr(str, bmgpat[0], end - str));
1258         }
1259         for (; ; ) {
1260                 /* inner loop, should be most optimized */
1261                 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1262                         k += t;
1263                 }
1264                 if (k >= end) {
1265                         return (NULL);
1266                 }
1267                 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1268                         if (p == bmgpat) {
1269                                 return (s);
1270                         }
1271                 }
1272                 k++;
1273         }
1274         /* NOTREACHED */
1275 }