1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * grep - pattern matching program - combined grep, egrep, and fgrep.
  29  *      Based on MKS grep command, with XCU & Solaris mods.
  30  */
  31 
  32 /*
  33  * Copyright 1985, 1992 by Mortice Kern Systems Inc.  All rights reserved.
  34  *
  35  */
  36 
  37 /* Copyright 2012 Nexenta Systems, Inc.  All rights reserved. */
  38 
  39 /*
  40  * Copyright 2013 Damian Bogel. All rights reserved.
  41  */
  42 
  43 #include <string.h>
  44 #include <stdlib.h>
  45 #include <ctype.h>
  46 #include <stdarg.h>
  47 #include <regex.h>
  48 #include <limits.h>
  49 #include <sys/types.h>
  50 #include <sys/stat.h>
  51 #include <fcntl.h>
  52 #include <stdio.h>
  53 #include <locale.h>
  54 #include <wchar.h>
  55 #include <errno.h>
  56 #include <unistd.h>
  57 #include <wctype.h>
  58 #include <ftw.h>
  59 #include <sys/param.h>
  60 
  61 #define STDIN_FILENAME gettext("(standard input)")
  62 
  63 #define BSIZE           512             /* Size of block for -b */
  64 #define BUFSIZE         8192            /* Input buffer size */
  65 #define MAX_DEPTH       1000            /* how deep to recurse */
  66 
  67 #define M_CSETSIZE      256             /* singlebyte chars */
  68 static int      bmglen;                 /* length of BMG pattern */
  69 static char     *bmgpat;                /* BMG pattern */
  70 static int      bmgtab[M_CSETSIZE];     /* BMG delta1 table */
  71 
  72 typedef struct  _PATTERN        {
  73         char    *pattern;               /* original pattern */
  74         wchar_t *wpattern;              /* wide, lowercased pattern */
  75         struct  _PATTERN        *next;
  76         regex_t re;                     /* compiled pattern */
  77 } PATTERN;
  78 
  79 static PATTERN  *patterns;
  80 static char     errstr[128];            /* regerror string buffer */
  81 static int      regflags = 0;           /* regcomp options */
  82 static int      matched = 0;            /* return of the grep() */
  83 static int      errors = 0;             /* count of errors */
  84 static uchar_t  fgrep = 0;              /* Invoked as fgrep */
  85 static uchar_t  egrep = 0;              /* Invoked as egrep */
  86 static uchar_t  nvflag = 1;             /* Print matching lines */
  87 static uchar_t  cflag;                  /* Count of matches */
  88 static uchar_t  iflag;                  /* Case insensitve matching */
  89 static uchar_t  Hflag;                  /* Precede lines by file name */
  90 static uchar_t  hflag;                  /* Supress printing of filename */
  91 static uchar_t  lflag;                  /* Print file names of matches */
  92 static uchar_t  nflag;                  /* Precede lines by line number */
  93 static uchar_t  rflag;                  /* Search directories recursively */
  94 static uchar_t  bflag;                  /* Preccede matches by block number */
  95 static uchar_t  sflag;                  /* Suppress file error messages */
  96 static uchar_t  qflag;                  /* Suppress standard output */
  97 static uchar_t  wflag;                  /* Search for expression as a word */
  98 static uchar_t  xflag;                  /* Anchoring */
  99 static uchar_t  Eflag;                  /* Egrep or -E flag */
 100 static uchar_t  Fflag;                  /* Fgrep or -F flag */
 101 static uchar_t  Rflag;                  /* Like rflag, but follow symlinks */
 102 static uchar_t  outfn;                  /* Put out file name */
 103 static char     *cmdname;
 104 
 105 static int      use_wchar, use_bmg, mblocale;
 106 
 107 static size_t   outbuflen, prntbuflen;
 108 static char     *prntbuf;
 109 static wchar_t  *outline;
 110 
 111 static void     addfile(const char *fn);
 112 static void     addpattern(char *s);
 113 static void     fixpatterns(void);
 114 static void     usage(void);
 115 static int      grep(int, const char *);
 116 static void     bmgcomp(char *, int);
 117 static char     *bmgexec(char *, char *);
 118 static int      recursive(const char *, const struct stat *, int, struct FTW *);
 119 static void     process_path(const char *);
 120 static void     process_file(const char *, int);
 121 
 122 /*
 123  * mainline for grep
 124  */
 125 int
 126 main(int argc, char **argv)
 127 {
 128         char    *ap;
 129         int     c;
 130         int     fflag = 0;
 131         int     i, n_pattern = 0, n_file = 0;
 132         char    **pattern_list = NULL;
 133         char    **file_list = NULL;
 134 
 135         (void) setlocale(LC_ALL, "");
 136 #if !defined(TEXT_DOMAIN)       /* Should be defined by cc -D */
 137 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it weren't */
 138 #endif
 139         (void) textdomain(TEXT_DOMAIN);
 140 
 141         /*
 142          * true if this is running on the multibyte locale
 143          */
 144         mblocale = (MB_CUR_MAX > 1);
 145         /*
 146          * Skip leading slashes
 147          */
 148         cmdname = argv[0];
 149         if (ap = strrchr(cmdname, '/'))
 150                 cmdname = ap + 1;
 151 
 152         ap = cmdname;
 153         /*
 154          * Detect egrep/fgrep via command name, map to -E and -F options.
 155          */
 156         if (*ap == 'e' || *ap == 'E') {
 157                 regflags |= REG_EXTENDED;
 158                 egrep++;
 159         } else {
 160                 if (*ap == 'f' || *ap == 'F') {
 161                         fgrep++;
 162                 }
 163         }
 164 
 165         while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) {
 166                 switch (c) {
 167                 case 'v':       /* POSIX: negate matches */
 168                         nvflag = 0;
 169                         break;
 170 
 171                 case 'c':       /* POSIX: write count */
 172                         cflag++;
 173                         break;
 174 
 175                 case 'i':       /* POSIX: ignore case */
 176                         iflag++;
 177                         regflags |= REG_ICASE;
 178                         break;
 179 
 180                 case 'l':       /* POSIX: Write filenames only */
 181                         lflag++;
 182                         break;
 183 
 184                 case 'n':       /* POSIX: Write line numbers */
 185                         nflag++;
 186                         break;
 187 
 188                 case 'r':       /* Solaris: search recursively */
 189                         rflag++;
 190                         break;
 191 
 192                 case 'b':       /* Solaris: Write file block numbers */
 193                         bflag++;
 194                         break;
 195 
 196                 case 's':       /* POSIX: No error msgs for files */
 197                         sflag++;
 198                         break;
 199 
 200                 case 'e':       /* POSIX: pattern list */
 201                         n_pattern++;
 202                         pattern_list = realloc(pattern_list,
 203                             sizeof (char *) * n_pattern);
 204                         if (pattern_list == NULL) {
 205                                 (void) fprintf(stderr,
 206                                     gettext("%s: out of memory\n"),
 207                                     cmdname);
 208                                 exit(2);
 209                         }
 210                         *(pattern_list + n_pattern - 1) = optarg;
 211                         break;
 212 
 213                 case 'f':       /* POSIX: pattern file */
 214                         fflag = 1;
 215                         n_file++;
 216                         file_list = realloc(file_list,
 217                             sizeof (char *) * n_file);
 218                         if (file_list == NULL) {
 219                                 (void) fprintf(stderr,
 220                                     gettext("%s: out of memory\n"),
 221                                     cmdname);
 222                                 exit(2);
 223                         }
 224                         *(file_list + n_file - 1) = optarg;
 225                         break;
 226 
 227                 /* based on options order h or H is set as in GNU grep */
 228                 case 'h':       /* Solaris: supress printing of file name */
 229                         hflag = 1;
 230                         Hflag = 0;
 231                         break;
 232                 /* Solaris: precede every matching with file name */
 233                 case 'H':
 234                         Hflag = 1;
 235                         hflag = 0;
 236                         break;
 237 
 238                 case 'q':       /* POSIX: quiet: status only */
 239                         qflag++;
 240                         break;
 241 
 242                 case 'w':       /* Solaris: treat pattern as word */
 243                         wflag++;
 244                         break;
 245 
 246                 case 'x':       /* POSIX: full line matches */
 247                         xflag++;
 248                         regflags |= REG_ANCHOR;
 249                         break;
 250 
 251                 case 'E':       /* POSIX: Extended RE's */
 252                         regflags |= REG_EXTENDED;
 253                         Eflag++;
 254                         break;
 255 
 256                 case 'F':       /* POSIX: strings, not RE's */
 257                         Fflag++;
 258                         break;
 259 
 260                 case 'R':       /* Solaris: like rflag, but follow symlinks */
 261                         Rflag++;
 262                         rflag++;
 263                         break;
 264 
 265                 default:
 266                         usage();
 267                 }
 268         }
 269         /*
 270          * If we're invoked as egrep or fgrep we need to do some checks
 271          */
 272 
 273         if (egrep || fgrep) {
 274                 /*
 275                  * Use of -E or -F with egrep or fgrep is illegal
 276                  */
 277                 if (Eflag || Fflag)
 278                         usage();
 279                 /*
 280                  * Don't allow use of wflag with egrep / fgrep
 281                  */
 282                 if (wflag)
 283                         usage();
 284                 /*
 285                  * For Solaris the -s flag is equivalent to XCU -q
 286                  */
 287                 if (sflag)
 288                         qflag++;
 289                 /*
 290                  * done with above checks - set the appropriate flags
 291                  */
 292                 if (egrep)
 293                         Eflag++;
 294                 else                    /* Else fgrep */
 295                         Fflag++;
 296         }
 297 
 298         if (wflag && (Eflag || Fflag)) {
 299                 /*
 300                  * -w cannot be specified with grep -F
 301                  */
 302                 usage();
 303         }
 304 
 305         /*
 306          * -E and -F flags are mutually exclusive - check for this
 307          */
 308         if (Eflag && Fflag)
 309                 usage();
 310 
 311         /*
 312          * -l overrides -H like in GNU grep
 313          */
 314         if (lflag)
 315                 Hflag = 0;
 316 
 317         /*
 318          * -c, -l and -q flags are mutually exclusive
 319          * We have -c override -l like in Solaris.
 320          * -q overrides -l & -c programmatically in grep() function.
 321          */
 322         if (cflag && lflag)
 323                 lflag = 0;
 324 
 325         argv += optind - 1;
 326         argc -= optind - 1;
 327 
 328         /*
 329          * Now handling -e and -f option
 330          */
 331         if (pattern_list) {
 332                 for (i = 0; i < n_pattern; i++) {
 333                         addpattern(pattern_list[i]);
 334                 }
 335                 free(pattern_list);
 336         }
 337         if (file_list) {
 338                 for (i = 0; i < n_file; i++) {
 339                         addfile(file_list[i]);
 340                 }
 341                 free(file_list);
 342         }
 343 
 344         /*
 345          * No -e or -f?  Make sure there is one more arg, use it as the pattern.
 346          */
 347         if (patterns == NULL && !fflag) {
 348                 if (argc < 2)
 349                         usage();
 350                 addpattern(argv[1]);
 351                 argc--;
 352                 argv++;
 353         }
 354 
 355         /*
 356          * If -x flag is not specified or -i flag is specified
 357          * with fgrep in a multibyte locale, need to use
 358          * the wide character APIs.  Otherwise, byte-oriented
 359          * process will be done.
 360          */
 361         use_wchar = Fflag && mblocale && (!xflag || iflag);
 362 
 363         /*
 364          * Compile Patterns and also decide if BMG can be used
 365          */
 366         fixpatterns();
 367 
 368         /* Process all files: stdin, or rest of arg list */
 369         if (argc < 2) {
 370                 matched = grep(0, STDIN_FILENAME);
 371         } else {
 372                 if (Hflag || (argc > 2 && hflag == 0))
 373                         outfn = 1;      /* Print filename on match line */
 374                 for (argv++; *argv != NULL; argv++) {
 375                         process_path(*argv);
 376                 }
 377         }
 378         /*
 379          * Return() here is used instead of exit
 380          */
 381 
 382         (void) fflush(stdout);
 383 
 384         if (errors)
 385                 return (2);
 386         return (matched ? 0 : 1);
 387 }
 388 
 389 static void
 390 process_path(const char *path)
 391 {
 392         struct  stat st;
 393         int     walkflags = FTW_CHDIR;
 394         char    *buf = NULL;
 395 
 396         if (rflag) {
 397                 if (stat(path, &st) != -1 &&
 398                     (st.st_mode & S_IFMT) == S_IFDIR) {
 399                         outfn = 1; /* Print filename */
 400 
 401                         /*
 402                          * Add trailing slash if arg
 403                          * is directory, to resolve symlinks.
 404                          */
 405                         if (path[strlen(path) - 1] != '/') {
 406                                 (void) asprintf(&buf, "%s/", path);
 407                                 if (buf != NULL)
 408                                         path = buf;
 409                         }
 410 
 411                         /*
 412                          * Search through subdirs if path is directory.
 413                          * Don't follow symlinks if Rflag is not set.
 414                          */
 415                         if (!Rflag)
 416                                 walkflags |= FTW_PHYS;
 417 
 418                         if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
 419                                 if (!sflag)
 420                                         (void) fprintf(stderr,
 421                                             gettext("%s: can't open \"%s\"\n"),
 422                                             cmdname, path);
 423                                 errors = 1;
 424                         }
 425                         return;
 426                 }
 427         }
 428         process_file(path, 0);
 429 }
 430 
 431 /*
 432  * Read and process all files in directory recursively.
 433  */
 434 static int
 435 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
 436 {
 437         /*
 438          * Process files and follow symlinks if Rflag set.
 439          */
 440         if (info != FTW_F) {
 441                 /* Report broken symlinks and unreadable files */
 442                 if (!sflag &&
 443                     (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
 444                         (void) fprintf(stderr,
 445                             gettext("%s: can't open \"%s\"\n"), cmdname, name);
 446                 }
 447                 return (0);
 448         }
 449 
 450 
 451         /* Skip devices and pipes if Rflag is not set */
 452         if (!Rflag && !S_ISREG(statp->st_mode))
 453                 return (0);
 454         /* Pass offset to relative name from FTW_CHDIR */
 455         process_file(name, ftw->base);
 456         return (0);
 457 }
 458 
 459 /*
 460  * Opens file and call grep function.
 461  */
 462 static void
 463 process_file(const char *name, int base)
 464 {
 465         int fd;
 466 
 467         if ((fd = open(name + base, O_RDONLY)) == -1) {
 468                 errors = 1;
 469                 if (!sflag) /* Silent mode */
 470                         (void) fprintf(stderr, gettext(
 471                             "%s: can't open \"%s\"\n"),
 472                             cmdname, name);
 473                 return;
 474         }
 475         matched |= grep(fd, name);
 476         (void) close(fd);
 477 
 478         if (ferror(stdout)) {
 479                 (void) fprintf(stderr, gettext(
 480                     "%s: error writing to stdout\n"),
 481                     cmdname);
 482                 (void) fflush(stdout);
 483                 exit(2);
 484         }
 485 
 486 }
 487 
 488 /*
 489  * Add a file of strings to the pattern list.
 490  */
 491 static void
 492 addfile(const char *fn)
 493 {
 494         FILE    *fp;
 495         char    *inbuf;
 496         char    *bufp;
 497         size_t  bufsiz, buflen, bufused;
 498 
 499         /*
 500          * Open the pattern file
 501          */
 502         if ((fp = fopen(fn, "r")) == NULL) {
 503                 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
 504                     cmdname, fn);
 505                 exit(2);
 506         }
 507         bufsiz = BUFSIZE;
 508         if ((inbuf = malloc(bufsiz)) == NULL) {
 509                 (void) fprintf(stderr,
 510                     gettext("%s: out of memory\n"), cmdname);
 511                 exit(2);
 512         }
 513         bufp = inbuf;
 514         bufused = 0;
 515         /*
 516          * Read in the file, reallocing as we need more memory
 517          */
 518         while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
 519                 buflen = strlen(bufp);
 520                 bufused += buflen;
 521                 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
 522                         /*
 523                          * if this line does not fit to the buffer,
 524                          * realloc larger buffer
 525                          */
 526                         bufsiz += BUFSIZE;
 527                         if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
 528                                 (void) fprintf(stderr,
 529                                     gettext("%s: out of memory\n"),
 530                                     cmdname);
 531                                 exit(2);
 532                         }
 533                         bufp = inbuf + bufused;
 534                         continue;
 535                 }
 536                 if (bufp[buflen - 1] == '\n') {
 537                         bufp[--buflen] = '\0';
 538                 }
 539                 addpattern(inbuf);
 540 
 541                 bufp = inbuf;
 542                 bufused = 0;
 543         }
 544         free(inbuf);
 545         (void) fclose(fp);
 546 }
 547 
 548 /*
 549  * Add a string to the pattern list.
 550  */
 551 static void
 552 addpattern(char *s)
 553 {
 554         PATTERN *pp;
 555         char    *wordbuf;
 556         char    *np;
 557 
 558         for (; ; ) {
 559                 np = strchr(s, '\n');
 560                 if (np != NULL)
 561                         *np = '\0';
 562                 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
 563                         (void) fprintf(stderr, gettext(
 564                             "%s: out of memory\n"),
 565                             cmdname);
 566                         exit(2);
 567                 }
 568                 if (wflag) {
 569                         /*
 570                          * Solaris wflag support: Add '<' '>' to pattern to
 571                          * select it as a word. Doesn't make sense with -F
 572                          * but we're Libertarian.
 573                          */
 574                         size_t  slen, wordlen;
 575 
 576                         slen = strlen(s);
 577                         wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
 578                         if ((wordbuf = malloc(wordlen)) == NULL) {
 579                                 (void) fprintf(stderr,
 580                                     gettext("%s: out of memory\n"),
 581                                     cmdname);
 582                                 exit(2);
 583                         }
 584                         (void) strcpy(wordbuf, "\\<");
 585                         (void) strcpy(wordbuf + 2, s);
 586                         (void) strcpy(wordbuf + 2 + slen, "\\>");
 587                 } else {
 588                         if ((wordbuf = strdup(s)) == NULL) {
 589                                 (void) fprintf(stderr,
 590                                     gettext("%s: out of memory\n"),
 591                                     cmdname);
 592                                 exit(2);
 593                         }
 594                 }
 595                 pp->pattern = wordbuf;
 596                 pp->next = patterns;
 597                 patterns = pp;
 598                 if (np == NULL)
 599                         break;
 600                 s = np + 1;
 601         }
 602 }
 603 
 604 /*
 605  * Fix patterns.
 606  * Must do after all arguments read, in case later -i option.
 607  */
 608 static void
 609 fixpatterns(void)
 610 {
 611         PATTERN *pp;
 612         int     rv, fix_pattern, npatterns;
 613 
 614         /*
 615          * As REG_ANCHOR flag is not supported in the current Solaris,
 616          * need to fix the specified pattern if -x is specified with
 617          * grep or egrep
 618          */
 619         fix_pattern = !Fflag && xflag;
 620 
 621         for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
 622                 npatterns++;
 623                 if (fix_pattern) {
 624                         char    *cp, *cq;
 625                         size_t  plen, nplen;
 626 
 627                         plen = strlen(pp->pattern);
 628                         /* '^' pattern '$' */
 629                         nplen = 1 + plen + 1 + 1;
 630                         if ((cp = malloc(nplen)) == NULL) {
 631                                 (void) fprintf(stderr,
 632                                     gettext("%s: out of memory\n"),
 633                                     cmdname);
 634                                 exit(2);
 635                         }
 636                         cq = cp;
 637                         *cq++ = '^';
 638                         cq = strcpy(cq, pp->pattern) + plen;
 639                         *cq++ = '$';
 640                         *cq = '\0';
 641                         free(pp->pattern);
 642                         pp->pattern = cp;
 643                 }
 644 
 645                 if (Fflag) {
 646                         if (use_wchar) {
 647                                 /*
 648                                  * Fflag && mblocale && iflag
 649                                  * Fflag && mblocale && !xflag
 650                                  */
 651                                 size_t  n;
 652                                 n = strlen(pp->pattern) + 1;
 653                                 if ((pp->wpattern =
 654                                     malloc(sizeof (wchar_t) * n)) == NULL) {
 655                                         (void) fprintf(stderr,
 656                                             gettext("%s: out of memory\n"),
 657                                             cmdname);
 658                                         exit(2);
 659                                 }
 660                                 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
 661                                     (size_t)-1) {
 662                                         (void) fprintf(stderr,
 663                                             gettext("%s: failed to convert "
 664                                             "\"%s\" to wide-characters\n"),
 665                                             cmdname, pp->pattern);
 666                                         exit(2);
 667                                 }
 668                                 if (iflag) {
 669                                         wchar_t *wp;
 670                                         for (wp = pp->wpattern; *wp != L'\0';
 671                                             wp++) {
 672                                                 *wp = towlower((wint_t)*wp);
 673                                         }
 674                                 }
 675                                 free(pp->pattern);
 676                         } else {
 677                                 /*
 678                                  * Fflag && mblocale && !iflag
 679                                  * Fflag && !mblocale && iflag
 680                                  * Fflag && !mblocale && !iflag
 681                                  */
 682                                 if (iflag) {
 683                                         unsigned char   *cp;
 684                                         for (cp = (unsigned char *)pp->pattern;
 685                                             *cp != '\0'; cp++) {
 686                                                 *cp = tolower(*cp);
 687                                         }
 688                                 }
 689                         }
 690                         /*
 691                          * fgrep: No regular expressions.
 692                          */
 693                         continue;
 694                 }
 695 
 696                 /*
 697                  * For non-fgrep, compile the regular expression,
 698                  * give an informative error message, and exit if
 699                  * it didn't compile.
 700                  */
 701                 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
 702                         (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
 703                         (void) fprintf(stderr,
 704                             gettext("%s: RE error in %s: %s\n"),
 705                             cmdname, pp->pattern, errstr);
 706                         exit(2);
 707                 }
 708                 free(pp->pattern);
 709         }
 710 
 711         /*
 712          * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
 713          * Use the Boyer-Moore-Gosper algorithm if:
 714          * - fgrep                      (Fflag)
 715          * - singlebyte locale          (!mblocale)
 716          * - no ignoring case           (!iflag)
 717          * - no printing line numbers   (!nflag)
 718          * - no negating the output     (nvflag)
 719          * - only one pattern           (npatterns == 1)
 720          * - non zero length pattern    (strlen(patterns->pattern) != 0)
 721          *
 722          * It's guaranteed patterns->pattern is still alive
 723          * when Fflag && !mblocale.
 724          */
 725         use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
 726             (npatterns == 1) && (strlen(patterns->pattern) != 0);
 727 }
 728 
 729 /*
 730  * Search a newline from the beginning of the string
 731  */
 732 static char *
 733 find_nl(const char *ptr, size_t len)
 734 {
 735         while (len-- != 0) {
 736                 if (*ptr++ == '\n') {
 737                         return ((char *)--ptr);
 738                 }
 739         }
 740         return (NULL);
 741 }
 742 
 743 /*
 744  * Search a newline from the end of the string
 745  */
 746 static char *
 747 rfind_nl(const char *ptr, size_t len)
 748 {
 749         const char      *uptr = ptr + len;
 750         while (len--) {
 751                 if (*--uptr == '\n') {
 752                         return ((char *)uptr);
 753                 }
 754         }
 755         return (NULL);
 756 }
 757 
 758 /*
 759  * Duplicate the specified string converting each character
 760  * into a lower case.
 761  */
 762 static char *
 763 istrdup(const char *s1)
 764 {
 765         static size_t   ibuflen = 0;
 766         static char     *ibuf = NULL;
 767         size_t  slen;
 768         char    *p;
 769 
 770         slen = strlen(s1);
 771         if (slen >= ibuflen) {
 772                 /* ibuf does not fit to s1 */
 773                 ibuflen = slen + 1;
 774                 ibuf = realloc(ibuf, ibuflen);
 775                 if (ibuf == NULL) {
 776                         (void) fprintf(stderr,
 777                             gettext("%s: out of memory\n"), cmdname);
 778                         exit(2);
 779                 }
 780         }
 781         p = ibuf;
 782         do {
 783                 *p++ = tolower(*s1);
 784         } while (*s1++ != '\0');
 785         return (ibuf);
 786 }
 787 
 788 /*
 789  * Do grep on a single file.
 790  * Return true in any lines matched.
 791  *
 792  * We have two strategies:
 793  * The fast one is used when we have a single pattern with
 794  * a string known to occur in the pattern. We can then
 795  * do a BMG match on the whole buffer.
 796  * This is an order of magnitude faster.
 797  * Otherwise we split the buffer into lines,
 798  * and check for a match on each line.
 799  */
 800 static int
 801 grep(int fd, const char *fn)
 802 {
 803         PATTERN *pp;
 804         off_t   data_len;       /* length of the data chunk */
 805         off_t   line_len;       /* length of the current line */
 806         off_t   line_offset;    /* current line's offset from the beginning */
 807         long long       lineno;
 808         long long       matches = 0;    /* Number of matching lines */
 809         int     newlinep;       /* 0 if the last line of file has no newline */
 810         char    *ptr, *ptrend;
 811 
 812 
 813         if (patterns == NULL)
 814                 return (0);     /* no patterns to match -- just return */
 815 
 816         pp = patterns;
 817 
 818         if (use_bmg) {
 819                 bmgcomp(pp->pattern, strlen(pp->pattern));
 820         }
 821 
 822         if (use_wchar && outline == NULL) {
 823                 outbuflen = BUFSIZE + 1;
 824                 outline = malloc(sizeof (wchar_t) * outbuflen);
 825                 if (outline == NULL) {
 826                         (void) fprintf(stderr, gettext("%s: out of memory\n"),
 827                             cmdname);
 828                         exit(2);
 829                 }
 830         }
 831 
 832         if (prntbuf == NULL) {
 833                 prntbuflen = BUFSIZE;
 834                 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
 835                         (void) fprintf(stderr, gettext("%s: out of memory\n"),
 836                             cmdname);
 837                         exit(2);
 838                 }
 839         }
 840 
 841         line_offset = 0;
 842         lineno = 0;
 843         newlinep = 1;
 844         data_len = 0;
 845         for (; ; ) {
 846                 long    count;
 847                 off_t   offset = 0;
 848 
 849                 if (data_len == 0) {
 850                         /*
 851                          * If no data in the buffer, reset ptr
 852                          */
 853                         ptr = prntbuf;
 854                 }
 855                 if (ptr == prntbuf) {
 856                         /*
 857                          * The current data chunk starts from prntbuf.
 858                          * This means either the buffer has no data
 859                          * or the buffer has no newline.
 860                          * So, read more data from input.
 861                          */
 862                         count = read(fd, ptr + data_len, prntbuflen - data_len);
 863                         if (count < 0) {
 864                                 /* read error */
 865                                 if (cflag) {
 866                                         if (outfn && !rflag) {
 867                                                 (void) fprintf(stdout,
 868                                                     "%s:", fn);
 869                                         }
 870                                         if (!qflag && !rflag) {
 871                                                 (void) fprintf(stdout, "%lld\n",
 872                                                     matches);
 873                                         }
 874                                 }
 875                                 return (0);
 876                         } else if (count == 0) {
 877                                 /* no new data */
 878                                 if (data_len == 0) {
 879                                         /* end of file already reached */
 880                                         break;
 881                                 }
 882                                 /* last line of file has no newline */
 883                                 ptrend = ptr + data_len;
 884                                 newlinep = 0;
 885                                 goto L_start_process;
 886                         }
 887                         offset = data_len;
 888                         data_len += count;
 889                 }
 890 
 891                 /*
 892                  * Look for newline in the chunk
 893                  * between ptr + offset and ptr + data_len - offset.
 894                  */
 895                 ptrend = find_nl(ptr + offset, data_len - offset);
 896                 if (ptrend == NULL) {
 897                         /* no newline found in this chunk */
 898                         if (ptr > prntbuf) {
 899                                 /*
 900                                  * Move remaining data to the beginning
 901                                  * of the buffer.
 902                                  * Remaining data lie from ptr for
 903                                  * data_len bytes.
 904                                  */
 905                                 (void) memmove(prntbuf, ptr, data_len);
 906                         }
 907                         if (data_len == prntbuflen) {
 908                                 /*
 909                                  * No enough room in the buffer
 910                                  */
 911                                 prntbuflen += BUFSIZE;
 912                                 prntbuf = realloc(prntbuf, prntbuflen + 1);
 913                                 if (prntbuf == NULL) {
 914                                         (void) fprintf(stderr,
 915                                             gettext("%s: out of memory\n"),
 916                                             cmdname);
 917                                         exit(2);
 918                                 }
 919                         }
 920                         ptr = prntbuf;
 921                         /* read the next input */
 922                         continue;
 923                 }
 924 L_start_process:
 925 
 926                 /*
 927                  * Beginning of the chunk:      ptr
 928                  * End of the chunk:            ptr + data_len
 929                  * Beginning of the line:       ptr
 930                  * End of the line:             ptrend
 931                  */
 932 
 933                 if (use_bmg) {
 934                         /*
 935                          * Use Boyer-Moore-Gosper algorithm to find out if
 936                          * this chunk (not this line) contains the specified
 937                          * pattern.  If not, restart from the last line
 938                          * of this chunk.
 939                          */
 940                         char    *bline;
 941                         bline = bmgexec(ptr, ptr + data_len);
 942                         if (bline == NULL) {
 943                                 /*
 944                                  * No pattern found in this chunk.
 945                                  * Need to find the last line
 946                                  * in this chunk.
 947                                  */
 948                                 ptrend = rfind_nl(ptr, data_len);
 949 
 950                                 /*
 951                                  * When this chunk does not contain newline,
 952                                  * ptrend becomes NULL, which should happen
 953                                  * when the last line of file does not end
 954                                  * with a newline.  At such a point,
 955                                  * newlinep should have been set to 0.
 956                                  * Therefore, just after jumping to
 957                                  * L_skip_line, the main for-loop quits,
 958                                  * and the line_len value won't be
 959                                  * used.
 960                                  */
 961                                 line_len = ptrend - ptr;
 962                                 goto L_skip_line;
 963                         }
 964                         if (bline > ptrend) {
 965                                 /*
 966                                  * Pattern found not in the first line
 967                                  * of this chunk.
 968                                  * Discard the first line.
 969                                  */
 970                                 line_len = ptrend - ptr;
 971                                 goto L_skip_line;
 972                         }
 973                         /*
 974                          * Pattern found in the first line of this chunk.
 975                          * Using this result.
 976                          */
 977                         *ptrend = '\0';
 978                         line_len = ptrend - ptr;
 979 
 980                         /*
 981                          * before jumping to L_next_line,
 982                          * need to handle xflag if specified
 983                          */
 984                         if (xflag && (line_len != bmglen ||
 985                             strcmp(bmgpat, ptr) != 0)) {
 986                                 /* didn't match */
 987                                 pp = NULL;
 988                         } else {
 989                                 pp = patterns; /* to make it happen */
 990                         }
 991                         goto L_next_line;
 992                 }
 993                 lineno++;
 994                 /*
 995                  * Line starts from ptr and ends at ptrend.
 996                  * line_len will be the length of the line.
 997                  */
 998                 *ptrend = '\0';
 999                 line_len = ptrend - ptr;
1000 
1001                 /*
1002                  * From now, the process will be performed based
1003                  * on the line from ptr to ptrend.
1004                  */
1005                 if (use_wchar) {
1006                         size_t  len;
1007 
1008                         if (line_len >= outbuflen) {
1009                                 outbuflen = line_len + 1;
1010                                 outline = realloc(outline,
1011                                     sizeof (wchar_t) * outbuflen);
1012                                 if (outline == NULL) {
1013                                         (void) fprintf(stderr,
1014                                             gettext("%s: out of memory\n"),
1015                                             cmdname);
1016                                         exit(2);
1017                                 }
1018                         }
1019 
1020                         len = mbstowcs(outline, ptr, line_len);
1021                         if (len == (size_t)-1) {
1022                                 (void) fprintf(stderr, gettext(
1023         "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1024                                     cmdname, fn, lineno);
1025                                 /* never match a line with invalid sequence */
1026                                 goto L_skip_line;
1027                         }
1028                         outline[len] = L'\0';
1029 
1030                         if (iflag) {
1031                                 wchar_t *cp;
1032                                 for (cp = outline; *cp != '\0'; cp++) {
1033                                         *cp = towlower((wint_t)*cp);
1034                                 }
1035                         }
1036 
1037                         if (xflag) {
1038                                 for (pp = patterns; pp; pp = pp->next) {
1039                                         if (outline[0] == pp->wpattern[0] &&
1040                                             wcscmp(outline,
1041                                             pp->wpattern) == 0) {
1042                                                 /* matched */
1043                                                 break;
1044                                         }
1045                                 }
1046                         } else {
1047                                 for (pp = patterns; pp; pp = pp->next) {
1048                                         if (wcswcs(outline, pp->wpattern)
1049                                             != NULL) {
1050                                                 /* matched */
1051                                                 break;
1052                                         }
1053                                 }
1054                         }
1055                 } else if (Fflag) {
1056                         /* fgrep in byte-oriented handling */
1057                         char    *fptr;
1058                         if (iflag) {
1059                                 fptr = istrdup(ptr);
1060                         } else {
1061                                 fptr = ptr;
1062                         }
1063                         if (xflag) {
1064                                 /* fgrep -x */
1065                                 for (pp = patterns; pp; pp = pp->next) {
1066                                         if (fptr[0] == pp->pattern[0] &&
1067                                             strcmp(fptr, pp->pattern) == 0) {
1068                                                 /* matched */
1069                                                 break;
1070                                         }
1071                                 }
1072                         } else {
1073                                 for (pp = patterns; pp; pp = pp->next) {
1074                                         if (strstr(fptr, pp->pattern) != NULL) {
1075                                                 /* matched */
1076                                                 break;
1077                                         }
1078                                 }
1079                         }
1080                 } else {
1081                         /* grep or egrep */
1082                         for (pp = patterns; pp; pp = pp->next) {
1083                                 int     rv;
1084 
1085                                 rv = regexec(&pp->re, ptr, 0, NULL, 0);
1086                                 if (rv == REG_OK) {
1087                                         /* matched */
1088                                         break;
1089                                 }
1090 
1091                                 switch (rv) {
1092                                 case REG_NOMATCH:
1093                                         break;
1094                                 case REG_ECHAR:
1095                                         (void) fprintf(stderr, gettext(
1096             "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1097                                             cmdname, fn, lineno);
1098                                         break;
1099                                 default:
1100                                         (void) regerror(rv, &pp->re, errstr,
1101                                             sizeof (errstr));
1102                                         (void) fprintf(stderr, gettext(
1103             "%s: input file \"%s\": line %lld: %s\n"),
1104                                             cmdname, fn, lineno, errstr);
1105                                         exit(2);
1106                                 }
1107                         }
1108                 }
1109 
1110 L_next_line:
1111                 /*
1112                  * Here, if pp points to non-NULL, something has been matched
1113                  * to the pattern.
1114                  */
1115                 if (nvflag == (pp != NULL)) {
1116                         matches++;
1117                         /*
1118                          * Handle q, l, and c flags.
1119                          */
1120                         if (qflag) {
1121                                 /* no need to continue */
1122                                 /*
1123                                  * End of this line is ptrend.
1124                                  * We have read up to ptr + data_len.
1125                                  */
1126                                 off_t   pos;
1127                                 pos = ptr + data_len - (ptrend + 1);
1128                                 (void) lseek(fd, -pos, SEEK_CUR);
1129                                 exit(0);
1130                         }
1131                         if (lflag) {
1132                                 (void) printf("%s\n", fn);
1133                                 break;
1134                         }
1135                         if (!cflag) {
1136                                 if (Hflag || outfn) {
1137                                         (void) printf("%s:", fn);
1138                                 }
1139                                 if (bflag) {
1140                                         (void) printf("%lld:", (offset_t)
1141                                             (line_offset / BSIZE));
1142                                 }
1143                                 if (nflag) {
1144                                         (void) printf("%lld:", lineno);
1145                                 }
1146                                 *ptrend = '\n';
1147                                 (void) fwrite(ptr, 1, line_len + 1, stdout);
1148                         }
1149                         if (ferror(stdout)) {
1150                                 return (0);
1151                         }
1152                 }
1153 L_skip_line:
1154                 if (!newlinep)
1155                         break;
1156 
1157                 data_len -= line_len + 1;
1158                 line_offset += line_len + 1;
1159                 ptr = ptrend + 1;
1160         }
1161 
1162         if (cflag) {
1163                 if (Hflag || outfn) {
1164                         (void) printf("%s:", fn);
1165                 }
1166                 if (!qflag) {
1167                         (void) printf("%lld\n", matches);
1168                 }
1169         }
1170         return (matches != 0);
1171 }
1172 
1173 /*
1174  * usage message for grep
1175  */
1176 static void
1177 usage(void)
1178 {
1179         if (egrep || fgrep) {
1180                 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1181                 (void) fprintf(stderr,
1182                     gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1183                     "pattern_list [file ...]\n"));
1184 
1185                 (void) fprintf(stderr, "\t%s", cmdname);
1186                 (void) fprintf(stderr,
1187                     gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1188                     "[-e pattern_list]... "
1189                     "[-f pattern_file]... [file...]\n"));
1190         } else {
1191                 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1192                 (void) fprintf(stderr,
1193                     gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1194                     "pattern_list [file ...]\n"));
1195 
1196                 (void) fprintf(stderr, "\t%s", cmdname);
1197                 (void) fprintf(stderr,
1198                     gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1199                     "[-e pattern_list]... "
1200                     "[-f pattern_file]... [file...]\n"));
1201 
1202                 (void) fprintf(stderr, "\t%s", cmdname);
1203                 (void) fprintf(stderr,
1204                     gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1205                     "pattern_list [file ...]\n"));
1206 
1207                 (void) fprintf(stderr, "\t%s", cmdname);
1208                 (void) fprintf(stderr,
1209                     gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1210                     "[-e pattern_list]... "
1211                     "[-f pattern_file]... [file...]\n"));
1212 
1213                 (void) fprintf(stderr, "\t%s", cmdname);
1214                 (void) fprintf(stderr,
1215                     gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1216                     "pattern_list [file ...]\n"));
1217 
1218                 (void) fprintf(stderr, "\t%s", cmdname);
1219                 (void) fprintf(stderr,
1220                     gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
1221                     "[-f pattern_file]... [file...]\n"));
1222         }
1223         exit(2);
1224         /* NOTREACHED */
1225 }
1226 
1227 /*
1228  * Compile literal pattern into BMG tables
1229  */
1230 static void
1231 bmgcomp(char *pat, int len)
1232 {
1233         int     i;
1234         int     tlen;
1235         unsigned char   *uc = (unsigned char *)pat;
1236 
1237         bmglen = len;
1238         bmgpat = pat;
1239 
1240         for (i = 0; i < M_CSETSIZE; i++) {
1241                 bmgtab[i] = len;
1242         }
1243 
1244         len--;
1245         for (tlen = len, i = 0; i <= len; i++, tlen--) {
1246                 bmgtab[*uc++] = tlen;
1247         }
1248 }
1249 
1250 /*
1251  * BMG search.
1252  */
1253 static char *
1254 bmgexec(char *str, char *end)
1255 {
1256         int     t;
1257         char    *k, *s, *p;
1258 
1259         k = str + bmglen - 1;
1260         if (bmglen == 1) {
1261                 return (memchr(str, bmgpat[0], end - str));
1262         }
1263         for (; ; ) {
1264                 /* inner loop, should be most optimized */
1265                 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1266                         k += t;
1267                 }
1268                 if (k >= end) {
1269                         return (NULL);
1270                 }
1271                 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1272                         if (p == bmgpat) {
1273                                 return (s);
1274                         }
1275                 }
1276                 k++;
1277         }
1278         /* NOTREACHED */
1279 }