1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * grep - pattern matching program - combined grep, egrep, and fgrep.
  29  *      Based on MKS grep command, with XCU & Solaris mods.
  30  */
  31 
  32 /*
  33  * Copyright 1985, 1992 by Mortice Kern Systems Inc.  All rights reserved.
  34  *
  35  */
  36 
  37 /* Copyright 2012 Nexenta Systems, Inc.  All rights reserved. */
  38 
  39 #include <string.h>
  40 #include <stdlib.h>
  41 #include <ctype.h>
  42 #include <stdarg.h>
  43 #include <regex.h>
  44 #include <limits.h>
  45 #include <sys/types.h>
  46 #include <sys/stat.h>
  47 #include <fcntl.h>
  48 #include <stdio.h>
  49 #include <locale.h>
  50 #include <wchar.h>
  51 #include <errno.h>
  52 #include <unistd.h>
  53 #include <wctype.h>
  54 #include <ftw.h>
  55 #include <sys/param.h>
  56 
  57 #define BSIZE           512             /* Size of block for -b */
  58 #define BUFSIZE         8192            /* Input buffer size */
  59 #define MAX_DEPTH       1000            /* how deep to recurse */
  60 
  61 #define M_CSETSIZE      256             /* singlebyte chars */
  62 static int      bmglen;                 /* length of BMG pattern */
  63 static char     *bmgpat;                /* BMG pattern */
  64 static int      bmgtab[M_CSETSIZE];     /* BMG delta1 table */
  65 
  66 typedef struct  _PATTERN        {
  67         char    *pattern;               /* original pattern */
  68         wchar_t *wpattern;              /* wide, lowercased pattern */
  69         struct  _PATTERN        *next;
  70         regex_t re;                     /* compiled pattern */
  71 } PATTERN;
  72 
  73 static PATTERN  *patterns;
  74 static char     errstr[128];            /* regerror string buffer */
  75 static int      regflags = 0;           /* regcomp options */
  76 static int      matched = 0;            /* return of the grep() */
  77 static int      errors = 0;             /* count of errors */
  78 static uchar_t  fgrep = 0;              /* Invoked as fgrep */
  79 static uchar_t  egrep = 0;              /* Invoked as egrep */
  80 static uchar_t  nvflag = 1;             /* Print matching lines */
  81 static uchar_t  cflag;                  /* Count of matches */
  82 static uchar_t  iflag;                  /* Case insensitve matching */
  83 static uchar_t  hflag;                  /* Supress printing of filename */
  84 static uchar_t  lflag;                  /* Print file names of matches */
  85 static uchar_t  nflag;                  /* Precede lines by line number */
  86 static uchar_t  rflag;                  /* Search directories recursively */
  87 static uchar_t  bflag;                  /* Preccede matches by block number */
  88 static uchar_t  sflag;                  /* Suppress file error messages */
  89 static uchar_t  qflag;                  /* Suppress standard output */
  90 static uchar_t  wflag;                  /* Search for expression as a word */
  91 static uchar_t  xflag;                  /* Anchoring */
  92 static uchar_t  Eflag;                  /* Egrep or -E flag */
  93 static uchar_t  Fflag;                  /* Fgrep or -F flag */
  94 static uchar_t  Rflag;                  /* Like rflag, but follow symlinks */
  95 static uchar_t  outfn;                  /* Put out file name */
  96 static char     *cmdname;
  97 
  98 static int      use_wchar, use_bmg, mblocale;
  99 
 100 static size_t   outbuflen, prntbuflen;
 101 static char     *prntbuf;
 102 static wchar_t  *outline;
 103 
 104 static void     addfile(const char *fn);
 105 static void     addpattern(char *s);
 106 static void     fixpatterns(void);
 107 static void     usage(void);
 108 static int      grep(int, const char *);
 109 static void     bmgcomp(char *, int);
 110 static char     *bmgexec(char *, char *);
 111 static int      recursive(const char *, const struct stat *, int, struct FTW *);
 112 static void     process_path(const char *);
 113 static void     process_file(const char *, int);
 114 
 115 /*
 116  * mainline for grep
 117  */
 118 int
 119 main(int argc, char **argv)
 120 {
 121         char    *ap;
 122         int     c;
 123         int     fflag = 0;
 124         int     i, n_pattern = 0, n_file = 0;
 125         char    **pattern_list = NULL;
 126         char    **file_list = NULL;
 127 
 128         (void) setlocale(LC_ALL, "");
 129 #if !defined(TEXT_DOMAIN)       /* Should be defined by cc -D */
 130 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it weren't */
 131 #endif
 132         (void) textdomain(TEXT_DOMAIN);
 133 
 134         /*
 135          * true if this is running on the multibyte locale
 136          */
 137         mblocale = (MB_CUR_MAX > 1);
 138         /*
 139          * Skip leading slashes
 140          */
 141         cmdname = argv[0];
 142         if (ap = strrchr(cmdname, '/'))
 143                 cmdname = ap + 1;
 144 
 145         ap = cmdname;
 146         /*
 147          * Detect egrep/fgrep via command name, map to -E and -F options.
 148          */
 149         if (*ap == 'e' || *ap == 'E') {
 150                 regflags |= REG_EXTENDED;
 151                 egrep++;
 152         } else {
 153                 if (*ap == 'f' || *ap == 'F') {
 154                         fgrep++;
 155                 }
 156         }
 157 
 158         while ((c = getopt(argc, argv, "vwchilnrbse:f:qxEFIR")) != EOF) {
 159                 switch (c) {
 160                 case 'v':       /* POSIX: negate matches */
 161                         nvflag = 0;
 162                         break;
 163 
 164                 case 'c':       /* POSIX: write count */
 165                         cflag++;
 166                         break;
 167 
 168                 case 'i':       /* POSIX: ignore case */
 169                         iflag++;
 170                         regflags |= REG_ICASE;
 171                         break;
 172 
 173                 case 'l':       /* POSIX: Write filenames only */
 174                         lflag++;
 175                         break;
 176 
 177                 case 'n':       /* POSIX: Write line numbers */
 178                         nflag++;
 179                         break;
 180 
 181                 case 'r':       /* Solaris: search recursively */
 182                         rflag++;
 183                         break;
 184 
 185                 case 'b':       /* Solaris: Write file block numbers */
 186                         bflag++;
 187                         break;
 188 
 189                 case 's':       /* POSIX: No error msgs for files */
 190                         sflag++;
 191                         break;
 192 
 193                 case 'e':       /* POSIX: pattern list */
 194                         n_pattern++;
 195                         pattern_list = realloc(pattern_list,
 196                             sizeof (char *) * n_pattern);
 197                         if (pattern_list == NULL) {
 198                                 (void) fprintf(stderr,
 199                                     gettext("%s: out of memory\n"),
 200                                     cmdname);
 201                                 exit(2);
 202                         }
 203                         *(pattern_list + n_pattern - 1) = optarg;
 204                         break;
 205 
 206                 case 'f':       /* POSIX: pattern file */
 207                         fflag = 1;
 208                         n_file++;
 209                         file_list = realloc(file_list,
 210                             sizeof (char *) * n_file);
 211                         if (file_list == NULL) {
 212                                 (void) fprintf(stderr,
 213                                     gettext("%s: out of memory\n"),
 214                                     cmdname);
 215                                 exit(2);
 216                         }
 217                         *(file_list + n_file - 1) = optarg;
 218                         break;
 219                 case 'h':       /* Solaris: supress printing of file name */
 220                         hflag = 1;
 221                         break;
 222 
 223                 case 'q':       /* POSIX: quiet: status only */
 224                         qflag++;
 225                         break;
 226 
 227                 case 'w':       /* Solaris: treat pattern as word */
 228                         wflag++;
 229                         break;
 230 
 231                 case 'x':       /* POSIX: full line matches */
 232                         xflag++;
 233                         regflags |= REG_ANCHOR;
 234                         break;
 235 
 236                 case 'E':       /* POSIX: Extended RE's */
 237                         regflags |= REG_EXTENDED;
 238                         Eflag++;
 239                         break;
 240 
 241                 case 'F':       /* POSIX: strings, not RE's */
 242                         Fflag++;
 243                         break;
 244 
 245                 case 'R':       /* Solaris: like rflag, but follow symlinks */
 246                         Rflag++;
 247                         rflag++;
 248                         break;
 249 
 250                 default:
 251                         usage();
 252                 }
 253         }
 254         /*
 255          * If we're invoked as egrep or fgrep we need to do some checks
 256          */
 257 
 258         if (egrep || fgrep) {
 259                 /*
 260                  * Use of -E or -F with egrep or fgrep is illegal
 261                  */
 262                 if (Eflag || Fflag)
 263                         usage();
 264                 /*
 265                  * Don't allow use of wflag with egrep / fgrep
 266                  */
 267                 if (wflag)
 268                         usage();
 269                 /*
 270                  * For Solaris the -s flag is equivalent to XCU -q
 271                  */
 272                 if (sflag)
 273                         qflag++;
 274                 /*
 275                  * done with above checks - set the appropriate flags
 276                  */
 277                 if (egrep)
 278                         Eflag++;
 279                 else                    /* Else fgrep */
 280                         Fflag++;
 281         }
 282 
 283         if (wflag && (Eflag || Fflag)) {
 284                 /*
 285                  * -w cannot be specified with grep -F
 286                  */
 287                 usage();
 288         }
 289 
 290         /*
 291          * -E and -F flags are mutually exclusive - check for this
 292          */
 293         if (Eflag && Fflag)
 294                 usage();
 295 
 296         /*
 297          * -c, -l and -q flags are mutually exclusive
 298          * We have -c override -l like in Solaris.
 299          * -q overrides -l & -c programmatically in grep() function.
 300          */
 301         if (cflag && lflag)
 302                 lflag = 0;
 303 
 304         argv += optind - 1;
 305         argc -= optind - 1;
 306 
 307         /*
 308          * Now handling -e and -f option
 309          */
 310         if (pattern_list) {
 311                 for (i = 0; i < n_pattern; i++) {
 312                         addpattern(pattern_list[i]);
 313                 }
 314                 free(pattern_list);
 315         }
 316         if (file_list) {
 317                 for (i = 0; i < n_file; i++) {
 318                         addfile(file_list[i]);
 319                 }
 320                 free(file_list);
 321         }
 322 
 323         /*
 324          * No -e or -f?  Make sure there is one more arg, use it as the pattern.
 325          */
 326         if (patterns == NULL && !fflag) {
 327                 if (argc < 2)
 328                         usage();
 329                 addpattern(argv[1]);
 330                 argc--;
 331                 argv++;
 332         }
 333 
 334         /*
 335          * If -x flag is not specified or -i flag is specified
 336          * with fgrep in a multibyte locale, need to use
 337          * the wide character APIs.  Otherwise, byte-oriented
 338          * process will be done.
 339          */
 340         use_wchar = Fflag && mblocale && (!xflag || iflag);
 341 
 342         /*
 343          * Compile Patterns and also decide if BMG can be used
 344          */
 345         fixpatterns();
 346 
 347         /* Process all files: stdin, or rest of arg list */
 348         if (argc < 2) {
 349                 matched = grep(0, gettext("(standard input)"));
 350         } else {
 351                 if (argc > 2 && hflag == 0)
 352                         outfn = 1;      /* Print filename on match line */
 353                 for (argv++; *argv != NULL; argv++) {
 354                         process_path(*argv);
 355                 }
 356         }
 357         /*
 358          * Return() here is used instead of exit
 359          */
 360 
 361         (void) fflush(stdout);
 362 
 363         if (errors)
 364                 return (2);
 365         return (matched ? 0 : 1);
 366 }
 367 
 368 static void
 369 process_path(const char *path)
 370 {
 371         struct  stat st;
 372         int     walkflags = FTW_CHDIR;
 373         char    *buf = NULL;
 374 
 375         if (rflag) {
 376                 if (stat(path, &st) != -1 &&
 377                     (st.st_mode & S_IFMT) == S_IFDIR) {
 378                         outfn = 1; /* Print filename */
 379 
 380                         /*
 381                          * Add trailing slash if arg
 382                          * is directory, to resolve symlinks.
 383                          */
 384                         if (path[strlen(path) - 1] != '/') {
 385                                 (void) asprintf(&buf, "%s/", path);
 386                                 if (buf != NULL)
 387                                         path = buf;
 388                         }
 389 
 390                         /*
 391                          * Search through subdirs if path is directory.
 392                          * Don't follow symlinks if Rflag is not set.
 393                          */
 394                         if (!Rflag)
 395                                 walkflags |= FTW_PHYS;
 396 
 397                         if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
 398                                 if (!sflag)
 399                                         (void) fprintf(stderr,
 400                                             gettext("%s: can't open \"%s\"\n"),
 401                                             cmdname, path);
 402                                 errors = 1;
 403                         }
 404                         return;
 405                 }
 406         }
 407         process_file(path, 0);
 408 }
 409 
 410 /*
 411  * Read and process all files in directory recursively.
 412  */
 413 static int
 414 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
 415 {
 416         /*
 417          * Process files and follow symlinks if Rflag set.
 418          */
 419         if (info != FTW_F) {
 420                 /* Report broken symlinks and unreadable files */
 421                 if (!sflag &&
 422                     (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
 423                         (void) fprintf(stderr,
 424                             gettext("%s: can't open \"%s\"\n"), cmdname, name);
 425                 }
 426                 return (0);
 427         }
 428 
 429 
 430         /* Skip devices and pipes if Rflag is not set */
 431         if (!Rflag && !S_ISREG(statp->st_mode))
 432                 return (0);
 433         /* Pass offset to relative name from FTW_CHDIR */
 434         process_file(name, ftw->base);
 435         return (0);
 436 }
 437 
 438 /*
 439  * Opens file and call grep function.
 440  */
 441 static void
 442 process_file(const char *name, int base)
 443 {
 444         int fd;
 445 
 446         if ((fd = open(name + base, O_RDONLY)) == -1) {
 447                 errors = 1;
 448                 if (!sflag) /* Silent mode */
 449                         (void) fprintf(stderr, gettext(
 450                             "%s: can't open \"%s\"\n"),
 451                             cmdname, name);
 452                 return;
 453         }
 454         matched |= grep(fd, name);
 455         (void) close(fd);
 456 
 457         if (ferror(stdout)) {
 458                 (void) fprintf(stderr, gettext(
 459                     "%s: error writing to stdout\n"),
 460                     cmdname);
 461                 (void) fflush(stdout);
 462                 exit(2);
 463         }
 464 
 465 }
 466 
 467 /*
 468  * Add a file of strings to the pattern list.
 469  */
 470 static void
 471 addfile(const char *fn)
 472 {
 473         FILE    *fp;
 474         char    *inbuf;
 475         char    *bufp;
 476         size_t  bufsiz, buflen, bufused;
 477 
 478         /*
 479          * Open the pattern file
 480          */
 481         if ((fp = fopen(fn, "r")) == NULL) {
 482                 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
 483                     cmdname, fn);
 484                 exit(2);
 485         }
 486         bufsiz = BUFSIZE;
 487         if ((inbuf = malloc(bufsiz)) == NULL) {
 488                 (void) fprintf(stderr,
 489                     gettext("%s: out of memory\n"), cmdname);
 490                 exit(2);
 491         }
 492         bufp = inbuf;
 493         bufused = 0;
 494         /*
 495          * Read in the file, reallocing as we need more memory
 496          */
 497         while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
 498                 buflen = strlen(bufp);
 499                 bufused += buflen;
 500                 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
 501                         /*
 502                          * if this line does not fit to the buffer,
 503                          * realloc larger buffer
 504                          */
 505                         bufsiz += BUFSIZE;
 506                         if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
 507                                 (void) fprintf(stderr,
 508                                     gettext("%s: out of memory\n"),
 509                                     cmdname);
 510                                 exit(2);
 511                         }
 512                         bufp = inbuf + bufused;
 513                         continue;
 514                 }
 515                 if (bufp[buflen - 1] == '\n') {
 516                         bufp[--buflen] = '\0';
 517                 }
 518                 addpattern(inbuf);
 519 
 520                 bufp = inbuf;
 521                 bufused = 0;
 522         }
 523         free(inbuf);
 524         (void) fclose(fp);
 525 }
 526 
 527 /*
 528  * Add a string to the pattern list.
 529  */
 530 static void
 531 addpattern(char *s)
 532 {
 533         PATTERN *pp;
 534         char    *wordbuf;
 535         char    *np;
 536 
 537         for (; ; ) {
 538                 np = strchr(s, '\n');
 539                 if (np != NULL)
 540                         *np = '\0';
 541                 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
 542                         (void) fprintf(stderr, gettext(
 543                             "%s: out of memory\n"),
 544                             cmdname);
 545                         exit(2);
 546                 }
 547                 if (wflag) {
 548                         /*
 549                          * Solaris wflag support: Add '<' '>' to pattern to
 550                          * select it as a word. Doesn't make sense with -F
 551                          * but we're Libertarian.
 552                          */
 553                         size_t  slen, wordlen;
 554 
 555                         slen = strlen(s);
 556                         wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
 557                         if ((wordbuf = malloc(wordlen)) == NULL) {
 558                                 (void) fprintf(stderr,
 559                                     gettext("%s: out of memory\n"),
 560                                     cmdname);
 561                                 exit(2);
 562                         }
 563                         (void) strcpy(wordbuf, "\\<");
 564                         (void) strcpy(wordbuf + 2, s);
 565                         (void) strcpy(wordbuf + 2 + slen, "\\>");
 566                 } else {
 567                         if ((wordbuf = strdup(s)) == NULL) {
 568                                 (void) fprintf(stderr,
 569                                     gettext("%s: out of memory\n"),
 570                                     cmdname);
 571                                 exit(2);
 572                         }
 573                 }
 574                 pp->pattern = wordbuf;
 575                 pp->next = patterns;
 576                 patterns = pp;
 577                 if (np == NULL)
 578                         break;
 579                 s = np + 1;
 580         }
 581 }
 582 
 583 /*
 584  * Fix patterns.
 585  * Must do after all arguments read, in case later -i option.
 586  */
 587 static void
 588 fixpatterns(void)
 589 {
 590         PATTERN *pp;
 591         int     rv, fix_pattern, npatterns;
 592 
 593         /*
 594          * As REG_ANCHOR flag is not supported in the current Solaris,
 595          * need to fix the specified pattern if -x is specified with
 596          * grep or egrep
 597          */
 598         fix_pattern = !Fflag && xflag;
 599 
 600         for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
 601                 npatterns++;
 602                 if (fix_pattern) {
 603                         char    *cp, *cq;
 604                         size_t  plen, nplen;
 605 
 606                         plen = strlen(pp->pattern);
 607                         /* '^' pattern '$' */
 608                         nplen = 1 + plen + 1 + 1;
 609                         if ((cp = malloc(nplen)) == NULL) {
 610                                 (void) fprintf(stderr,
 611                                     gettext("%s: out of memory\n"),
 612                                     cmdname);
 613                                 exit(2);
 614                         }
 615                         cq = cp;
 616                         *cq++ = '^';
 617                         cq = strcpy(cq, pp->pattern) + plen;
 618                         *cq++ = '$';
 619                         *cq = '\0';
 620                         free(pp->pattern);
 621                         pp->pattern = cp;
 622                 }
 623 
 624                 if (Fflag) {
 625                         if (use_wchar) {
 626                                 /*
 627                                  * Fflag && mblocale && iflag
 628                                  * Fflag && mblocale && !xflag
 629                                  */
 630                                 size_t  n;
 631                                 n = strlen(pp->pattern) + 1;
 632                                 if ((pp->wpattern =
 633                                     malloc(sizeof (wchar_t) * n)) == NULL) {
 634                                         (void) fprintf(stderr,
 635                                             gettext("%s: out of memory\n"),
 636                                             cmdname);
 637                                         exit(2);
 638                                 }
 639                                 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
 640                                     (size_t)-1) {
 641                                         (void) fprintf(stderr,
 642                                             gettext("%s: failed to convert "
 643                                             "\"%s\" to wide-characters\n"),
 644                                             cmdname, pp->pattern);
 645                                         exit(2);
 646                                 }
 647                                 if (iflag) {
 648                                         wchar_t *wp;
 649                                         for (wp = pp->wpattern; *wp != L'\0';
 650                                             wp++) {
 651                                                 *wp = towlower((wint_t)*wp);
 652                                         }
 653                                 }
 654                                 free(pp->pattern);
 655                         } else {
 656                                 /*
 657                                  * Fflag && mblocale && !iflag
 658                                  * Fflag && !mblocale && iflag
 659                                  * Fflag && !mblocale && !iflag
 660                                  */
 661                                 if (iflag) {
 662                                         unsigned char   *cp;
 663                                         for (cp = (unsigned char *)pp->pattern;
 664                                             *cp != '\0'; cp++) {
 665                                                 *cp = tolower(*cp);
 666                                         }
 667                                 }
 668                         }
 669                         /*
 670                          * fgrep: No regular expressions.
 671                          */
 672                         continue;
 673                 }
 674 
 675                 /*
 676                  * For non-fgrep, compile the regular expression,
 677                  * give an informative error message, and exit if
 678                  * it didn't compile.
 679                  */
 680                 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
 681                         (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
 682                         (void) fprintf(stderr,
 683                             gettext("%s: RE error in %s: %s\n"),
 684                             cmdname, pp->pattern, errstr);
 685                         exit(2);
 686                 }
 687                 free(pp->pattern);
 688         }
 689 
 690         /*
 691          * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
 692          * Use the Boyer-Moore-Gosper algorithm if:
 693          * - fgrep                      (Fflag)
 694          * - singlebyte locale          (!mblocale)
 695          * - no ignoring case           (!iflag)
 696          * - no printing line numbers   (!nflag)
 697          * - no negating the output     (nvflag)
 698          * - only one pattern           (npatterns == 1)
 699          * - non zero length pattern    (strlen(patterns->pattern) != 0)
 700          *
 701          * It's guaranteed patterns->pattern is still alive
 702          * when Fflag && !mblocale.
 703          */
 704         use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
 705             (npatterns == 1) && (strlen(patterns->pattern) != 0);
 706 }
 707 
 708 /*
 709  * Search a newline from the beginning of the string
 710  */
 711 static char *
 712 find_nl(const char *ptr, size_t len)
 713 {
 714         while (len-- != 0) {
 715                 if (*ptr++ == '\n') {
 716                         return ((char *)--ptr);
 717                 }
 718         }
 719         return (NULL);
 720 }
 721 
 722 /*
 723  * Search a newline from the end of the string
 724  */
 725 static char *
 726 rfind_nl(const char *ptr, size_t len)
 727 {
 728         const char      *uptr = ptr + len;
 729         while (len--) {
 730                 if (*--uptr == '\n') {
 731                         return ((char *)uptr);
 732                 }
 733         }
 734         return (NULL);
 735 }
 736 
 737 /*
 738  * Duplicate the specified string converting each character
 739  * into a lower case.
 740  */
 741 static char *
 742 istrdup(const char *s1)
 743 {
 744         static size_t   ibuflen = 0;
 745         static char     *ibuf = NULL;
 746         size_t  slen;
 747         char    *p;
 748 
 749         slen = strlen(s1);
 750         if (slen >= ibuflen) {
 751                 /* ibuf does not fit to s1 */
 752                 ibuflen = slen + 1;
 753                 ibuf = realloc(ibuf, ibuflen);
 754                 if (ibuf == NULL) {
 755                         (void) fprintf(stderr,
 756                             gettext("%s: out of memory\n"), cmdname);
 757                         exit(2);
 758                 }
 759         }
 760         p = ibuf;
 761         do {
 762                 *p++ = tolower(*s1);
 763         } while (*s1++ != '\0');
 764         return (ibuf);
 765 }
 766 
 767 /*
 768  * Do grep on a single file.
 769  * Return true in any lines matched.
 770  *
 771  * We have two strategies:
 772  * The fast one is used when we have a single pattern with
 773  * a string known to occur in the pattern. We can then
 774  * do a BMG match on the whole buffer.
 775  * This is an order of magnitude faster.
 776  * Otherwise we split the buffer into lines,
 777  * and check for a match on each line.
 778  */
 779 static int
 780 grep(int fd, const char *fn)
 781 {
 782         PATTERN *pp;
 783         off_t   data_len;       /* length of the data chunk */
 784         off_t   line_len;       /* length of the current line */
 785         off_t   line_offset;    /* current line's offset from the beginning */
 786         long long       lineno;
 787         long long       matches = 0;    /* Number of matching lines */
 788         int     newlinep;       /* 0 if the last line of file has no newline */
 789         char    *ptr, *ptrend;
 790 
 791 
 792         if (patterns == NULL)
 793                 return (0);     /* no patterns to match -- just return */
 794 
 795         pp = patterns;
 796 
 797         if (use_bmg) {
 798                 bmgcomp(pp->pattern, strlen(pp->pattern));
 799         }
 800 
 801         if (use_wchar && outline == NULL) {
 802                 outbuflen = BUFSIZE + 1;
 803                 outline = malloc(sizeof (wchar_t) * outbuflen);
 804                 if (outline == NULL) {
 805                         (void) fprintf(stderr, gettext("%s: out of memory\n"),
 806                             cmdname);
 807                         exit(2);
 808                 }
 809         }
 810 
 811         if (prntbuf == NULL) {
 812                 prntbuflen = BUFSIZE;
 813                 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
 814                         (void) fprintf(stderr, gettext("%s: out of memory\n"),
 815                             cmdname);
 816                         exit(2);
 817                 }
 818         }
 819 
 820         line_offset = 0;
 821         lineno = 0;
 822         newlinep = 1;
 823         data_len = 0;
 824         for (; ; ) {
 825                 long    count;
 826                 off_t   offset = 0;
 827 
 828                 if (data_len == 0) {
 829                         /*
 830                          * If no data in the buffer, reset ptr
 831                          */
 832                         ptr = prntbuf;
 833                 }
 834                 if (ptr == prntbuf) {
 835                         /*
 836                          * The current data chunk starts from prntbuf.
 837                          * This means either the buffer has no data
 838                          * or the buffer has no newline.
 839                          * So, read more data from input.
 840                          */
 841                         count = read(fd, ptr + data_len, prntbuflen - data_len);
 842                         if (count < 0) {
 843                                 /* read error */
 844                                 if (cflag) {
 845                                         if (outfn && !rflag) {
 846                                                 (void) fprintf(stdout,
 847                                                     "%s:", fn);
 848                                         }
 849                                         if (!qflag && !rflag) {
 850                                                 (void) fprintf(stdout, "%lld\n",
 851                                                     matches);
 852                                         }
 853                                 }
 854                                 return (0);
 855                         } else if (count == 0) {
 856                                 /* no new data */
 857                                 if (data_len == 0) {
 858                                         /* end of file already reached */
 859                                         break;
 860                                 }
 861                                 /* last line of file has no newline */
 862                                 ptrend = ptr + data_len;
 863                                 newlinep = 0;
 864                                 goto L_start_process;
 865                         }
 866                         offset = data_len;
 867                         data_len += count;
 868                 }
 869 
 870                 /*
 871                  * Look for newline in the chunk
 872                  * between ptr + offset and ptr + data_len - offset.
 873                  */
 874                 ptrend = find_nl(ptr + offset, data_len - offset);
 875                 if (ptrend == NULL) {
 876                         /* no newline found in this chunk */
 877                         if (ptr > prntbuf) {
 878                                 /*
 879                                  * Move remaining data to the beginning
 880                                  * of the buffer.
 881                                  * Remaining data lie from ptr for
 882                                  * data_len bytes.
 883                                  */
 884                                 (void) memmove(prntbuf, ptr, data_len);
 885                         }
 886                         if (data_len == prntbuflen) {
 887                                 /*
 888                                  * No enough room in the buffer
 889                                  */
 890                                 prntbuflen += BUFSIZE;
 891                                 prntbuf = realloc(prntbuf, prntbuflen + 1);
 892                                 if (prntbuf == NULL) {
 893                                         (void) fprintf(stderr,
 894                                             gettext("%s: out of memory\n"),
 895                                             cmdname);
 896                                         exit(2);
 897                                 }
 898                         }
 899                         ptr = prntbuf;
 900                         /* read the next input */
 901                         continue;
 902                 }
 903 L_start_process:
 904 
 905                 /*
 906                  * Beginning of the chunk:      ptr
 907                  * End of the chunk:            ptr + data_len
 908                  * Beginning of the line:       ptr
 909                  * End of the line:             ptrend
 910                  */
 911 
 912                 if (use_bmg) {
 913                         /*
 914                          * Use Boyer-Moore-Gosper algorithm to find out if
 915                          * this chunk (not this line) contains the specified
 916                          * pattern.  If not, restart from the last line
 917                          * of this chunk.
 918                          */
 919                         char    *bline;
 920                         bline = bmgexec(ptr, ptr + data_len);
 921                         if (bline == NULL) {
 922                                 /*
 923                                  * No pattern found in this chunk.
 924                                  * Need to find the last line
 925                                  * in this chunk.
 926                                  */
 927                                 ptrend = rfind_nl(ptr, data_len);
 928 
 929                                 /*
 930                                  * When this chunk does not contain newline,
 931                                  * ptrend becomes NULL, which should happen
 932                                  * when the last line of file does not end
 933                                  * with a newline.  At such a point,
 934                                  * newlinep should have been set to 0.
 935                                  * Therefore, just after jumping to
 936                                  * L_skip_line, the main for-loop quits,
 937                                  * and the line_len value won't be
 938                                  * used.
 939                                  */
 940                                 line_len = ptrend - ptr;
 941                                 goto L_skip_line;
 942                         }
 943                         if (bline > ptrend) {
 944                                 /*
 945                                  * Pattern found not in the first line
 946                                  * of this chunk.
 947                                  * Discard the first line.
 948                                  */
 949                                 line_len = ptrend - ptr;
 950                                 goto L_skip_line;
 951                         }
 952                         /*
 953                          * Pattern found in the first line of this chunk.
 954                          * Using this result.
 955                          */
 956                         *ptrend = '\0';
 957                         line_len = ptrend - ptr;
 958 
 959                         /*
 960                          * before jumping to L_next_line,
 961                          * need to handle xflag if specified
 962                          */
 963                         if (xflag && (line_len != bmglen ||
 964                             strcmp(bmgpat, ptr) != 0)) {
 965                                 /* didn't match */
 966                                 pp = NULL;
 967                         } else {
 968                                 pp = patterns; /* to make it happen */
 969                         }
 970                         goto L_next_line;
 971                 }
 972                 lineno++;
 973                 /*
 974                  * Line starts from ptr and ends at ptrend.
 975                  * line_len will be the length of the line.
 976                  */
 977                 *ptrend = '\0';
 978                 line_len = ptrend - ptr;
 979 
 980                 /*
 981                  * From now, the process will be performed based
 982                  * on the line from ptr to ptrend.
 983                  */
 984                 if (use_wchar) {
 985                         size_t  len;
 986 
 987                         if (line_len >= outbuflen) {
 988                                 outbuflen = line_len + 1;
 989                                 outline = realloc(outline,
 990                                     sizeof (wchar_t) * outbuflen);
 991                                 if (outline == NULL) {
 992                                         (void) fprintf(stderr,
 993                                             gettext("%s: out of memory\n"),
 994                                             cmdname);
 995                                         exit(2);
 996                                 }
 997                         }
 998 
 999                         len = mbstowcs(outline, ptr, line_len);
1000                         if (len == (size_t)-1) {
1001                                 (void) fprintf(stderr, gettext(
1002         "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1003                                     cmdname, fn, lineno);
1004                                 /* never match a line with invalid sequence */
1005                                 goto L_skip_line;
1006                         }
1007                         outline[len] = L'\0';
1008 
1009                         if (iflag) {
1010                                 wchar_t *cp;
1011                                 for (cp = outline; *cp != '\0'; cp++) {
1012                                         *cp = towlower((wint_t)*cp);
1013                                 }
1014                         }
1015 
1016                         if (xflag) {
1017                                 for (pp = patterns; pp; pp = pp->next) {
1018                                         if (outline[0] == pp->wpattern[0] &&
1019                                             wcscmp(outline,
1020                                             pp->wpattern) == 0) {
1021                                                 /* matched */
1022                                                 break;
1023                                         }
1024                                 }
1025                         } else {
1026                                 for (pp = patterns; pp; pp = pp->next) {
1027                                         if (wcswcs(outline, pp->wpattern)
1028                                             != NULL) {
1029                                                 /* matched */
1030                                                 break;
1031                                         }
1032                                 }
1033                         }
1034                 } else if (Fflag) {
1035                         /* fgrep in byte-oriented handling */
1036                         char    *fptr;
1037                         if (iflag) {
1038                                 fptr = istrdup(ptr);
1039                         } else {
1040                                 fptr = ptr;
1041                         }
1042                         if (xflag) {
1043                                 /* fgrep -x */
1044                                 for (pp = patterns; pp; pp = pp->next) {
1045                                         if (fptr[0] == pp->pattern[0] &&
1046                                             strcmp(fptr, pp->pattern) == 0) {
1047                                                 /* matched */
1048                                                 break;
1049                                         }
1050                                 }
1051                         } else {
1052                                 for (pp = patterns; pp; pp = pp->next) {
1053                                         if (strstr(fptr, pp->pattern) != NULL) {
1054                                                 /* matched */
1055                                                 break;
1056                                         }
1057                                 }
1058                         }
1059                 } else {
1060                         /* grep or egrep */
1061                         for (pp = patterns; pp; pp = pp->next) {
1062                                 int     rv;
1063 
1064                                 rv = regexec(&pp->re, ptr, 0, NULL, 0);
1065                                 if (rv == REG_OK) {
1066                                         /* matched */
1067                                         break;
1068                                 }
1069 
1070                                 switch (rv) {
1071                                 case REG_NOMATCH:
1072                                         break;
1073                                 case REG_ECHAR:
1074                                         (void) fprintf(stderr, gettext(
1075             "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1076                                             cmdname, fn, lineno);
1077                                         break;
1078                                 default:
1079                                         (void) regerror(rv, &pp->re, errstr,
1080                                             sizeof (errstr));
1081                                         (void) fprintf(stderr, gettext(
1082             "%s: input file \"%s\": line %lld: %s\n"),
1083                                             cmdname, fn, lineno, errstr);
1084                                         exit(2);
1085                                 }
1086                         }
1087                 }
1088 
1089 L_next_line:
1090                 /*
1091                  * Here, if pp points to non-NULL, something has been matched
1092                  * to the pattern.
1093                  */
1094                 if (nvflag == (pp != NULL)) {
1095                         matches++;
1096                         /*
1097                          * Handle q, l, and c flags.
1098                          */
1099                         if (qflag) {
1100                                 /* no need to continue */
1101                                 /*
1102                                  * End of this line is ptrend.
1103                                  * We have read up to ptr + data_len.
1104                                  */
1105                                 off_t   pos;
1106                                 pos = ptr + data_len - (ptrend + 1);
1107                                 (void) lseek(fd, -pos, SEEK_CUR);
1108                                 exit(0);
1109                         }
1110                         if (lflag) {
1111                                 (void) printf("%s\n", fn);
1112                                 break;
1113                         }
1114                         if (!cflag) {
1115                                 if (outfn) {
1116                                         (void) printf("%s:", fn);
1117                                 }
1118                                 if (bflag) {
1119                                         (void) printf("%lld:", (offset_t)
1120                                             (line_offset / BSIZE));
1121                                 }
1122                                 if (nflag) {
1123                                         (void) printf("%lld:", lineno);
1124                                 }
1125                                 *ptrend = '\n';
1126                                 (void) fwrite(ptr, 1, line_len + 1, stdout);
1127                         }
1128                         if (ferror(stdout)) {
1129                                 return (0);
1130                         }
1131                 }
1132 L_skip_line:
1133                 if (!newlinep)
1134                         break;
1135 
1136                 data_len -= line_len + 1;
1137                 line_offset += line_len + 1;
1138                 ptr = ptrend + 1;
1139         }
1140 
1141         if (cflag) {
1142                 if (outfn) {
1143                         (void) printf("%s:", fn);
1144                 }
1145                 if (!qflag) {
1146                         (void) printf("%lld\n", matches);
1147                 }
1148         }
1149         return (matches != 0);
1150 }
1151 
1152 /*
1153  * usage message for grep
1154  */
1155 static void
1156 usage(void)
1157 {
1158         if (egrep || fgrep) {
1159                 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1160                 (void) fprintf(stderr,
1161                     gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1162                     "pattern_list [file ...]\n"));
1163 
1164                 (void) fprintf(stderr, "\t%s", cmdname);
1165                 (void) fprintf(stderr,
1166                     gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1167                     "[-e pattern_list]... "
1168                     "[-f pattern_file]... [file...]\n"));
1169         } else {
1170                 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1171                 (void) fprintf(stderr,
1172                     gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1173                     "pattern_list [file ...]\n"));
1174 
1175                 (void) fprintf(stderr, "\t%s", cmdname);
1176                 (void) fprintf(stderr,
1177                     gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1178                     "[-e pattern_list]... "
1179                     "[-f pattern_file]... [file...]\n"));
1180 
1181                 (void) fprintf(stderr, "\t%s", cmdname);
1182                 (void) fprintf(stderr,
1183                     gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1184                     "pattern_list [file ...]\n"));
1185 
1186                 (void) fprintf(stderr, "\t%s", cmdname);
1187                 (void) fprintf(stderr,
1188                     gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1189                     "[-e pattern_list]... "
1190                     "[-f pattern_file]... [file...]\n"));
1191 
1192                 (void) fprintf(stderr, "\t%s", cmdname);
1193                 (void) fprintf(stderr,
1194                     gettext(" -F [-c|-l|-q] [-r|-R] [-bhinsvx] "
1195                     "pattern_list [file ...]\n"));
1196 
1197                 (void) fprintf(stderr, "\t%s", cmdname);
1198                 (void) fprintf(stderr,
1199                     gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1200                     "[-f pattern_file]... [file...]\n"));
1201         }
1202         exit(2);
1203         /* NOTREACHED */
1204 }
1205 
1206 /*
1207  * Compile literal pattern into BMG tables
1208  */
1209 static void
1210 bmgcomp(char *pat, int len)
1211 {
1212         int     i;
1213         int     tlen;
1214         unsigned char   *uc = (unsigned char *)pat;
1215 
1216         bmglen = len;
1217         bmgpat = pat;
1218 
1219         for (i = 0; i < M_CSETSIZE; i++) {
1220                 bmgtab[i] = len;
1221         }
1222 
1223         len--;
1224         for (tlen = len, i = 0; i <= len; i++, tlen--) {
1225                 bmgtab[*uc++] = tlen;
1226         }
1227 }
1228 
1229 /*
1230  * BMG search.
1231  */
1232 static char *
1233 bmgexec(char *str, char *end)
1234 {
1235         int     t;
1236         char    *k, *s, *p;
1237 
1238         k = str + bmglen - 1;
1239         if (bmglen == 1) {
1240                 return (memchr(str, bmgpat[0], end - str));
1241         }
1242         for (; ; ) {
1243                 /* inner loop, should be most optimized */
1244                 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1245                         k += t;
1246                 }
1247                 if (k >= end) {
1248                         return (NULL);
1249                 }
1250                 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1251                         if (p == bmgpat) {
1252                                 return (s);
1253                         }
1254                 }
1255                 k++;
1256         }
1257         /* NOTREACHED */
1258 }