1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28 
  29 /*
  30  * grep - pattern matching program - combined grep, egrep, and fgrep.
  31  *      Based on MKS grep command, with XCU & Solaris mods.
  32  */
  33 
  34 /*
  35  * Copyright 1985, 1992 by Mortice Kern Systems Inc.  All rights reserved.
  36  *
  37  */
  38 
  39 #include <string.h>
  40 #include <stdlib.h>
  41 #include <ctype.h>
  42 #include <stdarg.h>
  43 #include <regex.h>
  44 #include <limits.h>
  45 #include <sys/types.h>
  46 #include <sys/stat.h>
  47 #include <fcntl.h>
  48 #include <stdio.h>
  49 #include <locale.h>
  50 #include <wchar.h>
  51 #include <errno.h>
  52 #include <unistd.h>
  53 #include <wctype.h>
  54 
  55 #define BSIZE           512             /* Size of block for -b */
  56 #define BUFSIZE         8192            /* Input buffer size */
  57 
  58 #define M_CSETSIZE      256             /* singlebyte chars */
  59 static int      bmglen;                 /* length of BMG pattern */
  60 static char     *bmgpat;                /* BMG pattern */
  61 static int      bmgtab[M_CSETSIZE];     /* BMG delta1 table */
  62 
  63 typedef struct  _PATTERN        {
  64         char    *pattern;               /* original pattern */
  65         wchar_t *wpattern;              /* wide, lowercased pattern */
  66         struct  _PATTERN        *next;
  67         regex_t re;                     /* compiled pattern */
  68 } PATTERN;
  69 
  70 static PATTERN  *patterns;
  71 static char     errstr[128];            /* regerror string buffer */
  72 static int      regflags = 0;           /* regcomp options */
  73 static uchar_t  fgrep = 0;              /* Invoked as fgrep */
  74 static uchar_t  egrep = 0;              /* Invoked as egrep */
  75 static uchar_t  nvflag = 1;             /* Print matching lines */
  76 static uchar_t  cflag;                  /* Count of matches */
  77 static uchar_t  iflag;                  /* Case insensitve matching */
  78 static uchar_t  hflag;                  /* Supress printing of filename */
  79 static uchar_t  lflag;                  /* Print file names of matches */
  80 static uchar_t  nflag;                  /* Precede lines by line number */
  81 static uchar_t  bflag;                  /* Preccede matches by block number */
  82 static uchar_t  sflag;                  /* Suppress file error messages */
  83 static uchar_t  qflag;                  /* Suppress standard output */
  84 static uchar_t  wflag;                  /* Search for expression as a word */
  85 static uchar_t  xflag;                  /* Anchoring */
  86 static uchar_t  Eflag;                  /* Egrep or -E flag */
  87 static uchar_t  Fflag;                  /* Fgrep or -F flag */
  88 static uchar_t  outfn;                  /* Put out file name */
  89 static char     *cmdname;
  90 
  91 static int      use_wchar, use_bmg, mblocale;
  92 
  93 static size_t   outbuflen, prntbuflen;
  94 static char     *prntbuf;
  95 static wchar_t  *outline;
  96 
  97 static void     addfile(char *fn);
  98 static void     addpattern(char *s);
  99 static void     fixpatterns(void);
 100 static void     usage(void);
 101 static int      grep(int, char *);
 102 static void     bmgcomp(char *, int);
 103 static char     *bmgexec(char *, char *);
 104 
 105 /*
 106  * mainline for grep
 107  */
 108 int
 109 main(int argc, char **argv)
 110 {
 111         char    *ap;
 112         int     matched = 0;
 113         int     c;
 114         int     fflag = 0;
 115         int     errors = 0;
 116         int     i, n_pattern = 0, n_file = 0;
 117         char    **pattern_list = NULL;
 118         char    **file_list = NULL;
 119 
 120         (void) setlocale(LC_ALL, "");
 121 #if !defined(TEXT_DOMAIN)       /* Should be defined by cc -D */
 122 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it weren't */
 123 #endif
 124         (void) textdomain(TEXT_DOMAIN);
 125 
 126         /*
 127          * true if this is running on the multibyte locale
 128          */
 129         mblocale = (MB_CUR_MAX > 1);
 130         /*
 131          * Skip leading slashes
 132          */
 133         cmdname = argv[0];
 134         if (ap = strrchr(cmdname, '/'))
 135                 cmdname = ap + 1;
 136 
 137         ap = cmdname;
 138         /*
 139          * Detect egrep/fgrep via command name, map to -E and -F options.
 140          */
 141         if (*ap == 'e' || *ap == 'E') {
 142                 regflags |= REG_EXTENDED;
 143                 egrep++;
 144         } else {
 145                 if (*ap == 'f' || *ap == 'F') {
 146                         fgrep++;
 147                 }
 148         }
 149 
 150         while ((c = getopt(argc, argv, "vwchilnbse:f:qxEFI")) != EOF) {
 151                 switch (c) {
 152                 case 'v':       /* POSIX: negate matches */
 153                         nvflag = 0;
 154                         break;
 155 
 156                 case 'c':       /* POSIX: write count */
 157                         cflag++;
 158                         break;
 159 
 160                 case 'i':       /* POSIX: ignore case */
 161                         iflag++;
 162                         regflags |= REG_ICASE;
 163                         break;
 164 
 165                 case 'l':       /* POSIX: Write filenames only */
 166                         lflag++;
 167                         break;
 168 
 169                 case 'n':       /* POSIX: Write line numbers */
 170                         nflag++;
 171                         break;
 172 
 173                 case 'b':       /* Solaris: Write file block numbers */
 174                         bflag++;
 175                         break;
 176 
 177                 case 's':       /* POSIX: No error msgs for files */
 178                         sflag++;
 179                         break;
 180 
 181                 case 'e':       /* POSIX: pattern list */
 182                         n_pattern++;
 183                         pattern_list = realloc(pattern_list,
 184                             sizeof (char *) * n_pattern);
 185                         if (pattern_list == NULL) {
 186                                 (void) fprintf(stderr,
 187                                     gettext("%s: out of memory\n"),
 188                                     cmdname);
 189                                 exit(2);
 190                         }
 191                         *(pattern_list + n_pattern - 1) = optarg;
 192                         break;
 193 
 194                 case 'f':       /* POSIX: pattern file */
 195                         fflag = 1;
 196                         n_file++;
 197                         file_list = realloc(file_list,
 198                             sizeof (char *) * n_file);
 199                         if (file_list == NULL) {
 200                                 (void) fprintf(stderr,
 201                                     gettext("%s: out of memory\n"),
 202                                     cmdname);
 203                                 exit(2);
 204                         }
 205                         *(file_list + n_file - 1) = optarg;
 206                         break;
 207                 case 'h':       /* Solaris: supress printing of file name */
 208                         hflag = 1;
 209                         break;
 210 
 211                 case 'q':       /* POSIX: quiet: status only */
 212                         qflag++;
 213                         break;
 214 
 215                 case 'w':       /* Solaris: treat pattern as word */
 216                         wflag++;
 217                         break;
 218 
 219                 case 'x':       /* POSIX: full line matches */
 220                         xflag++;
 221                         regflags |= REG_ANCHOR;
 222                         break;
 223 
 224                 case 'E':       /* POSIX: Extended RE's */
 225                         regflags |= REG_EXTENDED;
 226                         Eflag++;
 227                         break;
 228 
 229                 case 'F':       /* POSIX: strings, not RE's */
 230                         Fflag++;
 231                         break;
 232 
 233                 default:
 234                         usage();
 235                 }
 236         }
 237         /*
 238          * If we're invoked as egrep or fgrep we need to do some checks
 239          */
 240 
 241         if (egrep || fgrep) {
 242                 /*
 243                  * Use of -E or -F with egrep or fgrep is illegal
 244                  */
 245                 if (Eflag || Fflag)
 246                         usage();
 247                 /*
 248                  * Don't allow use of wflag with egrep / fgrep
 249                  */
 250                 if (wflag)
 251                         usage();
 252                 /*
 253                  * For Solaris the -s flag is equivalent to XCU -q
 254                  */
 255                 if (sflag)
 256                         qflag++;
 257                 /*
 258                  * done with above checks - set the appropriate flags
 259                  */
 260                 if (egrep)
 261                         Eflag++;
 262                 else                    /* Else fgrep */
 263                         Fflag++;
 264         }
 265 
 266         if (wflag && (Eflag || Fflag)) {
 267                 /*
 268                  * -w cannot be specified with grep -F
 269                  */
 270                 usage();
 271         }
 272 
 273         /*
 274          * -E and -F flags are mutually exclusive - check for this
 275          */
 276         if (Eflag && Fflag)
 277                 usage();
 278 
 279         /*
 280          * -c, -l and -q flags are mutually exclusive
 281          * We have -c override -l like in Solaris.
 282          * -q overrides -l & -c programmatically in grep() function.
 283          */
 284         if (cflag && lflag)
 285                 lflag = 0;
 286 
 287         argv += optind - 1;
 288         argc -= optind - 1;
 289 
 290         /*
 291          * Now handling -e and -f option
 292          */
 293         if (pattern_list) {
 294                 for (i = 0; i < n_pattern; i++) {
 295                         addpattern(pattern_list[i]);
 296                 }
 297                 free(pattern_list);
 298         }
 299         if (file_list) {
 300                 for (i = 0; i < n_file; i++) {
 301                         addfile(file_list[i]);
 302                 }
 303                 free(file_list);
 304         }
 305 
 306         /*
 307          * No -e or -f?  Make sure there is one more arg, use it as the pattern.
 308          */
 309         if (patterns == NULL && !fflag) {
 310                 if (argc < 2)
 311                         usage();
 312                 addpattern(argv[1]);
 313                 argc--;
 314                 argv++;
 315         }
 316 
 317         /*
 318          * If -x flag is not specified or -i flag is specified
 319          * with fgrep in a multibyte locale, need to use
 320          * the wide character APIs.  Otherwise, byte-oriented
 321          * process will be done.
 322          */
 323         use_wchar = Fflag && mblocale && (!xflag || iflag);
 324 
 325         /*
 326          * Compile Patterns and also decide if BMG can be used
 327          */
 328         fixpatterns();
 329 
 330         /* Process all files: stdin, or rest of arg list */
 331         if (argc < 2) {
 332                 matched = grep(0, gettext("(standard input)"));
 333         } else {
 334                 if (argc > 2 && hflag == 0)
 335                         outfn = 1;      /* Print filename on match line */
 336                 for (argv++; *argv != NULL; argv++) {
 337                         int     fd;
 338 
 339                         if ((fd = open(*argv, O_RDONLY)) == -1) {
 340                                 errors = 1;
 341                                 if (sflag)
 342                                         continue;
 343                                 (void) fprintf(stderr, gettext(
 344                                     "%s: can't open \"%s\"\n"),
 345                                     cmdname, *argv);
 346                                 continue;
 347                         }
 348                         matched |= grep(fd, *argv);
 349                         (void) close(fd);
 350                         if (ferror(stdout))
 351                                 break;
 352                 }
 353         }
 354         /*
 355          * Return() here is used instead of exit
 356          */
 357 
 358         (void) fflush(stdout);
 359 
 360         if (errors)
 361                 return (2);
 362         return (matched ? 0 : 1);
 363 }
 364 
 365 /*
 366  * Add a file of strings to the pattern list.
 367  */
 368 static void
 369 addfile(char *fn)
 370 {
 371         FILE    *fp;
 372         char    *inbuf;
 373         char    *bufp;
 374         size_t  bufsiz, buflen, bufused;
 375 
 376         /*
 377          * Open the pattern file
 378          */
 379         if ((fp = fopen(fn, "r")) == NULL) {
 380                 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
 381                     cmdname, fn);
 382                 exit(2);
 383         }
 384         bufsiz = BUFSIZE;
 385         if ((inbuf = malloc(bufsiz)) == NULL) {
 386                 (void) fprintf(stderr,
 387                     gettext("%s: out of memory\n"), cmdname);
 388                 exit(2);
 389         }
 390         bufp = inbuf;
 391         bufused = 0;
 392         /*
 393          * Read in the file, reallocing as we need more memory
 394          */
 395         while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
 396                 buflen = strlen(bufp);
 397                 bufused += buflen;
 398                 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
 399                         /*
 400                          * if this line does not fit to the buffer,
 401                          * realloc larger buffer
 402                          */
 403                         bufsiz += BUFSIZE;
 404                         if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
 405                                 (void) fprintf(stderr,
 406                                     gettext("%s: out of memory\n"),
 407                                     cmdname);
 408                                 exit(2);
 409                         }
 410                         bufp = inbuf + bufused;
 411                         continue;
 412                 }
 413                 if (bufp[buflen - 1] == '\n') {
 414                         bufp[--buflen] = '\0';
 415                 }
 416                 addpattern(inbuf);
 417 
 418                 bufp = inbuf;
 419                 bufused = 0;
 420         }
 421         free(inbuf);
 422         (void) fclose(fp);
 423 }
 424 
 425 /*
 426  * Add a string to the pattern list.
 427  */
 428 static void
 429 addpattern(char *s)
 430 {
 431         PATTERN *pp;
 432         char    *wordbuf;
 433         char    *np;
 434 
 435         for (; ; ) {
 436                 np = strchr(s, '\n');
 437                 if (np != NULL)
 438                         *np = '\0';
 439                 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
 440                         (void) fprintf(stderr, gettext(
 441                             "%s: out of memory\n"),
 442                             cmdname);
 443                         exit(2);
 444                 }
 445                 if (wflag) {
 446                         /*
 447                          * Solaris wflag support: Add '<' '>' to pattern to
 448                          * select it as a word. Doesn't make sense with -F
 449                          * but we're Libertarian.
 450                          */
 451                         size_t  slen, wordlen;
 452 
 453                         slen = strlen(s);
 454                         wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
 455                         if ((wordbuf = malloc(wordlen)) == NULL) {
 456                                 (void) fprintf(stderr,
 457                                     gettext("%s: out of memory\n"),
 458                                     cmdname);
 459                                 exit(2);
 460                         }
 461                         (void) strcpy(wordbuf, "\\<");
 462                         (void) strcpy(wordbuf + 2, s);
 463                         (void) strcpy(wordbuf + 2 + slen, "\\>");
 464                 } else {
 465                         if ((wordbuf = strdup(s)) == NULL) {
 466                                 (void) fprintf(stderr,
 467                                     gettext("%s: out of memory\n"),
 468                                     cmdname);
 469                                 exit(2);
 470                         }
 471                 }
 472                 pp->pattern = wordbuf;
 473                 pp->next = patterns;
 474                 patterns = pp;
 475                 if (np == NULL)
 476                         break;
 477                 s = np + 1;
 478         }
 479 }
 480 
 481 /*
 482  * Fix patterns.
 483  * Must do after all arguments read, in case later -i option.
 484  */
 485 static void
 486 fixpatterns(void)
 487 {
 488         PATTERN *pp;
 489         int     rv, fix_pattern, npatterns;
 490 
 491         /*
 492          * As REG_ANCHOR flag is not supported in the current Solaris,
 493          * need to fix the specified pattern if -x is specified with
 494          * grep or egrep
 495          */
 496         fix_pattern = !Fflag && xflag;
 497 
 498         for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
 499                 npatterns++;
 500                 if (fix_pattern) {
 501                         char    *cp, *cq;
 502                         size_t  plen, nplen;
 503 
 504                         plen = strlen(pp->pattern);
 505                         /* '^' pattern '$' */
 506                         nplen = 1 + plen + 1 + 1;
 507                         if ((cp = malloc(nplen)) == NULL) {
 508                                 (void) fprintf(stderr,
 509                                     gettext("%s: out of memory\n"),
 510                                     cmdname);
 511                                 exit(2);
 512                         }
 513                         cq = cp;
 514                         *cq++ = '^';
 515                         cq = strcpy(cq, pp->pattern) + plen;
 516                         *cq++ = '$';
 517                         *cq = '\0';
 518                         free(pp->pattern);
 519                         pp->pattern = cp;
 520                 }
 521 
 522                 if (Fflag) {
 523                         if (use_wchar) {
 524                                 /*
 525                                  * Fflag && mblocale && iflag
 526                                  * Fflag && mblocale && !xflag
 527                                  */
 528                                 size_t  n;
 529                                 n = strlen(pp->pattern) + 1;
 530                                 if ((pp->wpattern =
 531                                         malloc(sizeof (wchar_t) * n)) == NULL) {
 532                                         (void) fprintf(stderr,
 533                                             gettext("%s: out of memory\n"),
 534                                             cmdname);
 535                                         exit(2);
 536                                 }
 537                                 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
 538                                     (size_t)-1) {
 539                                         (void) fprintf(stderr,
 540                                             gettext("%s: failed to convert "
 541                                                 "\"%s\" to wide-characters\n"),
 542                                             cmdname, pp->pattern);
 543                                         exit(2);
 544                                 }
 545                                 if (iflag) {
 546                                         wchar_t *wp;
 547                                         for (wp = pp->wpattern; *wp != L'\0';
 548                                             wp++) {
 549                                                 *wp = towlower((wint_t)*wp);
 550                                         }
 551                                 }
 552                                 free(pp->pattern);
 553                         } else {
 554                                 /*
 555                                  * Fflag && mblocale && !iflag
 556                                  * Fflag && !mblocale && iflag
 557                                  * Fflag && !mblocale && !iflag
 558                                  */
 559                                 if (iflag) {
 560                                         unsigned char   *cp;
 561                                         for (cp = (unsigned char *)pp->pattern;
 562                                             *cp != '\0'; cp++) {
 563                                                 *cp = tolower(*cp);
 564                                         }
 565                                 }
 566                         }
 567                         /*
 568                          * fgrep: No regular expressions.
 569                          */
 570                         continue;
 571                 }
 572 
 573                 /*
 574                  * For non-fgrep, compile the regular expression,
 575                  * give an informative error message, and exit if
 576                  * it didn't compile.
 577                  */
 578                 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
 579                         (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
 580                         (void) fprintf(stderr,
 581                             gettext("%s: RE error in %s: %s\n"),
 582                                 cmdname, pp->pattern, errstr);
 583                         exit(2);
 584                 }
 585                 free(pp->pattern);
 586         }
 587 
 588         /*
 589          * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
 590          * Use the Boyer-Moore-Gosper algorithm if:
 591          * - fgrep                      (Fflag)
 592          * - singlebyte locale          (!mblocale)
 593          * - no ignoring case           (!iflag)
 594          * - no printing line numbers   (!nflag)
 595          * - no negating the output     (nvflag)
 596          * - only one pattern           (npatterns == 1)
 597          * - non zero length pattern    (strlen(patterns->pattern) != 0)
 598          *
 599          * It's guaranteed patterns->pattern is still alive
 600          * when Fflag && !mblocale.
 601          */
 602         use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
 603             (npatterns == 1) && (strlen(patterns->pattern) != 0);
 604 }
 605 
 606 /*
 607  * Search a newline from the beginning of the string
 608  */
 609 static char *
 610 find_nl(const char *ptr, size_t len)
 611 {
 612         while (len-- != 0) {
 613                 if (*ptr++ == '\n') {
 614                         return ((char *)--ptr);
 615                 }
 616         }
 617         return (NULL);
 618 }
 619 
 620 /*
 621  * Search a newline from the end of the string
 622  */
 623 static char *
 624 rfind_nl(const char *ptr, size_t len)
 625 {
 626         const char      *uptr = ptr + len;
 627         while (len--) {
 628                 if (*--uptr == '\n') {
 629                         return ((char *)uptr);
 630                 }
 631         }
 632         return (NULL);
 633 }
 634 
 635 /*
 636  * Duplicate the specified string converting each character
 637  * into a lower case.
 638  */
 639 static char *
 640 istrdup(const char *s1)
 641 {
 642         static size_t   ibuflen = 0;
 643         static char     *ibuf = NULL;
 644         size_t  slen;
 645         char    *p;
 646 
 647         slen = strlen(s1);
 648         if (slen >= ibuflen) {
 649                 /* ibuf does not fit to s1 */
 650                 ibuflen = slen + 1;
 651                 ibuf = realloc(ibuf, ibuflen);
 652                 if (ibuf == NULL) {
 653                         (void) fprintf(stderr,
 654                             gettext("%s: out of memory\n"), cmdname);
 655                         exit(2);
 656                 }
 657         }
 658         p = ibuf;
 659         do {
 660                 *p++ = tolower(*s1);
 661         } while (*s1++ != '\0');
 662         return (ibuf);
 663 }
 664 
 665 /*
 666  * Do grep on a single file.
 667  * Return true in any lines matched.
 668  *
 669  * We have two strategies:
 670  * The fast one is used when we have a single pattern with
 671  * a string known to occur in the pattern. We can then
 672  * do a BMG match on the whole buffer.
 673  * This is an order of magnitude faster.
 674  * Otherwise we split the buffer into lines,
 675  * and check for a match on each line.
 676  */
 677 static int
 678 grep(int fd, char *fn)
 679 {
 680         PATTERN *pp;
 681         off_t   data_len;       /* length of the data chunk */
 682         off_t   line_len;       /* length of the current line */
 683         off_t   line_offset;    /* current line's offset from the beginning */
 684         long long       lineno;
 685         long long       matches = 0;    /* Number of matching lines */
 686         int     newlinep;       /* 0 if the last line of file has no newline */
 687         char    *ptr, *ptrend;
 688 
 689 
 690         if (patterns == NULL)
 691                 return (0);     /* no patterns to match -- just return */
 692 
 693         pp = patterns;
 694 
 695         if (use_bmg) {
 696                 bmgcomp(pp->pattern, strlen(pp->pattern));
 697         }
 698 
 699         if (use_wchar && outline == NULL) {
 700                 outbuflen = BUFSIZE + 1;
 701                 outline = malloc(sizeof (wchar_t) * outbuflen);
 702                 if (outline == NULL) {
 703                         (void) fprintf(stderr, gettext("%s: out of memory\n"),
 704                             cmdname);
 705                         exit(2);
 706                 }
 707         }
 708 
 709         if (prntbuf == NULL) {
 710                 prntbuflen = BUFSIZE;
 711                 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
 712                         (void) fprintf(stderr, gettext("%s: out of memory\n"),
 713                             cmdname);
 714                         exit(2);
 715                 }
 716         }
 717 
 718         line_offset = 0;
 719         lineno = 0;
 720         newlinep = 1;
 721         data_len = 0;
 722         for (; ; ) {
 723                 long    count;
 724                 off_t   offset = 0;
 725 
 726                 if (data_len == 0) {
 727                         /*
 728                          * If no data in the buffer, reset ptr
 729                          */
 730                         ptr = prntbuf;
 731                 }
 732                 if (ptr == prntbuf) {
 733                         /*
 734                          * The current data chunk starts from prntbuf.
 735                          * This means either the buffer has no data
 736                          * or the buffer has no newline.
 737                          * So, read more data from input.
 738                          */
 739                         count = read(fd, ptr + data_len, prntbuflen - data_len);
 740                         if (count < 0) {
 741                                 /* read error */
 742                                 if (cflag) {
 743                                         if (outfn) {
 744                                                 (void) fprintf(stdout,
 745                                                     "%s:", fn);
 746                                         }
 747                                         if (!qflag) {
 748                                                 (void) fprintf(stdout, "%lld\n",
 749                                                     matches);
 750                                         }
 751                                 }
 752                                 return (0);
 753                         } else if (count == 0) {
 754                                 /* no new data */
 755                                 if (data_len == 0) {
 756                                         /* end of file already reached */
 757                                         break;
 758                                 }
 759                                 /* last line of file has no newline */
 760                                 ptrend = ptr + data_len;
 761                                 newlinep = 0;
 762                                 goto L_start_process;
 763                         }
 764                         offset = data_len;
 765                         data_len += count;
 766                 }
 767 
 768                 /*
 769                  * Look for newline in the chunk
 770                  * between ptr + offset and ptr + data_len - offset.
 771                  */
 772                 ptrend = find_nl(ptr + offset, data_len - offset);
 773                 if (ptrend == NULL) {
 774                         /* no newline found in this chunk */
 775                         if (ptr > prntbuf) {
 776                                 /*
 777                                  * Move remaining data to the beginning
 778                                  * of the buffer.
 779                                  * Remaining data lie from ptr for
 780                                  * data_len bytes.
 781                                  */
 782                                 (void) memmove(prntbuf, ptr, data_len);
 783                         }
 784                         if (data_len == prntbuflen) {
 785                                 /*
 786                                  * No enough room in the buffer
 787                                  */
 788                                 prntbuflen += BUFSIZE;
 789                                 prntbuf = realloc(prntbuf, prntbuflen + 1);
 790                                 if (prntbuf == NULL) {
 791                                         (void) fprintf(stderr,
 792                                             gettext("%s: out of memory\n"),
 793                                             cmdname);
 794                                         exit(2);
 795                                 }
 796                         }
 797                         ptr = prntbuf;
 798                         /* read the next input */
 799                         continue;
 800                 }
 801 L_start_process:
 802 
 803                 /*
 804                  * Beginning of the chunk:      ptr
 805                  * End of the chunk:            ptr + data_len
 806                  * Beginning of the line:       ptr
 807                  * End of the line:             ptrend
 808                  */
 809 
 810                 if (use_bmg) {
 811                         /*
 812                          * Use Boyer-Moore-Gosper algorithm to find out if
 813                          * this chunk (not this line) contains the specified
 814                          * pattern.  If not, restart from the last line
 815                          * of this chunk.
 816                          */
 817                         char    *bline;
 818                         bline = bmgexec(ptr, ptr + data_len);
 819                         if (bline == NULL) {
 820                                 /*
 821                                  * No pattern found in this chunk.
 822                                  * Need to find the last line
 823                                  * in this chunk.
 824                                  */
 825                                 ptrend = rfind_nl(ptr, data_len);
 826 
 827                                 /*
 828                                  * When this chunk does not contain newline,
 829                                  * ptrend becomes NULL, which should happen
 830                                  * when the last line of file does not end
 831                                  * with a newline.  At such a point,
 832                                  * newlinep should have been set to 0.
 833                                  * Therefore, just after jumping to
 834                                  * L_skip_line, the main for-loop quits,
 835                                  * and the line_len value won't be
 836                                  * used.
 837                                  */
 838                                 line_len = ptrend - ptr;
 839                                 goto L_skip_line;
 840                         }
 841                         if (bline > ptrend) {
 842                                 /*
 843                                  * Pattern found not in the first line
 844                                  * of this chunk.
 845                                  * Discard the first line.
 846                                  */
 847                                 line_len = ptrend - ptr;
 848                                 goto L_skip_line;
 849                         }
 850                         /*
 851                          * Pattern found in the first line of this chunk.
 852                          * Using this result.
 853                          */
 854                         *ptrend = '\0';
 855                         line_len = ptrend - ptr;
 856 
 857                         /*
 858                          * before jumping to L_next_line,
 859                          * need to handle xflag if specified
 860                          */
 861                         if (xflag && (line_len != bmglen ||
 862                                 strcmp(bmgpat, ptr) != 0)) {
 863                                 /* didn't match */
 864                                 pp = NULL;
 865                         } else {
 866                                 pp = patterns; /* to make it happen */
 867                         }
 868                         goto L_next_line;
 869                 }
 870                 lineno++;
 871                 /*
 872                  * Line starts from ptr and ends at ptrend.
 873                  * line_len will be the length of the line.
 874                  */
 875                 *ptrend = '\0';
 876                 line_len = ptrend - ptr;
 877 
 878                 /*
 879                  * From now, the process will be performed based
 880                  * on the line from ptr to ptrend.
 881                  */
 882                 if (use_wchar) {
 883                         size_t  len;
 884 
 885                         if (line_len >= outbuflen) {
 886                                 outbuflen = line_len + 1;
 887                                 outline = realloc(outline,
 888                                     sizeof (wchar_t) * outbuflen);
 889                                 if (outline == NULL) {
 890                                         (void) fprintf(stderr,
 891                                             gettext("%s: out of memory\n"),
 892                                             cmdname);
 893                                         exit(2);
 894                                 }
 895                         }
 896 
 897                         len = mbstowcs(outline, ptr, line_len);
 898                         if (len == (size_t)-1) {
 899                                 (void) fprintf(stderr, gettext(
 900         "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
 901                                     cmdname, fn, lineno);
 902                                 /* never match a line with invalid sequence */
 903                                 goto L_skip_line;
 904                         }
 905                         outline[len] = L'\0';
 906 
 907                         if (iflag) {
 908                                 wchar_t *cp;
 909                                 for (cp = outline; *cp != '\0'; cp++) {
 910                                         *cp = towlower((wint_t)*cp);
 911                                 }
 912                         }
 913 
 914                         if (xflag) {
 915                                 for (pp = patterns; pp; pp = pp->next) {
 916                                         if (outline[0] == pp->wpattern[0] &&
 917                                             wcscmp(outline,
 918                                                 pp->wpattern) == 0) {
 919                                                 /* matched */
 920                                                 break;
 921                                         }
 922                                 }
 923                         } else {
 924                                 for (pp = patterns; pp; pp = pp->next) {
 925                                         if (wcswcs(outline, pp->wpattern)
 926                                             != NULL) {
 927                                                 /* matched */
 928                                                 break;
 929                                         }
 930                                 }
 931                         }
 932                 } else if (Fflag) {
 933                         /* fgrep in byte-oriented handling */
 934                         char    *fptr;
 935                         if (iflag) {
 936                                 fptr = istrdup(ptr);
 937                         } else {
 938                                 fptr = ptr;
 939                         }
 940                         if (xflag) {
 941                                 /* fgrep -x */
 942                                 for (pp = patterns; pp; pp = pp->next) {
 943                                         if (fptr[0] == pp->pattern[0] &&
 944                                             strcmp(fptr, pp->pattern) == 0) {
 945                                                 /* matched */
 946                                                 break;
 947                                         }
 948                                 }
 949                         } else {
 950                                 for (pp = patterns; pp; pp = pp->next) {
 951                                         if (strstr(fptr, pp->pattern) != NULL) {
 952                                                 /* matched */
 953                                                 break;
 954                                         }
 955                                 }
 956                         }
 957                 } else {
 958                         /* grep or egrep */
 959                         for (pp = patterns; pp; pp = pp->next) {
 960                                 int     rv;
 961 
 962                                 rv = regexec(&pp->re, ptr, 0, NULL, 0);
 963                                 if (rv == REG_OK) {
 964                                         /* matched */
 965                                         break;
 966                                 }
 967 
 968                                 switch (rv) {
 969                                 case REG_NOMATCH:
 970                                         break;
 971                                 case REG_ECHAR:
 972                                         (void) fprintf(stderr, gettext(
 973             "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
 974                                             cmdname, fn, lineno);
 975                                         break;
 976                                 default:
 977                                         (void) regerror(rv, &pp->re, errstr,
 978                                             sizeof (errstr));
 979                                         (void) fprintf(stderr, gettext(
 980             "%s: input file \"%s\": line %lld: %s\n"),
 981                                             cmdname, fn, lineno, errstr);
 982                                         exit(2);
 983                                 }
 984                         }
 985                 }
 986 
 987 L_next_line:
 988                 /*
 989                  * Here, if pp points to non-NULL, something has been matched
 990                  * to the pattern.
 991                  */
 992                 if (nvflag == (pp != NULL)) {
 993                         matches++;
 994                         /*
 995                          * Handle q, l, and c flags.
 996                          */
 997                         if (qflag) {
 998                                 /* no need to continue */
 999                                 /*
1000                                  * End of this line is ptrend.
1001                                  * We have read up to ptr + data_len.
1002                                  */
1003                                 off_t   pos;
1004                                 pos = ptr + data_len - (ptrend + 1);
1005                                 (void) lseek(fd, -pos, SEEK_CUR);
1006                                 exit(0);
1007                         }
1008                         if (lflag) {
1009                                 (void) printf("%s\n", fn);
1010                                 break;
1011                         }
1012                         if (!cflag) {
1013                                 if (outfn) {
1014                                         (void) printf("%s:", fn);
1015                                 }
1016                                 if (bflag) {
1017                                         (void) printf("%lld:", (offset_t)
1018                                             (line_offset / BSIZE));
1019                                 }
1020                                 if (nflag) {
1021                                         (void) printf("%lld:", lineno);
1022                                 }
1023                                 *ptrend = '\n';
1024                                 (void) fwrite(ptr, 1, line_len + 1, stdout);
1025                         }
1026                         if (ferror(stdout)) {
1027                                 return (0);
1028                         }
1029                 }
1030 L_skip_line:
1031                 if (!newlinep)
1032                         break;
1033 
1034                 data_len -= line_len + 1;
1035                 line_offset += line_len + 1;
1036                 ptr = ptrend + 1;
1037         }
1038 
1039         if (cflag) {
1040                 if (outfn) {
1041                         (void) printf("%s:", fn);
1042                 }
1043                 if (!qflag) {
1044                         (void) printf("%lld\n", matches);
1045                 }
1046         }
1047         return (matches != 0);
1048 }
1049 
1050 /*
1051  * usage message for grep
1052  */
1053 static void
1054 usage(void)
1055 {
1056         if (egrep || fgrep) {
1057                 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1058                 (void) fprintf(stderr,
1059                     gettext(" [-c|-l|-q] [-bhinsvx] "
1060                         "pattern_list [file ...]\n"));
1061 
1062                 (void) fprintf(stderr, "\t%s", cmdname);
1063                 (void) fprintf(stderr,
1064                     gettext(" [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1065                         "[-f pattern_file]... [file...]\n"));
1066         } else {
1067                 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1068                 (void) fprintf(stderr,
1069                     gettext(" [-c|-l|-q] [-bhinsvwx] "
1070                         "pattern_list [file ...]\n"));
1071 
1072                 (void) fprintf(stderr, "\t%s", cmdname);
1073                 (void) fprintf(stderr,
1074                     gettext(" [-c|-l|-q] [-bhinsvwx] [-e pattern_list]... "
1075                         "[-f pattern_file]... [file...]\n"));
1076 
1077                 (void) fprintf(stderr, "\t%s", cmdname);
1078                 (void) fprintf(stderr,
1079                     gettext(" -E [-c|-l|-q] [-bhinsvx] "
1080                         "pattern_list [file ...]\n"));
1081 
1082                 (void) fprintf(stderr, "\t%s", cmdname);
1083                 (void) fprintf(stderr,
1084                     gettext(" -E [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1085                         "[-f pattern_file]... [file...]\n"));
1086 
1087                 (void) fprintf(stderr, "\t%s", cmdname);
1088                 (void) fprintf(stderr,
1089                     gettext(" -F [-c|-l|-q] [-bhinsvx] "
1090                         "pattern_list [file ...]\n"));
1091 
1092                 (void) fprintf(stderr, "\t%s", cmdname);
1093                 (void) fprintf(stderr,
1094                     gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1095                         "[-f pattern_file]... [file...]\n"));
1096         }
1097         exit(2);
1098         /* NOTREACHED */
1099 }
1100 
1101 /*
1102  * Compile literal pattern into BMG tables
1103  */
1104 static void
1105 bmgcomp(char *pat, int len)
1106 {
1107         int     i;
1108         int     tlen;
1109         unsigned char   *uc = (unsigned char *)pat;
1110 
1111         bmglen = len;
1112         bmgpat = pat;
1113 
1114         for (i = 0; i < M_CSETSIZE; i++) {
1115                 bmgtab[i] = len;
1116         }
1117 
1118         len--;
1119         for (tlen = len, i = 0; i <= len; i++, tlen--) {
1120                 bmgtab[*uc++] = tlen;
1121         }
1122 }
1123 
1124 /*
1125  * BMG search.
1126  */
1127 static char *
1128 bmgexec(char *str, char *end)
1129 {
1130         int     t;
1131         char    *k, *s, *p;
1132 
1133         k = str + bmglen - 1;
1134         if (bmglen == 1) {
1135                 return (memchr(str, bmgpat[0], end - str));
1136         }
1137         for (; ; ) {
1138                 /* inner loop, should be most optimized */
1139                 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1140                         k += t;
1141                 }
1142                 if (k >= end) {
1143                         return (NULL);
1144                 }
1145                 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1146                         if (p == bmgpat) {
1147                                 return (s);
1148                         }
1149                 }
1150                 k++;
1151         }
1152         /* NOTREACHED */
1153 }