1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * grep - pattern matching program - combined grep, egrep, and fgrep. 29 * Based on MKS grep command, with XCU & Solaris mods. 30 */ 31 32 /* 33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved. 34 * 35 */ 36 37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ 38 39 #include <string.h> 40 #include <stdlib.h> 41 #include <ctype.h> 42 #include <stdarg.h> 43 #include <regex.h> 44 #include <limits.h> 45 #include <sys/types.h> 46 #include <sys/stat.h> 47 #include <fcntl.h> 48 #include <stdio.h> 49 #include <locale.h> 50 #include <wchar.h> 51 #include <errno.h> 52 #include <unistd.h> 53 #include <wctype.h> 54 #include <ftw.h> 55 #include <sys/param.h> 56 57 #define STDIN_FILENAME gettext("(standard input)") 58 59 #define BSIZE 512 /* Size of block for -b */ 60 #define BUFSIZE 8192 /* Input buffer size */ 61 #define MAX_DEPTH 1000 /* how deep to recurse */ 62 63 #define M_CSETSIZE 256 /* singlebyte chars */ 64 static int bmglen; /* length of BMG pattern */ 65 static char *bmgpat; /* BMG pattern */ 66 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */ 67 68 typedef struct _PATTERN { 69 char *pattern; /* original pattern */ 70 wchar_t *wpattern; /* wide, lowercased pattern */ 71 struct _PATTERN *next; 72 regex_t re; /* compiled pattern */ 73 } PATTERN; 74 75 static PATTERN *patterns; 76 static char errstr[128]; /* regerror string buffer */ 77 static int regflags = 0; /* regcomp options */ 78 static int matched = 0; /* return of the grep() */ 79 static int errors = 0; /* count of errors */ 80 static uchar_t fgrep = 0; /* Invoked as fgrep */ 81 static uchar_t egrep = 0; /* Invoked as egrep */ 82 static uchar_t nvflag = 1; /* Print matching lines */ 83 static uchar_t cflag; /* Count of matches */ 84 static uchar_t iflag; /* Case insensitve matching */ 85 static uchar_t Hflag; /* Precede lines by file name */ 86 static uchar_t hflag; /* Supress printing of filename */ 87 static uchar_t lflag; /* Print file names of matches */ 88 static uchar_t nflag; /* Precede lines by line number */ 89 static uchar_t rflag; /* Search directories recursively */ 90 static uchar_t bflag; /* Preccede matches by block number */ 91 static uchar_t sflag; /* Suppress file error messages */ 92 static uchar_t qflag; /* Suppress standard output */ 93 static uchar_t wflag; /* Search for expression as a word */ 94 static uchar_t xflag; /* Anchoring */ 95 static uchar_t Eflag; /* Egrep or -E flag */ 96 static uchar_t Fflag; /* Fgrep or -F flag */ 97 static uchar_t Rflag; /* Like rflag, but follow symlinks */ 98 static uchar_t outfn; /* Put out file name */ 99 static char *cmdname; 100 101 static int use_wchar, use_bmg, mblocale; 102 103 static size_t outbuflen, prntbuflen; 104 static char *prntbuf; 105 static wchar_t *outline; 106 107 static void addfile(const char *fn); 108 static void addpattern(char *s); 109 static void fixpatterns(void); 110 static void usage(void); 111 static int grep(int, const char *); 112 static void bmgcomp(char *, int); 113 static char *bmgexec(char *, char *); 114 static int recursive(const char *, const struct stat *, int, struct FTW *); 115 static void process_path(const char *); 116 static void process_file(const char *, int); 117 118 /* 119 * mainline for grep 120 */ 121 int 122 main(int argc, char **argv) 123 { 124 char *ap; 125 int c; 126 int fflag = 0; 127 int i, n_pattern = 0, n_file = 0; 128 char **pattern_list = NULL; 129 char **file_list = NULL; 130 131 (void) setlocale(LC_ALL, ""); 132 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 133 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 134 #endif 135 (void) textdomain(TEXT_DOMAIN); 136 137 /* 138 * true if this is running on the multibyte locale 139 */ 140 mblocale = (MB_CUR_MAX > 1); 141 /* 142 * Skip leading slashes 143 */ 144 cmdname = argv[0]; 145 if (ap = strrchr(cmdname, '/')) 146 cmdname = ap + 1; 147 148 ap = cmdname; 149 /* 150 * Detect egrep/fgrep via command name, map to -E and -F options. 151 */ 152 if (*ap == 'e' || *ap == 'E') { 153 regflags |= REG_EXTENDED; 154 egrep++; 155 } else { 156 if (*ap == 'f' || *ap == 'F') { 157 fgrep++; 158 } 159 } 160 161 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) { 162 switch (c) { 163 case 'v': /* POSIX: negate matches */ 164 nvflag = 0; 165 break; 166 167 case 'c': /* POSIX: write count */ 168 cflag++; 169 break; 170 171 case 'i': /* POSIX: ignore case */ 172 iflag++; 173 regflags |= REG_ICASE; 174 break; 175 176 case 'l': /* POSIX: Write filenames only */ 177 lflag++; 178 break; 179 180 case 'n': /* POSIX: Write line numbers */ 181 nflag++; 182 break; 183 184 case 'r': /* Solaris: search recursively */ 185 rflag++; 186 break; 187 188 case 'b': /* Solaris: Write file block numbers */ 189 bflag++; 190 break; 191 192 case 's': /* POSIX: No error msgs for files */ 193 sflag++; 194 break; 195 196 case 'e': /* POSIX: pattern list */ 197 n_pattern++; 198 pattern_list = realloc(pattern_list, 199 sizeof (char *) * n_pattern); 200 if (pattern_list == NULL) { 201 (void) fprintf(stderr, 202 gettext("%s: out of memory\n"), 203 cmdname); 204 exit(2); 205 } 206 *(pattern_list + n_pattern - 1) = optarg; 207 break; 208 209 case 'f': /* POSIX: pattern file */ 210 fflag = 1; 211 n_file++; 212 file_list = realloc(file_list, 213 sizeof (char *) * n_file); 214 if (file_list == NULL) { 215 (void) fprintf(stderr, 216 gettext("%s: out of memory\n"), 217 cmdname); 218 exit(2); 219 } 220 *(file_list + n_file - 1) = optarg; 221 break; 222 223 /* based on options order h or H is set as in GNU grep */ 224 case 'h': /* Solaris: supress printing of file name */ 225 hflag = 1; 226 Hflag = 0; 227 break; 228 /* Solaris: precede every matching with file name */ 229 case 'H': 230 Hflag = 1; 231 hflag = 0; 232 break; 233 234 case 'q': /* POSIX: quiet: status only */ 235 qflag++; 236 break; 237 238 case 'w': /* Solaris: treat pattern as word */ 239 wflag++; 240 break; 241 242 case 'x': /* POSIX: full line matches */ 243 xflag++; 244 regflags |= REG_ANCHOR; 245 break; 246 247 case 'E': /* POSIX: Extended RE's */ 248 regflags |= REG_EXTENDED; 249 Eflag++; 250 break; 251 252 case 'F': /* POSIX: strings, not RE's */ 253 Fflag++; 254 break; 255 256 case 'R': /* Solaris: like rflag, but follow symlinks */ 257 Rflag++; 258 rflag++; 259 break; 260 261 default: 262 usage(); 263 } 264 } 265 /* 266 * If we're invoked as egrep or fgrep we need to do some checks 267 */ 268 269 if (egrep || fgrep) { 270 /* 271 * Use of -E or -F with egrep or fgrep is illegal 272 */ 273 if (Eflag || Fflag) 274 usage(); 275 /* 276 * Don't allow use of wflag with egrep / fgrep 277 */ 278 if (wflag) 279 usage(); 280 /* 281 * For Solaris the -s flag is equivalent to XCU -q 282 */ 283 if (sflag) 284 qflag++; 285 /* 286 * done with above checks - set the appropriate flags 287 */ 288 if (egrep) 289 Eflag++; 290 else /* Else fgrep */ 291 Fflag++; 292 } 293 294 if (wflag && (Eflag || Fflag)) { 295 /* 296 * -w cannot be specified with grep -F 297 */ 298 usage(); 299 } 300 301 /* 302 * -E and -F flags are mutually exclusive - check for this 303 */ 304 if (Eflag && Fflag) 305 usage(); 306 307 /* 308 * -l overrides -H like in GNU grep 309 */ 310 if (lflag) 311 Hflag = 0; 312 313 /* 314 * -c, -l and -q flags are mutually exclusive 315 * We have -c override -l like in Solaris. 316 * -q overrides -l & -c programmatically in grep() function. 317 */ 318 if (cflag && lflag) 319 lflag = 0; 320 321 argv += optind - 1; 322 argc -= optind - 1; 323 324 /* 325 * Now handling -e and -f option 326 */ 327 if (pattern_list) { 328 for (i = 0; i < n_pattern; i++) { 329 addpattern(pattern_list[i]); 330 } 331 free(pattern_list); 332 } 333 if (file_list) { 334 for (i = 0; i < n_file; i++) { 335 addfile(file_list[i]); 336 } 337 free(file_list); 338 } 339 340 /* 341 * No -e or -f? Make sure there is one more arg, use it as the pattern. 342 */ 343 if (patterns == NULL && !fflag) { 344 if (argc < 2) 345 usage(); 346 addpattern(argv[1]); 347 argc--; 348 argv++; 349 } 350 351 /* 352 * If -x flag is not specified or -i flag is specified 353 * with fgrep in a multibyte locale, need to use 354 * the wide character APIs. Otherwise, byte-oriented 355 * process will be done. 356 */ 357 use_wchar = Fflag && mblocale && (!xflag || iflag); 358 359 /* 360 * Compile Patterns and also decide if BMG can be used 361 */ 362 fixpatterns(); 363 364 /* Process all files: stdin, or rest of arg list */ 365 if (argc < 2) { 366 matched = grep(0, STDIN_FILENAME); 367 } else { 368 if (Hflag || (argc > 2 && hflag == 0)) 369 outfn = 1; /* Print filename on match line */ 370 for (argv++; *argv != NULL; argv++) { 371 process_path(*argv); 372 } 373 } 374 /* 375 * Return() here is used instead of exit 376 */ 377 378 (void) fflush(stdout); 379 380 if (errors) 381 return (2); 382 return (matched ? 0 : 1); 383 } 384 385 static void 386 process_path(const char *path) 387 { 388 struct stat st; 389 int walkflags = FTW_CHDIR; 390 char *buf = NULL; 391 392 if (rflag) { 393 if (stat(path, &st) != -1 && 394 (st.st_mode & S_IFMT) == S_IFDIR) { 395 outfn = 1; /* Print filename */ 396 397 /* 398 * Add trailing slash if arg 399 * is directory, to resolve symlinks. 400 */ 401 if (path[strlen(path) - 1] != '/') { 402 (void) asprintf(&buf, "%s/", path); 403 if (buf != NULL) 404 path = buf; 405 } 406 407 /* 408 * Search through subdirs if path is directory. 409 * Don't follow symlinks if Rflag is not set. 410 */ 411 if (!Rflag) 412 walkflags |= FTW_PHYS; 413 414 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) { 415 if (!sflag) 416 (void) fprintf(stderr, 417 gettext("%s: can't open \"%s\"\n"), 418 cmdname, path); 419 errors = 1; 420 } 421 return; 422 } 423 } 424 process_file(path, 0); 425 } 426 427 /* 428 * Read and process all files in directory recursively. 429 */ 430 static int 431 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw) 432 { 433 /* 434 * Process files and follow symlinks if Rflag set. 435 */ 436 if (info != FTW_F) { 437 /* Report broken symlinks and unreadable files */ 438 if (!sflag && 439 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) { 440 (void) fprintf(stderr, 441 gettext("%s: can't open \"%s\"\n"), cmdname, name); 442 } 443 return (0); 444 } 445 446 447 /* Skip devices and pipes if Rflag is not set */ 448 if (!Rflag && !S_ISREG(statp->st_mode)) 449 return (0); 450 /* Pass offset to relative name from FTW_CHDIR */ 451 process_file(name, ftw->base); 452 return (0); 453 } 454 455 /* 456 * Opens file and call grep function. 457 */ 458 static void 459 process_file(const char *name, int base) 460 { 461 int fd; 462 463 if ((fd = open(name + base, O_RDONLY)) == -1) { 464 errors = 1; 465 if (!sflag) /* Silent mode */ 466 (void) fprintf(stderr, gettext( 467 "%s: can't open \"%s\"\n"), 468 cmdname, name); 469 return; 470 } 471 matched |= grep(fd, name); 472 (void) close(fd); 473 474 if (ferror(stdout)) { 475 (void) fprintf(stderr, gettext( 476 "%s: error writing to stdout\n"), 477 cmdname); 478 (void) fflush(stdout); 479 exit(2); 480 } 481 482 } 483 484 /* 485 * Add a file of strings to the pattern list. 486 */ 487 static void 488 addfile(const char *fn) 489 { 490 FILE *fp; 491 char *inbuf; 492 char *bufp; 493 size_t bufsiz, buflen, bufused; 494 495 /* 496 * Open the pattern file 497 */ 498 if ((fp = fopen(fn, "r")) == NULL) { 499 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), 500 cmdname, fn); 501 exit(2); 502 } 503 bufsiz = BUFSIZE; 504 if ((inbuf = malloc(bufsiz)) == NULL) { 505 (void) fprintf(stderr, 506 gettext("%s: out of memory\n"), cmdname); 507 exit(2); 508 } 509 bufp = inbuf; 510 bufused = 0; 511 /* 512 * Read in the file, reallocing as we need more memory 513 */ 514 while (fgets(bufp, bufsiz - bufused, fp) != NULL) { 515 buflen = strlen(bufp); 516 bufused += buflen; 517 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') { 518 /* 519 * if this line does not fit to the buffer, 520 * realloc larger buffer 521 */ 522 bufsiz += BUFSIZE; 523 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) { 524 (void) fprintf(stderr, 525 gettext("%s: out of memory\n"), 526 cmdname); 527 exit(2); 528 } 529 bufp = inbuf + bufused; 530 continue; 531 } 532 if (bufp[buflen - 1] == '\n') { 533 bufp[--buflen] = '\0'; 534 } 535 addpattern(inbuf); 536 537 bufp = inbuf; 538 bufused = 0; 539 } 540 free(inbuf); 541 (void) fclose(fp); 542 } 543 544 /* 545 * Add a string to the pattern list. 546 */ 547 static void 548 addpattern(char *s) 549 { 550 PATTERN *pp; 551 char *wordbuf; 552 char *np; 553 554 for (; ; ) { 555 np = strchr(s, '\n'); 556 if (np != NULL) 557 *np = '\0'; 558 if ((pp = malloc(sizeof (PATTERN))) == NULL) { 559 (void) fprintf(stderr, gettext( 560 "%s: out of memory\n"), 561 cmdname); 562 exit(2); 563 } 564 if (wflag) { 565 /* 566 * Solaris wflag support: Add '<' '>' to pattern to 567 * select it as a word. Doesn't make sense with -F 568 * but we're Libertarian. 569 */ 570 size_t slen, wordlen; 571 572 slen = strlen(s); 573 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */ 574 if ((wordbuf = malloc(wordlen)) == NULL) { 575 (void) fprintf(stderr, 576 gettext("%s: out of memory\n"), 577 cmdname); 578 exit(2); 579 } 580 (void) strcpy(wordbuf, "\\<"); 581 (void) strcpy(wordbuf + 2, s); 582 (void) strcpy(wordbuf + 2 + slen, "\\>"); 583 } else { 584 if ((wordbuf = strdup(s)) == NULL) { 585 (void) fprintf(stderr, 586 gettext("%s: out of memory\n"), 587 cmdname); 588 exit(2); 589 } 590 } 591 pp->pattern = wordbuf; 592 pp->next = patterns; 593 patterns = pp; 594 if (np == NULL) 595 break; 596 s = np + 1; 597 } 598 } 599 600 /* 601 * Fix patterns. 602 * Must do after all arguments read, in case later -i option. 603 */ 604 static void 605 fixpatterns(void) 606 { 607 PATTERN *pp; 608 int rv, fix_pattern, npatterns; 609 610 /* 611 * As REG_ANCHOR flag is not supported in the current Solaris, 612 * need to fix the specified pattern if -x is specified with 613 * grep or egrep 614 */ 615 fix_pattern = !Fflag && xflag; 616 617 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) { 618 npatterns++; 619 if (fix_pattern) { 620 char *cp, *cq; 621 size_t plen, nplen; 622 623 plen = strlen(pp->pattern); 624 /* '^' pattern '$' */ 625 nplen = 1 + plen + 1 + 1; 626 if ((cp = malloc(nplen)) == NULL) { 627 (void) fprintf(stderr, 628 gettext("%s: out of memory\n"), 629 cmdname); 630 exit(2); 631 } 632 cq = cp; 633 *cq++ = '^'; 634 cq = strcpy(cq, pp->pattern) + plen; 635 *cq++ = '$'; 636 *cq = '\0'; 637 free(pp->pattern); 638 pp->pattern = cp; 639 } 640 641 if (Fflag) { 642 if (use_wchar) { 643 /* 644 * Fflag && mblocale && iflag 645 * Fflag && mblocale && !xflag 646 */ 647 size_t n; 648 n = strlen(pp->pattern) + 1; 649 if ((pp->wpattern = 650 malloc(sizeof (wchar_t) * n)) == NULL) { 651 (void) fprintf(stderr, 652 gettext("%s: out of memory\n"), 653 cmdname); 654 exit(2); 655 } 656 if (mbstowcs(pp->wpattern, pp->pattern, n) == 657 (size_t)-1) { 658 (void) fprintf(stderr, 659 gettext("%s: failed to convert " 660 "\"%s\" to wide-characters\n"), 661 cmdname, pp->pattern); 662 exit(2); 663 } 664 if (iflag) { 665 wchar_t *wp; 666 for (wp = pp->wpattern; *wp != L'\0'; 667 wp++) { 668 *wp = towlower((wint_t)*wp); 669 } 670 } 671 free(pp->pattern); 672 } else { 673 /* 674 * Fflag && mblocale && !iflag 675 * Fflag && !mblocale && iflag 676 * Fflag && !mblocale && !iflag 677 */ 678 if (iflag) { 679 unsigned char *cp; 680 for (cp = (unsigned char *)pp->pattern; 681 *cp != '\0'; cp++) { 682 *cp = tolower(*cp); 683 } 684 } 685 } 686 /* 687 * fgrep: No regular expressions. 688 */ 689 continue; 690 } 691 692 /* 693 * For non-fgrep, compile the regular expression, 694 * give an informative error message, and exit if 695 * it didn't compile. 696 */ 697 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) { 698 (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); 699 (void) fprintf(stderr, 700 gettext("%s: RE error in %s: %s\n"), 701 cmdname, pp->pattern, errstr); 702 exit(2); 703 } 704 free(pp->pattern); 705 } 706 707 /* 708 * Decide if we are able to run the Boyer-Moore-Gosper algorithm. 709 * Use the Boyer-Moore-Gosper algorithm if: 710 * - fgrep (Fflag) 711 * - singlebyte locale (!mblocale) 712 * - no ignoring case (!iflag) 713 * - no printing line numbers (!nflag) 714 * - no negating the output (nvflag) 715 * - only one pattern (npatterns == 1) 716 * - non zero length pattern (strlen(patterns->pattern) != 0) 717 * 718 * It's guaranteed patterns->pattern is still alive 719 * when Fflag && !mblocale. 720 */ 721 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && 722 (npatterns == 1) && (strlen(patterns->pattern) != 0); 723 } 724 725 /* 726 * Search a newline from the beginning of the string 727 */ 728 static char * 729 find_nl(const char *ptr, size_t len) 730 { 731 while (len-- != 0) { 732 if (*ptr++ == '\n') { 733 return ((char *)--ptr); 734 } 735 } 736 return (NULL); 737 } 738 739 /* 740 * Search a newline from the end of the string 741 */ 742 static char * 743 rfind_nl(const char *ptr, size_t len) 744 { 745 const char *uptr = ptr + len; 746 while (len--) { 747 if (*--uptr == '\n') { 748 return ((char *)uptr); 749 } 750 } 751 return (NULL); 752 } 753 754 /* 755 * Duplicate the specified string converting each character 756 * into a lower case. 757 */ 758 static char * 759 istrdup(const char *s1) 760 { 761 static size_t ibuflen = 0; 762 static char *ibuf = NULL; 763 size_t slen; 764 char *p; 765 766 slen = strlen(s1); 767 if (slen >= ibuflen) { 768 /* ibuf does not fit to s1 */ 769 ibuflen = slen + 1; 770 ibuf = realloc(ibuf, ibuflen); 771 if (ibuf == NULL) { 772 (void) fprintf(stderr, 773 gettext("%s: out of memory\n"), cmdname); 774 exit(2); 775 } 776 } 777 p = ibuf; 778 do { 779 *p++ = tolower(*s1); 780 } while (*s1++ != '\0'); 781 return (ibuf); 782 } 783 784 /* 785 * Do grep on a single file. 786 * Return true in any lines matched. 787 * 788 * We have two strategies: 789 * The fast one is used when we have a single pattern with 790 * a string known to occur in the pattern. We can then 791 * do a BMG match on the whole buffer. 792 * This is an order of magnitude faster. 793 * Otherwise we split the buffer into lines, 794 * and check for a match on each line. 795 */ 796 static int 797 grep(int fd, const char *fn) 798 { 799 PATTERN *pp; 800 off_t data_len; /* length of the data chunk */ 801 off_t line_len; /* length of the current line */ 802 off_t line_offset; /* current line's offset from the beginning */ 803 long long lineno; 804 long long matches = 0; /* Number of matching lines */ 805 int newlinep; /* 0 if the last line of file has no newline */ 806 char *ptr, *ptrend; 807 808 809 if (patterns == NULL) 810 return (0); /* no patterns to match -- just return */ 811 812 pp = patterns; 813 814 if (use_bmg) { 815 bmgcomp(pp->pattern, strlen(pp->pattern)); 816 } 817 818 if (use_wchar && outline == NULL) { 819 outbuflen = BUFSIZE + 1; 820 outline = malloc(sizeof (wchar_t) * outbuflen); 821 if (outline == NULL) { 822 (void) fprintf(stderr, gettext("%s: out of memory\n"), 823 cmdname); 824 exit(2); 825 } 826 } 827 828 if (prntbuf == NULL) { 829 prntbuflen = BUFSIZE; 830 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) { 831 (void) fprintf(stderr, gettext("%s: out of memory\n"), 832 cmdname); 833 exit(2); 834 } 835 } 836 837 line_offset = 0; 838 lineno = 0; 839 newlinep = 1; 840 data_len = 0; 841 for (; ; ) { 842 long count; 843 off_t offset = 0; 844 845 if (data_len == 0) { 846 /* 847 * If no data in the buffer, reset ptr 848 */ 849 ptr = prntbuf; 850 } 851 if (ptr == prntbuf) { 852 /* 853 * The current data chunk starts from prntbuf. 854 * This means either the buffer has no data 855 * or the buffer has no newline. 856 * So, read more data from input. 857 */ 858 count = read(fd, ptr + data_len, prntbuflen - data_len); 859 if (count < 0) { 860 /* read error */ 861 if (cflag) { 862 if (outfn && !rflag) { 863 (void) fprintf(stdout, 864 "%s:", fn); 865 } 866 if (!qflag && !rflag) { 867 (void) fprintf(stdout, "%lld\n", 868 matches); 869 } 870 } 871 return (0); 872 } else if (count == 0) { 873 /* no new data */ 874 if (data_len == 0) { 875 /* end of file already reached */ 876 break; 877 } 878 /* last line of file has no newline */ 879 ptrend = ptr + data_len; 880 newlinep = 0; 881 goto L_start_process; 882 } 883 offset = data_len; 884 data_len += count; 885 } 886 887 /* 888 * Look for newline in the chunk 889 * between ptr + offset and ptr + data_len - offset. 890 */ 891 ptrend = find_nl(ptr + offset, data_len - offset); 892 if (ptrend == NULL) { 893 /* no newline found in this chunk */ 894 if (ptr > prntbuf) { 895 /* 896 * Move remaining data to the beginning 897 * of the buffer. 898 * Remaining data lie from ptr for 899 * data_len bytes. 900 */ 901 (void) memmove(prntbuf, ptr, data_len); 902 } 903 if (data_len == prntbuflen) { 904 /* 905 * No enough room in the buffer 906 */ 907 prntbuflen += BUFSIZE; 908 prntbuf = realloc(prntbuf, prntbuflen + 1); 909 if (prntbuf == NULL) { 910 (void) fprintf(stderr, 911 gettext("%s: out of memory\n"), 912 cmdname); 913 exit(2); 914 } 915 } 916 ptr = prntbuf; 917 /* read the next input */ 918 continue; 919 } 920 L_start_process: 921 922 /* 923 * Beginning of the chunk: ptr 924 * End of the chunk: ptr + data_len 925 * Beginning of the line: ptr 926 * End of the line: ptrend 927 */ 928 929 if (use_bmg) { 930 /* 931 * Use Boyer-Moore-Gosper algorithm to find out if 932 * this chunk (not this line) contains the specified 933 * pattern. If not, restart from the last line 934 * of this chunk. 935 */ 936 char *bline; 937 bline = bmgexec(ptr, ptr + data_len); 938 if (bline == NULL) { 939 /* 940 * No pattern found in this chunk. 941 * Need to find the last line 942 * in this chunk. 943 */ 944 ptrend = rfind_nl(ptr, data_len); 945 946 /* 947 * When this chunk does not contain newline, 948 * ptrend becomes NULL, which should happen 949 * when the last line of file does not end 950 * with a newline. At such a point, 951 * newlinep should have been set to 0. 952 * Therefore, just after jumping to 953 * L_skip_line, the main for-loop quits, 954 * and the line_len value won't be 955 * used. 956 */ 957 line_len = ptrend - ptr; 958 goto L_skip_line; 959 } 960 if (bline > ptrend) { 961 /* 962 * Pattern found not in the first line 963 * of this chunk. 964 * Discard the first line. 965 */ 966 line_len = ptrend - ptr; 967 goto L_skip_line; 968 } 969 /* 970 * Pattern found in the first line of this chunk. 971 * Using this result. 972 */ 973 *ptrend = '\0'; 974 line_len = ptrend - ptr; 975 976 /* 977 * before jumping to L_next_line, 978 * need to handle xflag if specified 979 */ 980 if (xflag && (line_len != bmglen || 981 strcmp(bmgpat, ptr) != 0)) { 982 /* didn't match */ 983 pp = NULL; 984 } else { 985 pp = patterns; /* to make it happen */ 986 } 987 goto L_next_line; 988 } 989 lineno++; 990 /* 991 * Line starts from ptr and ends at ptrend. 992 * line_len will be the length of the line. 993 */ 994 *ptrend = '\0'; 995 line_len = ptrend - ptr; 996 997 /* 998 * From now, the process will be performed based 999 * on the line from ptr to ptrend. 1000 */ 1001 if (use_wchar) { 1002 size_t len; 1003 1004 if (line_len >= outbuflen) { 1005 outbuflen = line_len + 1; 1006 outline = realloc(outline, 1007 sizeof (wchar_t) * outbuflen); 1008 if (outline == NULL) { 1009 (void) fprintf(stderr, 1010 gettext("%s: out of memory\n"), 1011 cmdname); 1012 exit(2); 1013 } 1014 } 1015 1016 len = mbstowcs(outline, ptr, line_len); 1017 if (len == (size_t)-1) { 1018 (void) fprintf(stderr, gettext( 1019 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1020 cmdname, fn, lineno); 1021 /* never match a line with invalid sequence */ 1022 goto L_skip_line; 1023 } 1024 outline[len] = L'\0'; 1025 1026 if (iflag) { 1027 wchar_t *cp; 1028 for (cp = outline; *cp != '\0'; cp++) { 1029 *cp = towlower((wint_t)*cp); 1030 } 1031 } 1032 1033 if (xflag) { 1034 for (pp = patterns; pp; pp = pp->next) { 1035 if (outline[0] == pp->wpattern[0] && 1036 wcscmp(outline, 1037 pp->wpattern) == 0) { 1038 /* matched */ 1039 break; 1040 } 1041 } 1042 } else { 1043 for (pp = patterns; pp; pp = pp->next) { 1044 if (wcswcs(outline, pp->wpattern) 1045 != NULL) { 1046 /* matched */ 1047 break; 1048 } 1049 } 1050 } 1051 } else if (Fflag) { 1052 /* fgrep in byte-oriented handling */ 1053 char *fptr; 1054 if (iflag) { 1055 fptr = istrdup(ptr); 1056 } else { 1057 fptr = ptr; 1058 } 1059 if (xflag) { 1060 /* fgrep -x */ 1061 for (pp = patterns; pp; pp = pp->next) { 1062 if (fptr[0] == pp->pattern[0] && 1063 strcmp(fptr, pp->pattern) == 0) { 1064 /* matched */ 1065 break; 1066 } 1067 } 1068 } else { 1069 for (pp = patterns; pp; pp = pp->next) { 1070 if (strstr(fptr, pp->pattern) != NULL) { 1071 /* matched */ 1072 break; 1073 } 1074 } 1075 } 1076 } else { 1077 /* grep or egrep */ 1078 for (pp = patterns; pp; pp = pp->next) { 1079 int rv; 1080 1081 rv = regexec(&pp->re, ptr, 0, NULL, 0); 1082 if (rv == REG_OK) { 1083 /* matched */ 1084 break; 1085 } 1086 1087 switch (rv) { 1088 case REG_NOMATCH: 1089 break; 1090 case REG_ECHAR: 1091 (void) fprintf(stderr, gettext( 1092 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1093 cmdname, fn, lineno); 1094 break; 1095 default: 1096 (void) regerror(rv, &pp->re, errstr, 1097 sizeof (errstr)); 1098 (void) fprintf(stderr, gettext( 1099 "%s: input file \"%s\": line %lld: %s\n"), 1100 cmdname, fn, lineno, errstr); 1101 exit(2); 1102 } 1103 } 1104 } 1105 1106 L_next_line: 1107 /* 1108 * Here, if pp points to non-NULL, something has been matched 1109 * to the pattern. 1110 */ 1111 if (nvflag == (pp != NULL)) { 1112 matches++; 1113 /* 1114 * Handle q, l, and c flags. 1115 */ 1116 if (qflag) { 1117 /* no need to continue */ 1118 /* 1119 * End of this line is ptrend. 1120 * We have read up to ptr + data_len. 1121 */ 1122 off_t pos; 1123 pos = ptr + data_len - (ptrend + 1); 1124 (void) lseek(fd, -pos, SEEK_CUR); 1125 exit(0); 1126 } 1127 if (lflag) { 1128 (void) printf("%s\n", fn); 1129 break; 1130 } 1131 if (!cflag) { 1132 if (Hflag || outfn) { 1133 (void) printf("%s:", fn); 1134 } 1135 if (bflag) { 1136 (void) printf("%lld:", (offset_t) 1137 (line_offset / BSIZE)); 1138 } 1139 if (nflag) { 1140 (void) printf("%lld:", lineno); 1141 } 1142 *ptrend = '\n'; 1143 (void) fwrite(ptr, 1, line_len + 1, stdout); 1144 } 1145 if (ferror(stdout)) { 1146 return (0); 1147 } 1148 } 1149 L_skip_line: 1150 if (!newlinep) 1151 break; 1152 1153 data_len -= line_len + 1; 1154 line_offset += line_len + 1; 1155 ptr = ptrend + 1; 1156 } 1157 1158 if (cflag) { 1159 if (Hflag || outfn) { 1160 (void) printf("%s:", fn); 1161 } 1162 if (!qflag) { 1163 (void) printf("%lld\n", matches); 1164 } 1165 } 1166 return (matches != 0); 1167 } 1168 1169 /* 1170 * usage message for grep 1171 */ 1172 static void 1173 usage(void) 1174 { 1175 if (egrep || fgrep) { 1176 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname); 1177 (void) fprintf(stderr, 1178 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] " 1179 "pattern_list [file ...]\n")); 1180 1181 (void) fprintf(stderr, "\t%s", cmdname); 1182 (void) fprintf(stderr, 1183 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] " 1184 "[-e pattern_list]... " 1185 "[-f pattern_file]... [file...]\n")); 1186 } else { 1187 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname); 1188 (void) fprintf(stderr, 1189 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] " 1190 "pattern_list [file ...]\n")); 1191 1192 (void) fprintf(stderr, "\t%s", cmdname); 1193 (void) fprintf(stderr, 1194 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] " 1195 "[-e pattern_list]... " 1196 "[-f pattern_file]... [file...]\n")); 1197 1198 (void) fprintf(stderr, "\t%s", cmdname); 1199 (void) fprintf(stderr, 1200 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] " 1201 "pattern_list [file ...]\n")); 1202 1203 (void) fprintf(stderr, "\t%s", cmdname); 1204 (void) fprintf(stderr, 1205 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] " 1206 "[-e pattern_list]... " 1207 "[-f pattern_file]... [file...]\n")); 1208 1209 (void) fprintf(stderr, "\t%s", cmdname); 1210 (void) fprintf(stderr, 1211 gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] " 1212 "pattern_list [file ...]\n")); 1213 1214 (void) fprintf(stderr, "\t%s", cmdname); 1215 (void) fprintf(stderr, 1216 gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... " 1217 "[-f pattern_file]... [file...]\n")); 1218 } 1219 exit(2); 1220 /* NOTREACHED */ 1221 } 1222 1223 /* 1224 * Compile literal pattern into BMG tables 1225 */ 1226 static void 1227 bmgcomp(char *pat, int len) 1228 { 1229 int i; 1230 int tlen; 1231 unsigned char *uc = (unsigned char *)pat; 1232 1233 bmglen = len; 1234 bmgpat = pat; 1235 1236 for (i = 0; i < M_CSETSIZE; i++) { 1237 bmgtab[i] = len; 1238 } 1239 1240 len--; 1241 for (tlen = len, i = 0; i <= len; i++, tlen--) { 1242 bmgtab[*uc++] = tlen; 1243 } 1244 } 1245 1246 /* 1247 * BMG search. 1248 */ 1249 static char * 1250 bmgexec(char *str, char *end) 1251 { 1252 int t; 1253 char *k, *s, *p; 1254 1255 k = str + bmglen - 1; 1256 if (bmglen == 1) { 1257 return (memchr(str, bmgpat[0], end - str)); 1258 } 1259 for (; ; ) { 1260 /* inner loop, should be most optimized */ 1261 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) { 1262 k += t; 1263 } 1264 if (k >= end) { 1265 return (NULL); 1266 } 1267 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) { 1268 if (p == bmgpat) { 1269 return (s); 1270 } 1271 } 1272 k++; 1273 } 1274 /* NOTREACHED */ 1275 }