1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * grep - pattern matching program - combined grep, egrep, and fgrep.
29 * Based on MKS grep command, with XCU & Solaris mods.
30 */
31
32 /*
33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
34 *
35 */
36
37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
38
39 #include <string.h>
40 #include <stdlib.h>
41 #include <ctype.h>
42 #include <stdarg.h>
43 #include <regex.h>
44 #include <limits.h>
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <locale.h>
50 #include <wchar.h>
51 #include <errno.h>
52 #include <unistd.h>
53 #include <wctype.h>
54 #include <ftw.h>
55 #include <sys/param.h>
56
57 #define BSIZE 512 /* Size of block for -b */
58 #define BUFSIZE 8192 /* Input buffer size */
59 #define MAX_DEPTH 1000 /* how deep to recurse */
60
61 #define M_CSETSIZE 256 /* singlebyte chars */
62 static int bmglen; /* length of BMG pattern */
63 static char *bmgpat; /* BMG pattern */
64 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */
65
66 typedef struct _PATTERN {
67 char *pattern; /* original pattern */
68 wchar_t *wpattern; /* wide, lowercased pattern */
69 struct _PATTERN *next;
70 regex_t re; /* compiled pattern */
71 } PATTERN;
72
73 static PATTERN *patterns;
74 static char errstr[128]; /* regerror string buffer */
75 static int regflags = 0; /* regcomp options */
76 static int matched = 0; /* return of the grep() */
77 static int errors = 0; /* count of errors */
78 static uchar_t fgrep = 0; /* Invoked as fgrep */
79 static uchar_t egrep = 0; /* Invoked as egrep */
80 static uchar_t nvflag = 1; /* Print matching lines */
81 static uchar_t cflag; /* Count of matches */
82 static uchar_t iflag; /* Case insensitve matching */
83 static uchar_t hflag; /* Supress printing of filename */
84 static uchar_t lflag; /* Print file names of matches */
85 static uchar_t nflag; /* Precede lines by line number */
86 static uchar_t rflag; /* Search directories recursively */
87 static uchar_t bflag; /* Preccede matches by block number */
88 static uchar_t sflag; /* Suppress file error messages */
89 static uchar_t qflag; /* Suppress standard output */
90 static uchar_t wflag; /* Search for expression as a word */
91 static uchar_t xflag; /* Anchoring */
92 static uchar_t Eflag; /* Egrep or -E flag */
93 static uchar_t Fflag; /* Fgrep or -F flag */
94 static uchar_t Rflag; /* Like rflag, but follow symlinks */
95 static uchar_t outfn; /* Put out file name */
96 static char *cmdname;
97
98 static int use_wchar, use_bmg, mblocale;
99
100 static size_t outbuflen, prntbuflen;
101 static char *prntbuf;
102 static wchar_t *outline;
103
104 static void addfile(const char *fn);
105 static void addpattern(char *s);
106 static void fixpatterns(void);
107 static void usage(void);
108 static int grep(int, const char *);
109 static void bmgcomp(char *, int);
110 static char *bmgexec(char *, char *);
111 static int recursive(const char *, const struct stat *, int, struct FTW *);
112 static void process_path(const char *);
113 static void process_file(const char *, int);
114
115 /*
116 * mainline for grep
117 */
118 int
119 main(int argc, char **argv)
120 {
121 char *ap;
122 int c;
123 int fflag = 0;
124 int i, n_pattern = 0, n_file = 0;
125 char **pattern_list = NULL;
126 char **file_list = NULL;
127
128 (void) setlocale(LC_ALL, "");
129 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
130 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
131 #endif
132 (void) textdomain(TEXT_DOMAIN);
133
134 /*
135 * true if this is running on the multibyte locale
136 */
137 mblocale = (MB_CUR_MAX > 1);
138 /*
139 * Skip leading slashes
140 */
141 cmdname = argv[0];
142 if (ap = strrchr(cmdname, '/'))
143 cmdname = ap + 1;
144
145 ap = cmdname;
146 /*
147 * Detect egrep/fgrep via command name, map to -E and -F options.
148 */
149 if (*ap == 'e' || *ap == 'E') {
150 regflags |= REG_EXTENDED;
151 egrep++;
152 } else {
153 if (*ap == 'f' || *ap == 'F') {
154 fgrep++;
155 }
156 }
157
158 while ((c = getopt(argc, argv, "vwchilnrbse:f:qxEFIR")) != EOF) {
159 switch (c) {
160 case 'v': /* POSIX: negate matches */
161 nvflag = 0;
162 break;
163
164 case 'c': /* POSIX: write count */
165 cflag++;
166 break;
167
168 case 'i': /* POSIX: ignore case */
169 iflag++;
170 regflags |= REG_ICASE;
171 break;
172
173 case 'l': /* POSIX: Write filenames only */
174 lflag++;
175 break;
176
177 case 'n': /* POSIX: Write line numbers */
178 nflag++;
179 break;
180
181 case 'r': /* Solaris: search recursively */
182 rflag++;
183 break;
184
185 case 'b': /* Solaris: Write file block numbers */
186 bflag++;
187 break;
188
189 case 's': /* POSIX: No error msgs for files */
190 sflag++;
191 break;
192
193 case 'e': /* POSIX: pattern list */
194 n_pattern++;
195 pattern_list = realloc(pattern_list,
196 sizeof (char *) * n_pattern);
197 if (pattern_list == NULL) {
198 (void) fprintf(stderr,
199 gettext("%s: out of memory\n"),
200 cmdname);
201 exit(2);
202 }
203 *(pattern_list + n_pattern - 1) = optarg;
204 break;
205
206 case 'f': /* POSIX: pattern file */
207 fflag = 1;
208 n_file++;
209 file_list = realloc(file_list,
210 sizeof (char *) * n_file);
211 if (file_list == NULL) {
212 (void) fprintf(stderr,
213 gettext("%s: out of memory\n"),
214 cmdname);
215 exit(2);
216 }
217 *(file_list + n_file - 1) = optarg;
218 break;
219 case 'h': /* Solaris: supress printing of file name */
220 hflag = 1;
221 break;
222
223 case 'q': /* POSIX: quiet: status only */
224 qflag++;
225 break;
226
227 case 'w': /* Solaris: treat pattern as word */
228 wflag++;
229 break;
230
231 case 'x': /* POSIX: full line matches */
232 xflag++;
233 regflags |= REG_ANCHOR;
234 break;
235
236 case 'E': /* POSIX: Extended RE's */
237 regflags |= REG_EXTENDED;
238 Eflag++;
239 break;
240
241 case 'F': /* POSIX: strings, not RE's */
242 Fflag++;
243 break;
244
245 case 'R': /* Solaris: like rflag, but follow symlinks */
246 Rflag++;
247 rflag++;
248 break;
249
250 default:
251 usage();
252 }
253 }
254 /*
255 * If we're invoked as egrep or fgrep we need to do some checks
256 */
257
258 if (egrep || fgrep) {
259 /*
260 * Use of -E or -F with egrep or fgrep is illegal
261 */
262 if (Eflag || Fflag)
263 usage();
264 /*
265 * Don't allow use of wflag with egrep / fgrep
266 */
267 if (wflag)
268 usage();
269 /*
270 * For Solaris the -s flag is equivalent to XCU -q
271 */
272 if (sflag)
273 qflag++;
274 /*
275 * done with above checks - set the appropriate flags
276 */
277 if (egrep)
278 Eflag++;
279 else /* Else fgrep */
280 Fflag++;
281 }
282
283 if (wflag && (Eflag || Fflag)) {
284 /*
285 * -w cannot be specified with grep -F
286 */
287 usage();
288 }
289
290 /*
291 * -E and -F flags are mutually exclusive - check for this
292 */
293 if (Eflag && Fflag)
294 usage();
295
296 /*
297 * -c, -l and -q flags are mutually exclusive
298 * We have -c override -l like in Solaris.
299 * -q overrides -l & -c programmatically in grep() function.
300 */
301 if (cflag && lflag)
302 lflag = 0;
303
304 argv += optind - 1;
305 argc -= optind - 1;
306
307 /*
308 * Now handling -e and -f option
309 */
310 if (pattern_list) {
311 for (i = 0; i < n_pattern; i++) {
312 addpattern(pattern_list[i]);
313 }
314 free(pattern_list);
315 }
316 if (file_list) {
317 for (i = 0; i < n_file; i++) {
318 addfile(file_list[i]);
319 }
320 free(file_list);
321 }
322
323 /*
324 * No -e or -f? Make sure there is one more arg, use it as the pattern.
325 */
326 if (patterns == NULL && !fflag) {
327 if (argc < 2)
328 usage();
329 addpattern(argv[1]);
330 argc--;
331 argv++;
332 }
333
334 /*
335 * If -x flag is not specified or -i flag is specified
336 * with fgrep in a multibyte locale, need to use
337 * the wide character APIs. Otherwise, byte-oriented
338 * process will be done.
339 */
340 use_wchar = Fflag && mblocale && (!xflag || iflag);
341
342 /*
343 * Compile Patterns and also decide if BMG can be used
344 */
345 fixpatterns();
346
347 /* Process all files: stdin, or rest of arg list */
348 if (argc < 2) {
349 matched = grep(0, gettext("(standard input)"));
350 } else {
351 if (argc > 2 && hflag == 0)
352 outfn = 1; /* Print filename on match line */
353 for (argv++; *argv != NULL; argv++) {
354 process_path(*argv);
355 }
356 }
357 /*
358 * Return() here is used instead of exit
359 */
360
361 (void) fflush(stdout);
362
363 if (errors)
364 return (2);
365 return (matched ? 0 : 1);
366 }
367
368 static void
369 process_path(const char *path)
370 {
371 struct stat st;
372 int walkflags = FTW_CHDIR;
373 char *buf = NULL;
374
375 if (rflag) {
376 if (stat(path, &st) != -1 &&
377 (st.st_mode & S_IFMT) == S_IFDIR) {
378 outfn = 1; /* Print filename */
379
380 /*
381 * Add trailing slash if arg
382 * is directory, to resolve symlinks.
383 */
384 if (path[strlen(path) - 1] != '/') {
385 (void) asprintf(&buf, "%s/", path);
386 if (buf != NULL)
387 path = buf;
388 }
389
390 /*
391 * Search through subdirs if path is directory.
392 * Don't follow symlinks if Rflag is not set.
393 */
394 if (!Rflag)
395 walkflags |= FTW_PHYS;
396
397 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
398 if (!sflag)
399 (void) fprintf(stderr,
400 gettext("%s: can't open \"%s\"\n"),
401 cmdname, path);
402 errors = 1;
403 }
404 return;
405 }
406 }
407 process_file(path, 0);
408 }
409
410 /*
411 * Read and process all files in directory recursively.
412 */
413 static int
414 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
415 {
416 /*
417 * Process files and follow symlinks if Rflag set.
418 */
419 if (info != FTW_F) {
420 /* Report broken symlinks and unreadable files */
421 if (!sflag &&
422 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
423 (void) fprintf(stderr,
424 gettext("%s: can't open \"%s\"\n"), cmdname, name);
425 }
426 return (0);
427 }
428
429
430 /* Skip devices and pipes if Rflag is not set */
431 if (!Rflag && !S_ISREG(statp->st_mode))
432 return (0);
433 /* Pass offset to relative name from FTW_CHDIR */
434 process_file(name, ftw->base);
435 return (0);
436 }
437
438 /*
439 * Opens file and call grep function.
440 */
441 static void
442 process_file(const char *name, int base)
443 {
444 int fd;
445
446 if ((fd = open(name + base, O_RDONLY)) == -1) {
447 errors = 1;
448 if (!sflag) /* Silent mode */
449 (void) fprintf(stderr, gettext(
450 "%s: can't open \"%s\"\n"),
451 cmdname, name);
452 return;
453 }
454 matched |= grep(fd, name);
455 (void) close(fd);
456
457 if (ferror(stdout)) {
458 (void) fprintf(stderr, gettext(
459 "%s: error writing to stdout\n"),
460 cmdname);
461 (void) fflush(stdout);
462 exit(2);
463 }
464
465 }
466
467 /*
468 * Add a file of strings to the pattern list.
469 */
470 static void
471 addfile(const char *fn)
472 {
473 FILE *fp;
474 char *inbuf;
475 char *bufp;
476 size_t bufsiz, buflen, bufused;
477
478 /*
479 * Open the pattern file
480 */
481 if ((fp = fopen(fn, "r")) == NULL) {
482 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
483 cmdname, fn);
484 exit(2);
485 }
486 bufsiz = BUFSIZE;
487 if ((inbuf = malloc(bufsiz)) == NULL) {
488 (void) fprintf(stderr,
489 gettext("%s: out of memory\n"), cmdname);
490 exit(2);
491 }
492 bufp = inbuf;
493 bufused = 0;
494 /*
495 * Read in the file, reallocing as we need more memory
496 */
497 while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
498 buflen = strlen(bufp);
499 bufused += buflen;
500 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
501 /*
502 * if this line does not fit to the buffer,
503 * realloc larger buffer
504 */
505 bufsiz += BUFSIZE;
506 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
507 (void) fprintf(stderr,
508 gettext("%s: out of memory\n"),
509 cmdname);
510 exit(2);
511 }
512 bufp = inbuf + bufused;
513 continue;
514 }
515 if (bufp[buflen - 1] == '\n') {
516 bufp[--buflen] = '\0';
517 }
518 addpattern(inbuf);
519
520 bufp = inbuf;
521 bufused = 0;
522 }
523 free(inbuf);
524 (void) fclose(fp);
525 }
526
527 /*
528 * Add a string to the pattern list.
529 */
530 static void
531 addpattern(char *s)
532 {
533 PATTERN *pp;
534 char *wordbuf;
535 char *np;
536
537 for (; ; ) {
538 np = strchr(s, '\n');
539 if (np != NULL)
540 *np = '\0';
541 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
542 (void) fprintf(stderr, gettext(
543 "%s: out of memory\n"),
544 cmdname);
545 exit(2);
546 }
547 if (wflag) {
548 /*
549 * Solaris wflag support: Add '<' '>' to pattern to
550 * select it as a word. Doesn't make sense with -F
551 * but we're Libertarian.
552 */
553 size_t slen, wordlen;
554
555 slen = strlen(s);
556 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
557 if ((wordbuf = malloc(wordlen)) == NULL) {
558 (void) fprintf(stderr,
559 gettext("%s: out of memory\n"),
560 cmdname);
561 exit(2);
562 }
563 (void) strcpy(wordbuf, "\\<");
564 (void) strcpy(wordbuf + 2, s);
565 (void) strcpy(wordbuf + 2 + slen, "\\>");
566 } else {
567 if ((wordbuf = strdup(s)) == NULL) {
568 (void) fprintf(stderr,
569 gettext("%s: out of memory\n"),
570 cmdname);
571 exit(2);
572 }
573 }
574 pp->pattern = wordbuf;
575 pp->next = patterns;
576 patterns = pp;
577 if (np == NULL)
578 break;
579 s = np + 1;
580 }
581 }
582
583 /*
584 * Fix patterns.
585 * Must do after all arguments read, in case later -i option.
586 */
587 static void
588 fixpatterns(void)
589 {
590 PATTERN *pp;
591 int rv, fix_pattern, npatterns;
592
593 /*
594 * As REG_ANCHOR flag is not supported in the current Solaris,
595 * need to fix the specified pattern if -x is specified with
596 * grep or egrep
597 */
598 fix_pattern = !Fflag && xflag;
599
600 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
601 npatterns++;
602 if (fix_pattern) {
603 char *cp, *cq;
604 size_t plen, nplen;
605
606 plen = strlen(pp->pattern);
607 /* '^' pattern '$' */
608 nplen = 1 + plen + 1 + 1;
609 if ((cp = malloc(nplen)) == NULL) {
610 (void) fprintf(stderr,
611 gettext("%s: out of memory\n"),
612 cmdname);
613 exit(2);
614 }
615 cq = cp;
616 *cq++ = '^';
617 cq = strcpy(cq, pp->pattern) + plen;
618 *cq++ = '$';
619 *cq = '\0';
620 free(pp->pattern);
621 pp->pattern = cp;
622 }
623
624 if (Fflag) {
625 if (use_wchar) {
626 /*
627 * Fflag && mblocale && iflag
628 * Fflag && mblocale && !xflag
629 */
630 size_t n;
631 n = strlen(pp->pattern) + 1;
632 if ((pp->wpattern =
633 malloc(sizeof (wchar_t) * n)) == NULL) {
634 (void) fprintf(stderr,
635 gettext("%s: out of memory\n"),
636 cmdname);
637 exit(2);
638 }
639 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
640 (size_t)-1) {
641 (void) fprintf(stderr,
642 gettext("%s: failed to convert "
643 "\"%s\" to wide-characters\n"),
644 cmdname, pp->pattern);
645 exit(2);
646 }
647 if (iflag) {
648 wchar_t *wp;
649 for (wp = pp->wpattern; *wp != L'\0';
650 wp++) {
651 *wp = towlower((wint_t)*wp);
652 }
653 }
654 free(pp->pattern);
655 } else {
656 /*
657 * Fflag && mblocale && !iflag
658 * Fflag && !mblocale && iflag
659 * Fflag && !mblocale && !iflag
660 */
661 if (iflag) {
662 unsigned char *cp;
663 for (cp = (unsigned char *)pp->pattern;
664 *cp != '\0'; cp++) {
665 *cp = tolower(*cp);
666 }
667 }
668 }
669 /*
670 * fgrep: No regular expressions.
671 */
672 continue;
673 }
674
675 /*
676 * For non-fgrep, compile the regular expression,
677 * give an informative error message, and exit if
678 * it didn't compile.
679 */
680 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
681 (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
682 (void) fprintf(stderr,
683 gettext("%s: RE error in %s: %s\n"),
684 cmdname, pp->pattern, errstr);
685 exit(2);
686 }
687 free(pp->pattern);
688 }
689
690 /*
691 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
692 * Use the Boyer-Moore-Gosper algorithm if:
693 * - fgrep (Fflag)
694 * - singlebyte locale (!mblocale)
695 * - no ignoring case (!iflag)
696 * - no printing line numbers (!nflag)
697 * - no negating the output (nvflag)
698 * - only one pattern (npatterns == 1)
699 * - non zero length pattern (strlen(patterns->pattern) != 0)
700 *
701 * It's guaranteed patterns->pattern is still alive
702 * when Fflag && !mblocale.
703 */
704 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
705 (npatterns == 1) && (strlen(patterns->pattern) != 0);
706 }
707
708 /*
709 * Search a newline from the beginning of the string
710 */
711 static char *
712 find_nl(const char *ptr, size_t len)
713 {
714 while (len-- != 0) {
715 if (*ptr++ == '\n') {
716 return ((char *)--ptr);
717 }
718 }
719 return (NULL);
720 }
721
722 /*
723 * Search a newline from the end of the string
724 */
725 static char *
726 rfind_nl(const char *ptr, size_t len)
727 {
728 const char *uptr = ptr + len;
729 while (len--) {
730 if (*--uptr == '\n') {
731 return ((char *)uptr);
732 }
733 }
734 return (NULL);
735 }
736
737 /*
738 * Duplicate the specified string converting each character
739 * into a lower case.
740 */
741 static char *
742 istrdup(const char *s1)
743 {
744 static size_t ibuflen = 0;
745 static char *ibuf = NULL;
746 size_t slen;
747 char *p;
748
749 slen = strlen(s1);
750 if (slen >= ibuflen) {
751 /* ibuf does not fit to s1 */
752 ibuflen = slen + 1;
753 ibuf = realloc(ibuf, ibuflen);
754 if (ibuf == NULL) {
755 (void) fprintf(stderr,
756 gettext("%s: out of memory\n"), cmdname);
757 exit(2);
758 }
759 }
760 p = ibuf;
761 do {
762 *p++ = tolower(*s1);
763 } while (*s1++ != '\0');
764 return (ibuf);
765 }
766
767 /*
768 * Do grep on a single file.
769 * Return true in any lines matched.
770 *
771 * We have two strategies:
772 * The fast one is used when we have a single pattern with
773 * a string known to occur in the pattern. We can then
774 * do a BMG match on the whole buffer.
775 * This is an order of magnitude faster.
776 * Otherwise we split the buffer into lines,
777 * and check for a match on each line.
778 */
779 static int
780 grep(int fd, const char *fn)
781 {
782 PATTERN *pp;
783 off_t data_len; /* length of the data chunk */
784 off_t line_len; /* length of the current line */
785 off_t line_offset; /* current line's offset from the beginning */
786 long long lineno;
787 long long matches = 0; /* Number of matching lines */
788 int newlinep; /* 0 if the last line of file has no newline */
789 char *ptr, *ptrend;
790
791
792 if (patterns == NULL)
793 return (0); /* no patterns to match -- just return */
794
795 pp = patterns;
796
797 if (use_bmg) {
798 bmgcomp(pp->pattern, strlen(pp->pattern));
799 }
800
801 if (use_wchar && outline == NULL) {
802 outbuflen = BUFSIZE + 1;
803 outline = malloc(sizeof (wchar_t) * outbuflen);
804 if (outline == NULL) {
805 (void) fprintf(stderr, gettext("%s: out of memory\n"),
806 cmdname);
807 exit(2);
808 }
809 }
810
811 if (prntbuf == NULL) {
812 prntbuflen = BUFSIZE;
813 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
814 (void) fprintf(stderr, gettext("%s: out of memory\n"),
815 cmdname);
816 exit(2);
817 }
818 }
819
820 line_offset = 0;
821 lineno = 0;
822 newlinep = 1;
823 data_len = 0;
824 for (; ; ) {
825 long count;
826 off_t offset = 0;
827
828 if (data_len == 0) {
829 /*
830 * If no data in the buffer, reset ptr
831 */
832 ptr = prntbuf;
833 }
834 if (ptr == prntbuf) {
835 /*
836 * The current data chunk starts from prntbuf.
837 * This means either the buffer has no data
838 * or the buffer has no newline.
839 * So, read more data from input.
840 */
841 count = read(fd, ptr + data_len, prntbuflen - data_len);
842 if (count < 0) {
843 /* read error */
844 if (cflag) {
845 if (outfn && !rflag) {
846 (void) fprintf(stdout,
847 "%s:", fn);
848 }
849 if (!qflag && !rflag) {
850 (void) fprintf(stdout, "%lld\n",
851 matches);
852 }
853 }
854 return (0);
855 } else if (count == 0) {
856 /* no new data */
857 if (data_len == 0) {
858 /* end of file already reached */
859 break;
860 }
861 /* last line of file has no newline */
862 ptrend = ptr + data_len;
863 newlinep = 0;
864 goto L_start_process;
865 }
866 offset = data_len;
867 data_len += count;
868 }
869
870 /*
871 * Look for newline in the chunk
872 * between ptr + offset and ptr + data_len - offset.
873 */
874 ptrend = find_nl(ptr + offset, data_len - offset);
875 if (ptrend == NULL) {
876 /* no newline found in this chunk */
877 if (ptr > prntbuf) {
878 /*
879 * Move remaining data to the beginning
880 * of the buffer.
881 * Remaining data lie from ptr for
882 * data_len bytes.
883 */
884 (void) memmove(prntbuf, ptr, data_len);
885 }
886 if (data_len == prntbuflen) {
887 /*
888 * No enough room in the buffer
889 */
890 prntbuflen += BUFSIZE;
891 prntbuf = realloc(prntbuf, prntbuflen + 1);
892 if (prntbuf == NULL) {
893 (void) fprintf(stderr,
894 gettext("%s: out of memory\n"),
895 cmdname);
896 exit(2);
897 }
898 }
899 ptr = prntbuf;
900 /* read the next input */
901 continue;
902 }
903 L_start_process:
904
905 /*
906 * Beginning of the chunk: ptr
907 * End of the chunk: ptr + data_len
908 * Beginning of the line: ptr
909 * End of the line: ptrend
910 */
911
912 if (use_bmg) {
913 /*
914 * Use Boyer-Moore-Gosper algorithm to find out if
915 * this chunk (not this line) contains the specified
916 * pattern. If not, restart from the last line
917 * of this chunk.
918 */
919 char *bline;
920 bline = bmgexec(ptr, ptr + data_len);
921 if (bline == NULL) {
922 /*
923 * No pattern found in this chunk.
924 * Need to find the last line
925 * in this chunk.
926 */
927 ptrend = rfind_nl(ptr, data_len);
928
929 /*
930 * When this chunk does not contain newline,
931 * ptrend becomes NULL, which should happen
932 * when the last line of file does not end
933 * with a newline. At such a point,
934 * newlinep should have been set to 0.
935 * Therefore, just after jumping to
936 * L_skip_line, the main for-loop quits,
937 * and the line_len value won't be
938 * used.
939 */
940 line_len = ptrend - ptr;
941 goto L_skip_line;
942 }
943 if (bline > ptrend) {
944 /*
945 * Pattern found not in the first line
946 * of this chunk.
947 * Discard the first line.
948 */
949 line_len = ptrend - ptr;
950 goto L_skip_line;
951 }
952 /*
953 * Pattern found in the first line of this chunk.
954 * Using this result.
955 */
956 *ptrend = '\0';
957 line_len = ptrend - ptr;
958
959 /*
960 * before jumping to L_next_line,
961 * need to handle xflag if specified
962 */
963 if (xflag && (line_len != bmglen ||
964 strcmp(bmgpat, ptr) != 0)) {
965 /* didn't match */
966 pp = NULL;
967 } else {
968 pp = patterns; /* to make it happen */
969 }
970 goto L_next_line;
971 }
972 lineno++;
973 /*
974 * Line starts from ptr and ends at ptrend.
975 * line_len will be the length of the line.
976 */
977 *ptrend = '\0';
978 line_len = ptrend - ptr;
979
980 /*
981 * From now, the process will be performed based
982 * on the line from ptr to ptrend.
983 */
984 if (use_wchar) {
985 size_t len;
986
987 if (line_len >= outbuflen) {
988 outbuflen = line_len + 1;
989 outline = realloc(outline,
990 sizeof (wchar_t) * outbuflen);
991 if (outline == NULL) {
992 (void) fprintf(stderr,
993 gettext("%s: out of memory\n"),
994 cmdname);
995 exit(2);
996 }
997 }
998
999 len = mbstowcs(outline, ptr, line_len);
1000 if (len == (size_t)-1) {
1001 (void) fprintf(stderr, gettext(
1002 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1003 cmdname, fn, lineno);
1004 /* never match a line with invalid sequence */
1005 goto L_skip_line;
1006 }
1007 outline[len] = L'\0';
1008
1009 if (iflag) {
1010 wchar_t *cp;
1011 for (cp = outline; *cp != '\0'; cp++) {
1012 *cp = towlower((wint_t)*cp);
1013 }
1014 }
1015
1016 if (xflag) {
1017 for (pp = patterns; pp; pp = pp->next) {
1018 if (outline[0] == pp->wpattern[0] &&
1019 wcscmp(outline,
1020 pp->wpattern) == 0) {
1021 /* matched */
1022 break;
1023 }
1024 }
1025 } else {
1026 for (pp = patterns; pp; pp = pp->next) {
1027 if (wcswcs(outline, pp->wpattern)
1028 != NULL) {
1029 /* matched */
1030 break;
1031 }
1032 }
1033 }
1034 } else if (Fflag) {
1035 /* fgrep in byte-oriented handling */
1036 char *fptr;
1037 if (iflag) {
1038 fptr = istrdup(ptr);
1039 } else {
1040 fptr = ptr;
1041 }
1042 if (xflag) {
1043 /* fgrep -x */
1044 for (pp = patterns; pp; pp = pp->next) {
1045 if (fptr[0] == pp->pattern[0] &&
1046 strcmp(fptr, pp->pattern) == 0) {
1047 /* matched */
1048 break;
1049 }
1050 }
1051 } else {
1052 for (pp = patterns; pp; pp = pp->next) {
1053 if (strstr(fptr, pp->pattern) != NULL) {
1054 /* matched */
1055 break;
1056 }
1057 }
1058 }
1059 } else {
1060 /* grep or egrep */
1061 for (pp = patterns; pp; pp = pp->next) {
1062 int rv;
1063
1064 rv = regexec(&pp->re, ptr, 0, NULL, 0);
1065 if (rv == REG_OK) {
1066 /* matched */
1067 break;
1068 }
1069
1070 switch (rv) {
1071 case REG_NOMATCH:
1072 break;
1073 case REG_ECHAR:
1074 (void) fprintf(stderr, gettext(
1075 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1076 cmdname, fn, lineno);
1077 break;
1078 default:
1079 (void) regerror(rv, &pp->re, errstr,
1080 sizeof (errstr));
1081 (void) fprintf(stderr, gettext(
1082 "%s: input file \"%s\": line %lld: %s\n"),
1083 cmdname, fn, lineno, errstr);
1084 exit(2);
1085 }
1086 }
1087 }
1088
1089 L_next_line:
1090 /*
1091 * Here, if pp points to non-NULL, something has been matched
1092 * to the pattern.
1093 */
1094 if (nvflag == (pp != NULL)) {
1095 matches++;
1096 /*
1097 * Handle q, l, and c flags.
1098 */
1099 if (qflag) {
1100 /* no need to continue */
1101 /*
1102 * End of this line is ptrend.
1103 * We have read up to ptr + data_len.
1104 */
1105 off_t pos;
1106 pos = ptr + data_len - (ptrend + 1);
1107 (void) lseek(fd, -pos, SEEK_CUR);
1108 exit(0);
1109 }
1110 if (lflag) {
1111 (void) printf("%s\n", fn);
1112 break;
1113 }
1114 if (!cflag) {
1115 if (outfn) {
1116 (void) printf("%s:", fn);
1117 }
1118 if (bflag) {
1119 (void) printf("%lld:", (offset_t)
1120 (line_offset / BSIZE));
1121 }
1122 if (nflag) {
1123 (void) printf("%lld:", lineno);
1124 }
1125 *ptrend = '\n';
1126 (void) fwrite(ptr, 1, line_len + 1, stdout);
1127 }
1128 if (ferror(stdout)) {
1129 return (0);
1130 }
1131 }
1132 L_skip_line:
1133 if (!newlinep)
1134 break;
1135
1136 data_len -= line_len + 1;
1137 line_offset += line_len + 1;
1138 ptr = ptrend + 1;
1139 }
1140
1141 if (cflag) {
1142 if (outfn) {
1143 (void) printf("%s:", fn);
1144 }
1145 if (!qflag) {
1146 (void) printf("%lld\n", matches);
1147 }
1148 }
1149 return (matches != 0);
1150 }
1151
1152 /*
1153 * usage message for grep
1154 */
1155 static void
1156 usage(void)
1157 {
1158 if (egrep || fgrep) {
1159 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1160 (void) fprintf(stderr,
1161 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1162 "pattern_list [file ...]\n"));
1163
1164 (void) fprintf(stderr, "\t%s", cmdname);
1165 (void) fprintf(stderr,
1166 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1167 "[-e pattern_list]... "
1168 "[-f pattern_file]... [file...]\n"));
1169 } else {
1170 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1171 (void) fprintf(stderr,
1172 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1173 "pattern_list [file ...]\n"));
1174
1175 (void) fprintf(stderr, "\t%s", cmdname);
1176 (void) fprintf(stderr,
1177 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1178 "[-e pattern_list]... "
1179 "[-f pattern_file]... [file...]\n"));
1180
1181 (void) fprintf(stderr, "\t%s", cmdname);
1182 (void) fprintf(stderr,
1183 gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1184 "pattern_list [file ...]\n"));
1185
1186 (void) fprintf(stderr, "\t%s", cmdname);
1187 (void) fprintf(stderr,
1188 gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1189 "[-e pattern_list]... "
1190 "[-f pattern_file]... [file...]\n"));
1191
1192 (void) fprintf(stderr, "\t%s", cmdname);
1193 (void) fprintf(stderr,
1194 gettext(" -F [-c|-l|-q] [-r|-R] [-bhinsvx] "
1195 "pattern_list [file ...]\n"));
1196
1197 (void) fprintf(stderr, "\t%s", cmdname);
1198 (void) fprintf(stderr,
1199 gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1200 "[-f pattern_file]... [file...]\n"));
1201 }
1202 exit(2);
1203 /* NOTREACHED */
1204 }
1205
1206 /*
1207 * Compile literal pattern into BMG tables
1208 */
1209 static void
1210 bmgcomp(char *pat, int len)
1211 {
1212 int i;
1213 int tlen;
1214 unsigned char *uc = (unsigned char *)pat;
1215
1216 bmglen = len;
1217 bmgpat = pat;
1218
1219 for (i = 0; i < M_CSETSIZE; i++) {
1220 bmgtab[i] = len;
1221 }
1222
1223 len--;
1224 for (tlen = len, i = 0; i <= len; i++, tlen--) {
1225 bmgtab[*uc++] = tlen;
1226 }
1227 }
1228
1229 /*
1230 * BMG search.
1231 */
1232 static char *
1233 bmgexec(char *str, char *end)
1234 {
1235 int t;
1236 char *k, *s, *p;
1237
1238 k = str + bmglen - 1;
1239 if (bmglen == 1) {
1240 return (memchr(str, bmgpat[0], end - str));
1241 }
1242 for (; ; ) {
1243 /* inner loop, should be most optimized */
1244 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1245 k += t;
1246 }
1247 if (k >= end) {
1248 return (NULL);
1249 }
1250 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1251 if (p == bmgpat) {
1252 return (s);
1253 }
1254 }
1255 k++;
1256 }
1257 /* NOTREACHED */
1258 }