1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * grep - pattern matching program - combined grep, egrep, and fgrep.
29 * Based on MKS grep command, with XCU & Solaris mods.
30 */
31
32 /*
33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
34 *
35 */
36
37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
38
39 #include <string.h>
40 #include <stdlib.h>
41 #include <ctype.h>
42 #include <stdarg.h>
43 #include <regex.h>
44 #include <limits.h>
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <locale.h>
50 #include <wchar.h>
51 #include <errno.h>
52 #include <unistd.h>
53 #include <wctype.h>
54 #include <ftw.h>
55 #include <sys/param.h>
56
/* Localized name printed in place of a file name when reading stdin */
#define	STDIN_FILENAME	gettext("(standard input)")

#define	BSIZE		512	/* Size of block for -b */
#define	BUFSIZE		8192	/* Input buffer size (also the growth step) */
#define	MAX_DEPTH	1000	/* depth argument handed to nftw() */

#define	M_CSETSIZE	256	/* number of distinct single-byte characters */
/*
 * State of the Boyer-Moore-Gosper (BMG) matcher; filled in by bmgcomp()
 * and read by bmgexec().
 */
static int	bmglen;			/* length of BMG pattern */
static char	*bmgpat;		/* BMG pattern */
static int	bmgtab[M_CSETSIZE];	/* BMG delta1 table */
67
/*
 * One entry of the singly linked pattern list headed by `patterns'.
 * Which members are valid depends on the matching mode chosen in
 * fixpatterns(): `re' for regular expressions, `wpattern' for fixed
 * strings matched as wide characters, `pattern' for fixed strings matched
 * as bytes.
 */
typedef	struct	_PATTERN	{
	char	*pattern;	/* original pattern */
	wchar_t	*wpattern;	/* wide pattern (lowercased when -i is set) */
	struct	_PATTERN	*next;	/* next pattern in the list */
	regex_t	re;		/* compiled pattern */
} PATTERN;
74
static PATTERN	*patterns;	/* head of the pattern list */
static char	errstr[128];	/* regerror string buffer */
static int	regflags = 0;	/* regcomp options */
static int	matched = 0;	/* nonzero once grep() reported a match */
static int	errors = 0;	/* nonzero once an error has been seen */
static uchar_t	fgrep = 0;	/* Invoked as fgrep */
static uchar_t	egrep = 0;	/* Invoked as egrep */
static uchar_t	nvflag = 1;	/* 1: select matching lines; 0 after -v */
static uchar_t	cflag;		/* Count of matches */
static uchar_t	iflag;		/* Case insensitive matching */
static uchar_t	Hflag;		/* Precede lines by file name */
static uchar_t	hflag;		/* Suppress printing of filename */
static uchar_t	lflag;		/* Print file names of matches */
static uchar_t	nflag;		/* Precede lines by line number */
static uchar_t	rflag;		/* Search directories recursively */
static uchar_t	bflag;		/* Precede matches by block number */
static uchar_t	sflag;		/* Suppress file error messages */
static uchar_t	qflag;		/* Suppress standard output */
static uchar_t	wflag;		/* Search for expression as a word */
static uchar_t	xflag;		/* Anchoring */
static uchar_t	Eflag;		/* Egrep or -E flag */
static uchar_t	Fflag;		/* Fgrep or -F flag */
static uchar_t	Rflag;		/* Like rflag, but follow symlinks */
static uchar_t	outfn;		/* Put out file name */
static char	*cmdname;	/* basename of argv[0], used in messages */

/*
 * use_wchar: match fixed strings as wide characters
 * use_bmg:   use the Boyer-Moore-Gosper fast path
 * mblocale:  the current locale is a multibyte one
 */
static int	use_wchar, use_bmg, mblocale;

/* Allocated sizes of outline (in wide characters) and prntbuf (in bytes) */
static size_t	outbuflen, prntbuflen;
static char	*prntbuf;	/* input buffer used by grep() */
static wchar_t	*outline;	/* wide-character copy of the current line */
106
/* Forward declarations of the file-local functions defined below */
static void	addfile(const char *fn);
static void	addpattern(char *s);
static void	fixpatterns(void);
static void	usage(void);
static int	grep(int, const char *);
static void	bmgcomp(char *, int);
static char	*bmgexec(char *, char *);
/* Callback handed to nftw() by process_path() */
static int	recursive(const char *, const struct stat *, int, struct FTW *);
static void	process_path(const char *);
static void	process_file(const char *, int);
117
118 /*
119 * mainline for grep
120 */
121 int
122 main(int argc, char **argv)
123 {
124 char *ap;
125 int c;
126 int fflag = 0;
127 int i, n_pattern = 0, n_file = 0;
128 char **pattern_list = NULL;
129 char **file_list = NULL;
130
131 (void) setlocale(LC_ALL, "");
132 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
133 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
134 #endif
135 (void) textdomain(TEXT_DOMAIN);
136
137 /*
138 * true if this is running on the multibyte locale
139 */
140 mblocale = (MB_CUR_MAX > 1);
141 /*
142 * Skip leading slashes
143 */
144 cmdname = argv[0];
145 if (ap = strrchr(cmdname, '/'))
146 cmdname = ap + 1;
147
148 ap = cmdname;
149 /*
150 * Detect egrep/fgrep via command name, map to -E and -F options.
151 */
152 if (*ap == 'e' || *ap == 'E') {
153 regflags |= REG_EXTENDED;
154 egrep++;
155 } else {
156 if (*ap == 'f' || *ap == 'F') {
157 fgrep++;
158 }
159 }
160
161 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) {
162 switch (c) {
163 case 'v': /* POSIX: negate matches */
164 nvflag = 0;
165 break;
166
167 case 'c': /* POSIX: write count */
168 cflag++;
169 break;
170
171 case 'i': /* POSIX: ignore case */
172 iflag++;
173 regflags |= REG_ICASE;
174 break;
175
176 case 'l': /* POSIX: Write filenames only */
177 lflag++;
178 break;
179
180 case 'n': /* POSIX: Write line numbers */
181 nflag++;
182 break;
183
184 case 'r': /* Solaris: search recursively */
185 rflag++;
186 break;
187
188 case 'b': /* Solaris: Write file block numbers */
189 bflag++;
190 break;
191
192 case 's': /* POSIX: No error msgs for files */
193 sflag++;
194 break;
195
196 case 'e': /* POSIX: pattern list */
197 n_pattern++;
198 pattern_list = realloc(pattern_list,
199 sizeof (char *) * n_pattern);
200 if (pattern_list == NULL) {
201 (void) fprintf(stderr,
202 gettext("%s: out of memory\n"),
203 cmdname);
204 exit(2);
205 }
206 *(pattern_list + n_pattern - 1) = optarg;
207 break;
208
209 case 'f': /* POSIX: pattern file */
210 fflag = 1;
211 n_file++;
212 file_list = realloc(file_list,
213 sizeof (char *) * n_file);
214 if (file_list == NULL) {
215 (void) fprintf(stderr,
216 gettext("%s: out of memory\n"),
217 cmdname);
218 exit(2);
219 }
220 *(file_list + n_file - 1) = optarg;
221 break;
222
223 /* based on options order h or H is set as in GNU grep */
224 case 'h': /* Solaris: supress printing of file name */
225 hflag = 1;
226 Hflag = 0;
227 break;
228 /* Solaris: precede every matching with file name */
229 case 'H':
230 Hflag = 1;
231 hflag = 0;
232 break;
233
234 case 'q': /* POSIX: quiet: status only */
235 qflag++;
236 break;
237
238 case 'w': /* Solaris: treat pattern as word */
239 wflag++;
240 break;
241
242 case 'x': /* POSIX: full line matches */
243 xflag++;
244 regflags |= REG_ANCHOR;
245 break;
246
247 case 'E': /* POSIX: Extended RE's */
248 regflags |= REG_EXTENDED;
249 Eflag++;
250 break;
251
252 case 'F': /* POSIX: strings, not RE's */
253 Fflag++;
254 break;
255
256 case 'R': /* Solaris: like rflag, but follow symlinks */
257 Rflag++;
258 rflag++;
259 break;
260
261 default:
262 usage();
263 }
264 }
265 /*
266 * If we're invoked as egrep or fgrep we need to do some checks
267 */
268
269 if (egrep || fgrep) {
270 /*
271 * Use of -E or -F with egrep or fgrep is illegal
272 */
273 if (Eflag || Fflag)
274 usage();
275 /*
276 * Don't allow use of wflag with egrep / fgrep
277 */
278 if (wflag)
279 usage();
280 /*
281 * For Solaris the -s flag is equivalent to XCU -q
282 */
283 if (sflag)
284 qflag++;
285 /*
286 * done with above checks - set the appropriate flags
287 */
288 if (egrep)
289 Eflag++;
290 else /* Else fgrep */
291 Fflag++;
292 }
293
294 if (wflag && (Eflag || Fflag)) {
295 /*
296 * -w cannot be specified with grep -F
297 */
298 usage();
299 }
300
301 /*
302 * -E and -F flags are mutually exclusive - check for this
303 */
304 if (Eflag && Fflag)
305 usage();
306
307 /*
308 * -l overrides -H like in GNU grep
309 */
310 if (lflag)
311 Hflag = 0;
312
313 /*
314 * -c, -l and -q flags are mutually exclusive
315 * We have -c override -l like in Solaris.
316 * -q overrides -l & -c programmatically in grep() function.
317 */
318 if (cflag && lflag)
319 lflag = 0;
320
321 argv += optind - 1;
322 argc -= optind - 1;
323
324 /*
325 * Now handling -e and -f option
326 */
327 if (pattern_list) {
328 for (i = 0; i < n_pattern; i++) {
329 addpattern(pattern_list[i]);
330 }
331 free(pattern_list);
332 }
333 if (file_list) {
334 for (i = 0; i < n_file; i++) {
335 addfile(file_list[i]);
336 }
337 free(file_list);
338 }
339
340 /*
341 * No -e or -f? Make sure there is one more arg, use it as the pattern.
342 */
343 if (patterns == NULL && !fflag) {
344 if (argc < 2)
345 usage();
346 addpattern(argv[1]);
347 argc--;
348 argv++;
349 }
350
351 /*
352 * If -x flag is not specified or -i flag is specified
353 * with fgrep in a multibyte locale, need to use
354 * the wide character APIs. Otherwise, byte-oriented
355 * process will be done.
356 */
357 use_wchar = Fflag && mblocale && (!xflag || iflag);
358
359 /*
360 * Compile Patterns and also decide if BMG can be used
361 */
362 fixpatterns();
363
364 /* Process all files: stdin, or rest of arg list */
365 if (argc < 2) {
366 matched = grep(0, STDIN_FILENAME);
367 } else {
368 if (Hflag || (argc > 2 && hflag == 0))
369 outfn = 1; /* Print filename on match line */
370 for (argv++; *argv != NULL; argv++) {
371 process_path(*argv);
372 }
373 }
374 /*
375 * Return() here is used instead of exit
376 */
377
378 (void) fflush(stdout);
379
380 if (errors)
381 return (2);
382 return (matched ? 0 : 1);
383 }
384
385 static void
386 process_path(const char *path)
387 {
388 struct stat st;
389 int walkflags = FTW_CHDIR;
390 char *buf = NULL;
391
392 if (rflag) {
393 if (stat(path, &st) != -1 &&
394 (st.st_mode & S_IFMT) == S_IFDIR) {
395 outfn = 1; /* Print filename */
396
397 /*
398 * Add trailing slash if arg
399 * is directory, to resolve symlinks.
400 */
401 if (path[strlen(path) - 1] != '/') {
402 (void) asprintf(&buf, "%s/", path);
403 if (buf != NULL)
404 path = buf;
405 }
406
407 /*
408 * Search through subdirs if path is directory.
409 * Don't follow symlinks if Rflag is not set.
410 */
411 if (!Rflag)
412 walkflags |= FTW_PHYS;
413
414 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
415 if (!sflag)
416 (void) fprintf(stderr,
417 gettext("%s: can't open \"%s\"\n"),
418 cmdname, path);
419 errors = 1;
420 }
421 return;
422 }
423 }
424 process_file(path, 0);
425 }
426
427 /*
428 * Read and process all files in directory recursively.
429 */
430 static int
431 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
432 {
433 /*
434 * Process files and follow symlinks if Rflag set.
435 */
436 if (info != FTW_F) {
437 /* Report broken symlinks and unreadable files */
438 if (!sflag &&
439 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
440 (void) fprintf(stderr,
441 gettext("%s: can't open \"%s\"\n"), cmdname, name);
442 }
443 return (0);
444 }
445
446
447 /* Skip devices and pipes if Rflag is not set */
448 if (!Rflag && !S_ISREG(statp->st_mode))
449 return (0);
450 /* Pass offset to relative name from FTW_CHDIR */
451 process_file(name, ftw->base);
452 return (0);
453 }
454
455 /*
456 * Opens file and call grep function.
457 */
458 static void
459 process_file(const char *name, int base)
460 {
461 int fd;
462
463 if ((fd = open(name + base, O_RDONLY)) == -1) {
464 errors = 1;
465 if (!sflag) /* Silent mode */
466 (void) fprintf(stderr, gettext(
467 "%s: can't open \"%s\"\n"),
468 cmdname, name);
469 return;
470 }
471 matched |= grep(fd, name);
472 (void) close(fd);
473
474 if (ferror(stdout)) {
475 (void) fprintf(stderr, gettext(
476 "%s: error writing to stdout\n"),
477 cmdname);
478 (void) fflush(stdout);
479 exit(2);
480 }
481
482 }
483
484 /*
485 * Add a file of strings to the pattern list.
486 */
487 static void
488 addfile(const char *fn)
489 {
490 FILE *fp;
491 char *inbuf;
492 char *bufp;
493 size_t bufsiz, buflen, bufused;
494
495 /*
496 * Open the pattern file
497 */
498 if ((fp = fopen(fn, "r")) == NULL) {
499 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
500 cmdname, fn);
501 exit(2);
502 }
503 bufsiz = BUFSIZE;
504 if ((inbuf = malloc(bufsiz)) == NULL) {
505 (void) fprintf(stderr,
506 gettext("%s: out of memory\n"), cmdname);
507 exit(2);
508 }
509 bufp = inbuf;
510 bufused = 0;
511 /*
512 * Read in the file, reallocing as we need more memory
513 */
514 while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
515 buflen = strlen(bufp);
516 bufused += buflen;
517 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
518 /*
519 * if this line does not fit to the buffer,
520 * realloc larger buffer
521 */
522 bufsiz += BUFSIZE;
523 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
524 (void) fprintf(stderr,
525 gettext("%s: out of memory\n"),
526 cmdname);
527 exit(2);
528 }
529 bufp = inbuf + bufused;
530 continue;
531 }
532 if (bufp[buflen - 1] == '\n') {
533 bufp[--buflen] = '\0';
534 }
535 addpattern(inbuf);
536
537 bufp = inbuf;
538 bufused = 0;
539 }
540 free(inbuf);
541 (void) fclose(fp);
542 }
543
544 /*
545 * Add a string to the pattern list.
546 */
547 static void
548 addpattern(char *s)
549 {
550 PATTERN *pp;
551 char *wordbuf;
552 char *np;
553
554 for (; ; ) {
555 np = strchr(s, '\n');
556 if (np != NULL)
557 *np = '\0';
558 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
559 (void) fprintf(stderr, gettext(
560 "%s: out of memory\n"),
561 cmdname);
562 exit(2);
563 }
564 if (wflag) {
565 /*
566 * Solaris wflag support: Add '<' '>' to pattern to
567 * select it as a word. Doesn't make sense with -F
568 * but we're Libertarian.
569 */
570 size_t slen, wordlen;
571
572 slen = strlen(s);
573 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
574 if ((wordbuf = malloc(wordlen)) == NULL) {
575 (void) fprintf(stderr,
576 gettext("%s: out of memory\n"),
577 cmdname);
578 exit(2);
579 }
580 (void) strcpy(wordbuf, "\\<");
581 (void) strcpy(wordbuf + 2, s);
582 (void) strcpy(wordbuf + 2 + slen, "\\>");
583 } else {
584 if ((wordbuf = strdup(s)) == NULL) {
585 (void) fprintf(stderr,
586 gettext("%s: out of memory\n"),
587 cmdname);
588 exit(2);
589 }
590 }
591 pp->pattern = wordbuf;
592 pp->next = patterns;
593 patterns = pp;
594 if (np == NULL)
595 break;
596 s = np + 1;
597 }
598 }
599
600 /*
601 * Fix patterns.
602 * Must do after all arguments read, in case later -i option.
603 */
604 static void
605 fixpatterns(void)
606 {
607 PATTERN *pp;
608 int rv, fix_pattern, npatterns;
609
610 /*
611 * As REG_ANCHOR flag is not supported in the current Solaris,
612 * need to fix the specified pattern if -x is specified with
613 * grep or egrep
614 */
615 fix_pattern = !Fflag && xflag;
616
617 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
618 npatterns++;
619 if (fix_pattern) {
620 char *cp, *cq;
621 size_t plen, nplen;
622
623 plen = strlen(pp->pattern);
624 /* '^' pattern '$' */
625 nplen = 1 + plen + 1 + 1;
626 if ((cp = malloc(nplen)) == NULL) {
627 (void) fprintf(stderr,
628 gettext("%s: out of memory\n"),
629 cmdname);
630 exit(2);
631 }
632 cq = cp;
633 *cq++ = '^';
634 cq = strcpy(cq, pp->pattern) + plen;
635 *cq++ = '$';
636 *cq = '\0';
637 free(pp->pattern);
638 pp->pattern = cp;
639 }
640
641 if (Fflag) {
642 if (use_wchar) {
643 /*
644 * Fflag && mblocale && iflag
645 * Fflag && mblocale && !xflag
646 */
647 size_t n;
648 n = strlen(pp->pattern) + 1;
649 if ((pp->wpattern =
650 malloc(sizeof (wchar_t) * n)) == NULL) {
651 (void) fprintf(stderr,
652 gettext("%s: out of memory\n"),
653 cmdname);
654 exit(2);
655 }
656 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
657 (size_t)-1) {
658 (void) fprintf(stderr,
659 gettext("%s: failed to convert "
660 "\"%s\" to wide-characters\n"),
661 cmdname, pp->pattern);
662 exit(2);
663 }
664 if (iflag) {
665 wchar_t *wp;
666 for (wp = pp->wpattern; *wp != L'\0';
667 wp++) {
668 *wp = towlower((wint_t)*wp);
669 }
670 }
671 free(pp->pattern);
672 } else {
673 /*
674 * Fflag && mblocale && !iflag
675 * Fflag && !mblocale && iflag
676 * Fflag && !mblocale && !iflag
677 */
678 if (iflag) {
679 unsigned char *cp;
680 for (cp = (unsigned char *)pp->pattern;
681 *cp != '\0'; cp++) {
682 *cp = tolower(*cp);
683 }
684 }
685 }
686 /*
687 * fgrep: No regular expressions.
688 */
689 continue;
690 }
691
692 /*
693 * For non-fgrep, compile the regular expression,
694 * give an informative error message, and exit if
695 * it didn't compile.
696 */
697 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
698 (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
699 (void) fprintf(stderr,
700 gettext("%s: RE error in %s: %s\n"),
701 cmdname, pp->pattern, errstr);
702 exit(2);
703 }
704 free(pp->pattern);
705 }
706
707 /*
708 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
709 * Use the Boyer-Moore-Gosper algorithm if:
710 * - fgrep (Fflag)
711 * - singlebyte locale (!mblocale)
712 * - no ignoring case (!iflag)
713 * - no printing line numbers (!nflag)
714 * - no negating the output (nvflag)
715 * - only one pattern (npatterns == 1)
716 * - non zero length pattern (strlen(patterns->pattern) != 0)
717 *
718 * It's guaranteed patterns->pattern is still alive
719 * when Fflag && !mblocale.
720 */
721 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
722 (npatterns == 1) && (strlen(patterns->pattern) != 0);
723 }
724
/*
 * Search a newline from the beginning of the string.
 *
 * Examines the len bytes starting at ptr and returns a pointer to the
 * first '\n' among them, or NULL if there is none.
 */
static char *
find_nl(const char *ptr, size_t len)
{
	return ((char *)memchr(ptr, '\n', len));
}
738
/*
 * Search a newline from the end of the string.
 *
 * Examines the len bytes starting at ptr and returns a pointer to the
 * last '\n' among them, or NULL if there is none.
 */
static char *
rfind_nl(const char *ptr, size_t len)
{
	size_t	remaining;

	for (remaining = len; remaining > 0; remaining--) {
		if (ptr[remaining - 1] == '\n') {
			return ((char *)&ptr[remaining - 1]);
		}
	}
	return (NULL);
}
753
754 /*
755 * Duplicate the specified string converting each character
756 * into a lower case.
757 */
758 static char *
759 istrdup(const char *s1)
760 {
761 static size_t ibuflen = 0;
762 static char *ibuf = NULL;
763 size_t slen;
764 char *p;
765
766 slen = strlen(s1);
767 if (slen >= ibuflen) {
768 /* ibuf does not fit to s1 */
769 ibuflen = slen + 1;
770 ibuf = realloc(ibuf, ibuflen);
771 if (ibuf == NULL) {
772 (void) fprintf(stderr,
773 gettext("%s: out of memory\n"), cmdname);
774 exit(2);
775 }
776 }
777 p = ibuf;
778 do {
779 *p++ = tolower(*s1);
780 } while (*s1++ != '\0');
781 return (ibuf);
782 }
783
784 /*
785 * Do grep on a single file.
786 * Return true in any lines matched.
787 *
788 * We have two strategies:
789 * The fast one is used when we have a single pattern with
790 * a string known to occur in the pattern. We can then
791 * do a BMG match on the whole buffer.
792 * This is an order of magnitude faster.
793 * Otherwise we split the buffer into lines,
794 * and check for a match on each line.
795 */
796 static int
797 grep(int fd, const char *fn)
798 {
799 PATTERN *pp;
800 off_t data_len; /* length of the data chunk */
801 off_t line_len; /* length of the current line */
802 off_t line_offset; /* current line's offset from the beginning */
803 long long lineno;
804 long long matches = 0; /* Number of matching lines */
805 int newlinep; /* 0 if the last line of file has no newline */
806 char *ptr, *ptrend;
807
808
809 if (patterns == NULL)
810 return (0); /* no patterns to match -- just return */
811
812 pp = patterns;
813
814 if (use_bmg) {
815 bmgcomp(pp->pattern, strlen(pp->pattern));
816 }
817
818 if (use_wchar && outline == NULL) {
819 outbuflen = BUFSIZE + 1;
820 outline = malloc(sizeof (wchar_t) * outbuflen);
821 if (outline == NULL) {
822 (void) fprintf(stderr, gettext("%s: out of memory\n"),
823 cmdname);
824 exit(2);
825 }
826 }
827
828 if (prntbuf == NULL) {
829 prntbuflen = BUFSIZE;
830 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
831 (void) fprintf(stderr, gettext("%s: out of memory\n"),
832 cmdname);
833 exit(2);
834 }
835 }
836
837 line_offset = 0;
838 lineno = 0;
839 newlinep = 1;
840 data_len = 0;
841 for (; ; ) {
842 long count;
843 off_t offset = 0;
844
845 if (data_len == 0) {
846 /*
847 * If no data in the buffer, reset ptr
848 */
849 ptr = prntbuf;
850 }
851 if (ptr == prntbuf) {
852 /*
853 * The current data chunk starts from prntbuf.
854 * This means either the buffer has no data
855 * or the buffer has no newline.
856 * So, read more data from input.
857 */
858 count = read(fd, ptr + data_len, prntbuflen - data_len);
859 if (count < 0) {
860 /* read error */
861 if (cflag) {
862 if (outfn && !rflag) {
863 (void) fprintf(stdout,
864 "%s:", fn);
865 }
866 if (!qflag && !rflag) {
867 (void) fprintf(stdout, "%lld\n",
868 matches);
869 }
870 }
871 return (0);
872 } else if (count == 0) {
873 /* no new data */
874 if (data_len == 0) {
875 /* end of file already reached */
876 break;
877 }
878 /* last line of file has no newline */
879 ptrend = ptr + data_len;
880 newlinep = 0;
881 goto L_start_process;
882 }
883 offset = data_len;
884 data_len += count;
885 }
886
887 /*
888 * Look for newline in the chunk
889 * between ptr + offset and ptr + data_len - offset.
890 */
891 ptrend = find_nl(ptr + offset, data_len - offset);
892 if (ptrend == NULL) {
893 /* no newline found in this chunk */
894 if (ptr > prntbuf) {
895 /*
896 * Move remaining data to the beginning
897 * of the buffer.
898 * Remaining data lie from ptr for
899 * data_len bytes.
900 */
901 (void) memmove(prntbuf, ptr, data_len);
902 }
903 if (data_len == prntbuflen) {
904 /*
905 * No enough room in the buffer
906 */
907 prntbuflen += BUFSIZE;
908 prntbuf = realloc(prntbuf, prntbuflen + 1);
909 if (prntbuf == NULL) {
910 (void) fprintf(stderr,
911 gettext("%s: out of memory\n"),
912 cmdname);
913 exit(2);
914 }
915 }
916 ptr = prntbuf;
917 /* read the next input */
918 continue;
919 }
920 L_start_process:
921
922 /*
923 * Beginning of the chunk: ptr
924 * End of the chunk: ptr + data_len
925 * Beginning of the line: ptr
926 * End of the line: ptrend
927 */
928
929 if (use_bmg) {
930 /*
931 * Use Boyer-Moore-Gosper algorithm to find out if
932 * this chunk (not this line) contains the specified
933 * pattern. If not, restart from the last line
934 * of this chunk.
935 */
936 char *bline;
937 bline = bmgexec(ptr, ptr + data_len);
938 if (bline == NULL) {
939 /*
940 * No pattern found in this chunk.
941 * Need to find the last line
942 * in this chunk.
943 */
944 ptrend = rfind_nl(ptr, data_len);
945
946 /*
947 * When this chunk does not contain newline,
948 * ptrend becomes NULL, which should happen
949 * when the last line of file does not end
950 * with a newline. At such a point,
951 * newlinep should have been set to 0.
952 * Therefore, just after jumping to
953 * L_skip_line, the main for-loop quits,
954 * and the line_len value won't be
955 * used.
956 */
957 line_len = ptrend - ptr;
958 goto L_skip_line;
959 }
960 if (bline > ptrend) {
961 /*
962 * Pattern found not in the first line
963 * of this chunk.
964 * Discard the first line.
965 */
966 line_len = ptrend - ptr;
967 goto L_skip_line;
968 }
969 /*
970 * Pattern found in the first line of this chunk.
971 * Using this result.
972 */
973 *ptrend = '\0';
974 line_len = ptrend - ptr;
975
976 /*
977 * before jumping to L_next_line,
978 * need to handle xflag if specified
979 */
980 if (xflag && (line_len != bmglen ||
981 strcmp(bmgpat, ptr) != 0)) {
982 /* didn't match */
983 pp = NULL;
984 } else {
985 pp = patterns; /* to make it happen */
986 }
987 goto L_next_line;
988 }
989 lineno++;
990 /*
991 * Line starts from ptr and ends at ptrend.
992 * line_len will be the length of the line.
993 */
994 *ptrend = '\0';
995 line_len = ptrend - ptr;
996
997 /*
998 * From now, the process will be performed based
999 * on the line from ptr to ptrend.
1000 */
1001 if (use_wchar) {
1002 size_t len;
1003
1004 if (line_len >= outbuflen) {
1005 outbuflen = line_len + 1;
1006 outline = realloc(outline,
1007 sizeof (wchar_t) * outbuflen);
1008 if (outline == NULL) {
1009 (void) fprintf(stderr,
1010 gettext("%s: out of memory\n"),
1011 cmdname);
1012 exit(2);
1013 }
1014 }
1015
1016 len = mbstowcs(outline, ptr, line_len);
1017 if (len == (size_t)-1) {
1018 (void) fprintf(stderr, gettext(
1019 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1020 cmdname, fn, lineno);
1021 /* never match a line with invalid sequence */
1022 goto L_skip_line;
1023 }
1024 outline[len] = L'\0';
1025
1026 if (iflag) {
1027 wchar_t *cp;
1028 for (cp = outline; *cp != '\0'; cp++) {
1029 *cp = towlower((wint_t)*cp);
1030 }
1031 }
1032
1033 if (xflag) {
1034 for (pp = patterns; pp; pp = pp->next) {
1035 if (outline[0] == pp->wpattern[0] &&
1036 wcscmp(outline,
1037 pp->wpattern) == 0) {
1038 /* matched */
1039 break;
1040 }
1041 }
1042 } else {
1043 for (pp = patterns; pp; pp = pp->next) {
1044 if (wcswcs(outline, pp->wpattern)
1045 != NULL) {
1046 /* matched */
1047 break;
1048 }
1049 }
1050 }
1051 } else if (Fflag) {
1052 /* fgrep in byte-oriented handling */
1053 char *fptr;
1054 if (iflag) {
1055 fptr = istrdup(ptr);
1056 } else {
1057 fptr = ptr;
1058 }
1059 if (xflag) {
1060 /* fgrep -x */
1061 for (pp = patterns; pp; pp = pp->next) {
1062 if (fptr[0] == pp->pattern[0] &&
1063 strcmp(fptr, pp->pattern) == 0) {
1064 /* matched */
1065 break;
1066 }
1067 }
1068 } else {
1069 for (pp = patterns; pp; pp = pp->next) {
1070 if (strstr(fptr, pp->pattern) != NULL) {
1071 /* matched */
1072 break;
1073 }
1074 }
1075 }
1076 } else {
1077 /* grep or egrep */
1078 for (pp = patterns; pp; pp = pp->next) {
1079 int rv;
1080
1081 rv = regexec(&pp->re, ptr, 0, NULL, 0);
1082 if (rv == REG_OK) {
1083 /* matched */
1084 break;
1085 }
1086
1087 switch (rv) {
1088 case REG_NOMATCH:
1089 break;
1090 case REG_ECHAR:
1091 (void) fprintf(stderr, gettext(
1092 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1093 cmdname, fn, lineno);
1094 break;
1095 default:
1096 (void) regerror(rv, &pp->re, errstr,
1097 sizeof (errstr));
1098 (void) fprintf(stderr, gettext(
1099 "%s: input file \"%s\": line %lld: %s\n"),
1100 cmdname, fn, lineno, errstr);
1101 exit(2);
1102 }
1103 }
1104 }
1105
1106 L_next_line:
1107 /*
1108 * Here, if pp points to non-NULL, something has been matched
1109 * to the pattern.
1110 */
1111 if (nvflag == (pp != NULL)) {
1112 matches++;
1113 /*
1114 * Handle q, l, and c flags.
1115 */
1116 if (qflag) {
1117 /* no need to continue */
1118 /*
1119 * End of this line is ptrend.
1120 * We have read up to ptr + data_len.
1121 */
1122 off_t pos;
1123 pos = ptr + data_len - (ptrend + 1);
1124 (void) lseek(fd, -pos, SEEK_CUR);
1125 exit(0);
1126 }
1127 if (lflag) {
1128 (void) printf("%s\n", fn);
1129 break;
1130 }
1131 if (!cflag) {
1132 if (Hflag || outfn) {
1133 (void) printf("%s:", fn);
1134 }
1135 if (bflag) {
1136 (void) printf("%lld:", (offset_t)
1137 (line_offset / BSIZE));
1138 }
1139 if (nflag) {
1140 (void) printf("%lld:", lineno);
1141 }
1142 *ptrend = '\n';
1143 (void) fwrite(ptr, 1, line_len + 1, stdout);
1144 }
1145 if (ferror(stdout)) {
1146 return (0);
1147 }
1148 }
1149 L_skip_line:
1150 if (!newlinep)
1151 break;
1152
1153 data_len -= line_len + 1;
1154 line_offset += line_len + 1;
1155 ptr = ptrend + 1;
1156 }
1157
1158 if (cflag) {
1159 if (Hflag || outfn) {
1160 (void) printf("%s:", fn);
1161 }
1162 if (!qflag) {
1163 (void) printf("%lld\n", matches);
1164 }
1165 }
1166 return (matches != 0);
1167 }
1168
1169 /*
1170 * usage message for grep
1171 */
1172 static void
1173 usage(void)
1174 {
1175 if (egrep || fgrep) {
1176 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1177 (void) fprintf(stderr,
1178 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1179 "pattern_list [file ...]\n"));
1180
1181 (void) fprintf(stderr, "\t%s", cmdname);
1182 (void) fprintf(stderr,
1183 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1184 "[-e pattern_list]... "
1185 "[-f pattern_file]... [file...]\n"));
1186 } else {
1187 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1188 (void) fprintf(stderr,
1189 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1190 "pattern_list [file ...]\n"));
1191
1192 (void) fprintf(stderr, "\t%s", cmdname);
1193 (void) fprintf(stderr,
1194 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1195 "[-e pattern_list]... "
1196 "[-f pattern_file]... [file...]\n"));
1197
1198 (void) fprintf(stderr, "\t%s", cmdname);
1199 (void) fprintf(stderr,
1200 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1201 "pattern_list [file ...]\n"));
1202
1203 (void) fprintf(stderr, "\t%s", cmdname);
1204 (void) fprintf(stderr,
1205 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1206 "[-e pattern_list]... "
1207 "[-f pattern_file]... [file...]\n"));
1208
1209 (void) fprintf(stderr, "\t%s", cmdname);
1210 (void) fprintf(stderr,
1211 gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1212 "pattern_list [file ...]\n"));
1213
1214 (void) fprintf(stderr, "\t%s", cmdname);
1215 (void) fprintf(stderr,
1216 gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
1217 "[-f pattern_file]... [file...]\n"));
1218 }
1219 exit(2);
1220 /* NOTREACHED */
1221 }
1222
1223 /*
1224 * Compile literal pattern into BMG tables
1225 */
1226 static void
1227 bmgcomp(char *pat, int len)
1228 {
1229 int i;
1230 int tlen;
1231 unsigned char *uc = (unsigned char *)pat;
1232
1233 bmglen = len;
1234 bmgpat = pat;
1235
1236 for (i = 0; i < M_CSETSIZE; i++) {
1237 bmgtab[i] = len;
1238 }
1239
1240 len--;
1241 for (tlen = len, i = 0; i <= len; i++, tlen--) {
1242 bmgtab[*uc++] = tlen;
1243 }
1244 }
1245
1246 /*
1247 * BMG search.
1248 */
1249 static char *
1250 bmgexec(char *str, char *end)
1251 {
1252 int t;
1253 char *k, *s, *p;
1254
1255 k = str + bmglen - 1;
1256 if (bmglen == 1) {
1257 return (memchr(str, bmgpat[0], end - str));
1258 }
1259 for (; ; ) {
1260 /* inner loop, should be most optimized */
1261 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1262 k += t;
1263 }
1264 if (k >= end) {
1265 return (NULL);
1266 }
1267 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1268 if (p == bmgpat) {
1269 return (s);
1270 }
1271 }
1272 k++;
1273 }
1274 /* NOTREACHED */
1275 }