1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 /*
30 * grep - pattern matching program - combined grep, egrep, and fgrep.
31 * Based on MKS grep command, with XCU & Solaris mods.
32 */
33
34 /*
35 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
36 *
37 */
38
39 #include <string.h>
40 #include <stdlib.h>
41 #include <ctype.h>
42 #include <stdarg.h>
43 #include <regex.h>
44 #include <limits.h>
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <locale.h>
50 #include <wchar.h>
51 #include <errno.h>
52 #include <unistd.h>
53 #include <wctype.h>
54
/*
 * Sizing constants.
 */
#define	BSIZE		512	/* bytes per block, as reported by -b */
#define	BUFSIZE		8192	/* initial size and growth step of buffers */
#define	M_CSETSIZE	256	/* number of distinct single-byte values */

/*
 * State shared by bmgcomp() and bmgexec().
 */
static char	*bmgpat;		/* the literal being searched for */
static int	bmglen;			/* length of that literal in bytes */
static int	bmgtab[M_CSETSIZE];	/* skip distance, indexed by byte */
62
/*
 * One node of the singly linked list of search patterns.
 */
typedef struct _PATTERN PATTERN;

struct _PATTERN {
	char	*pattern;	/* pattern text as a multibyte string */
	wchar_t	*wpattern;	/* wide form (lowercased under -i) */
	PATTERN	*next;		/* following node, NULL at the tail */
	regex_t	re;		/* output of regcomp() for this pattern */
};
69
70 static PATTERN *patterns;
71 static char errstr[128]; /* regerror string buffer */
72 static int regflags = 0; /* regcomp options */
73 static uchar_t fgrep = 0; /* Invoked as fgrep */
74 static uchar_t egrep = 0; /* Invoked as egrep */
75 static uchar_t nvflag = 1; /* Print matching lines */
76 static uchar_t cflag; /* Count of matches */
77 static uchar_t iflag; /* Case insensitve matching */
78 static uchar_t hflag; /* Supress printing of filename */
79 static uchar_t lflag; /* Print file names of matches */
80 static uchar_t nflag; /* Precede lines by line number */
81 static uchar_t bflag; /* Preccede matches by block number */
82 static uchar_t sflag; /* Suppress file error messages */
83 static uchar_t qflag; /* Suppress standard output */
84 static uchar_t wflag; /* Search for expression as a word */
85 static uchar_t xflag; /* Anchoring */
86 static uchar_t Eflag; /* Egrep or -E flag */
87 static uchar_t Fflag; /* Fgrep or -F flag */
88 static uchar_t outfn; /* Put out file name */
89 static char *cmdname;
90
91 static int use_wchar, use_bmg, mblocale;
92
93 static size_t outbuflen, prntbuflen;
94 static char *prntbuf;
95 static wchar_t *outline;
96
/*
 * Forward declarations of functions that are called before they are defined.
 */
static void	usage(void);
static void	addpattern(char *s);
static void	addfile(char *fn);
static void	fixpatterns(void);
static int	grep(int fd, char *fn);
static void	bmgcomp(char *pat, int len);
static char	*bmgexec(char *str, char *end);
104
105 /*
106 * mainline for grep
107 */
108 int
109 main(int argc, char **argv)
110 {
111 char *ap;
112 int matched = 0;
113 int c;
114 int fflag = 0;
115 int errors = 0;
116 int i, n_pattern = 0, n_file = 0;
117 char **pattern_list = NULL;
118 char **file_list = NULL;
119
120 (void) setlocale(LC_ALL, "");
121 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
122 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
123 #endif
124 (void) textdomain(TEXT_DOMAIN);
125
126 /*
127 * true if this is running on the multibyte locale
128 */
129 mblocale = (MB_CUR_MAX > 1);
130 /*
131 * Skip leading slashes
132 */
133 cmdname = argv[0];
134 if (ap = strrchr(cmdname, '/'))
135 cmdname = ap + 1;
136
137 ap = cmdname;
138 /*
139 * Detect egrep/fgrep via command name, map to -E and -F options.
140 */
141 if (*ap == 'e' || *ap == 'E') {
142 regflags |= REG_EXTENDED;
143 egrep++;
144 } else {
145 if (*ap == 'f' || *ap == 'F') {
146 fgrep++;
147 }
148 }
149
150 while ((c = getopt(argc, argv, "vwchilnbse:f:qxEFI")) != EOF) {
151 switch (c) {
152 case 'v': /* POSIX: negate matches */
153 nvflag = 0;
154 break;
155
156 case 'c': /* POSIX: write count */
157 cflag++;
158 break;
159
160 case 'i': /* POSIX: ignore case */
161 iflag++;
162 regflags |= REG_ICASE;
163 break;
164
165 case 'l': /* POSIX: Write filenames only */
166 lflag++;
167 break;
168
169 case 'n': /* POSIX: Write line numbers */
170 nflag++;
171 break;
172
173 case 'b': /* Solaris: Write file block numbers */
174 bflag++;
175 break;
176
177 case 's': /* POSIX: No error msgs for files */
178 sflag++;
179 break;
180
181 case 'e': /* POSIX: pattern list */
182 n_pattern++;
183 pattern_list = realloc(pattern_list,
184 sizeof (char *) * n_pattern);
185 if (pattern_list == NULL) {
186 (void) fprintf(stderr,
187 gettext("%s: out of memory\n"),
188 cmdname);
189 exit(2);
190 }
191 *(pattern_list + n_pattern - 1) = optarg;
192 break;
193
194 case 'f': /* POSIX: pattern file */
195 fflag = 1;
196 n_file++;
197 file_list = realloc(file_list,
198 sizeof (char *) * n_file);
199 if (file_list == NULL) {
200 (void) fprintf(stderr,
201 gettext("%s: out of memory\n"),
202 cmdname);
203 exit(2);
204 }
205 *(file_list + n_file - 1) = optarg;
206 break;
207 case 'h': /* Solaris: supress printing of file name */
208 hflag = 1;
209 break;
210
211 case 'q': /* POSIX: quiet: status only */
212 qflag++;
213 break;
214
215 case 'w': /* Solaris: treat pattern as word */
216 wflag++;
217 break;
218
219 case 'x': /* POSIX: full line matches */
220 xflag++;
221 regflags |= REG_ANCHOR;
222 break;
223
224 case 'E': /* POSIX: Extended RE's */
225 regflags |= REG_EXTENDED;
226 Eflag++;
227 break;
228
229 case 'F': /* POSIX: strings, not RE's */
230 Fflag++;
231 break;
232
233 default:
234 usage();
235 }
236 }
237 /*
238 * If we're invoked as egrep or fgrep we need to do some checks
239 */
240
241 if (egrep || fgrep) {
242 /*
243 * Use of -E or -F with egrep or fgrep is illegal
244 */
245 if (Eflag || Fflag)
246 usage();
247 /*
248 * Don't allow use of wflag with egrep / fgrep
249 */
250 if (wflag)
251 usage();
252 /*
253 * For Solaris the -s flag is equivalent to XCU -q
254 */
255 if (sflag)
256 qflag++;
257 /*
258 * done with above checks - set the appropriate flags
259 */
260 if (egrep)
261 Eflag++;
262 else /* Else fgrep */
263 Fflag++;
264 }
265
266 if (wflag && (Eflag || Fflag)) {
267 /*
268 * -w cannot be specified with grep -F
269 */
270 usage();
271 }
272
273 /*
274 * -E and -F flags are mutually exclusive - check for this
275 */
276 if (Eflag && Fflag)
277 usage();
278
279 /*
280 * -c, -l and -q flags are mutually exclusive
281 * We have -c override -l like in Solaris.
282 * -q overrides -l & -c programmatically in grep() function.
283 */
284 if (cflag && lflag)
285 lflag = 0;
286
287 argv += optind - 1;
288 argc -= optind - 1;
289
290 /*
291 * Now handling -e and -f option
292 */
293 if (pattern_list) {
294 for (i = 0; i < n_pattern; i++) {
295 addpattern(pattern_list[i]);
296 }
297 free(pattern_list);
298 }
299 if (file_list) {
300 for (i = 0; i < n_file; i++) {
301 addfile(file_list[i]);
302 }
303 free(file_list);
304 }
305
306 /*
307 * No -e or -f? Make sure there is one more arg, use it as the pattern.
308 */
309 if (patterns == NULL && !fflag) {
310 if (argc < 2)
311 usage();
312 addpattern(argv[1]);
313 argc--;
314 argv++;
315 }
316
317 /*
318 * If -x flag is not specified or -i flag is specified
319 * with fgrep in a multibyte locale, need to use
320 * the wide character APIs. Otherwise, byte-oriented
321 * process will be done.
322 */
323 use_wchar = Fflag && mblocale && (!xflag || iflag);
324
325 /*
326 * Compile Patterns and also decide if BMG can be used
327 */
328 fixpatterns();
329
330 /* Process all files: stdin, or rest of arg list */
331 if (argc < 2) {
332 matched = grep(0, gettext("(standard input)"));
333 } else {
334 if (argc > 2 && hflag == 0)
335 outfn = 1; /* Print filename on match line */
336 for (argv++; *argv != NULL; argv++) {
337 int fd;
338
339 if ((fd = open(*argv, O_RDONLY)) == -1) {
340 errors = 1;
341 if (sflag)
342 continue;
343 (void) fprintf(stderr, gettext(
344 "%s: can't open \"%s\"\n"),
345 cmdname, *argv);
346 continue;
347 }
348 matched |= grep(fd, *argv);
349 (void) close(fd);
350 if (ferror(stdout))
351 break;
352 }
353 }
354 /*
355 * Return() here is used instead of exit
356 */
357
358 (void) fflush(stdout);
359
360 if (errors)
361 return (2);
362 return (matched ? 0 : 1);
363 }
364
365 /*
366 * Add a file of strings to the pattern list.
367 */
368 static void
369 addfile(char *fn)
370 {
371 FILE *fp;
372 char *inbuf;
373 char *bufp;
374 size_t bufsiz, buflen, bufused;
375
376 /*
377 * Open the pattern file
378 */
379 if ((fp = fopen(fn, "r")) == NULL) {
380 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
381 cmdname, fn);
382 exit(2);
383 }
384 bufsiz = BUFSIZE;
385 if ((inbuf = malloc(bufsiz)) == NULL) {
386 (void) fprintf(stderr,
387 gettext("%s: out of memory\n"), cmdname);
388 exit(2);
389 }
390 bufp = inbuf;
391 bufused = 0;
392 /*
393 * Read in the file, reallocing as we need more memory
394 */
395 while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
396 buflen = strlen(bufp);
397 bufused += buflen;
398 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
399 /*
400 * if this line does not fit to the buffer,
401 * realloc larger buffer
402 */
403 bufsiz += BUFSIZE;
404 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
405 (void) fprintf(stderr,
406 gettext("%s: out of memory\n"),
407 cmdname);
408 exit(2);
409 }
410 bufp = inbuf + bufused;
411 continue;
412 }
413 if (bufp[buflen - 1] == '\n') {
414 bufp[--buflen] = '\0';
415 }
416 addpattern(inbuf);
417
418 bufp = inbuf;
419 bufused = 0;
420 }
421 free(inbuf);
422 (void) fclose(fp);
423 }
424
425 /*
426 * Add a string to the pattern list.
427 */
428 static void
429 addpattern(char *s)
430 {
431 PATTERN *pp;
432 char *wordbuf;
433 char *np;
434
435 for (; ; ) {
436 np = strchr(s, '\n');
437 if (np != NULL)
438 *np = '\0';
439 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
440 (void) fprintf(stderr, gettext(
441 "%s: out of memory\n"),
442 cmdname);
443 exit(2);
444 }
445 if (wflag) {
446 /*
447 * Solaris wflag support: Add '<' '>' to pattern to
448 * select it as a word. Doesn't make sense with -F
449 * but we're Libertarian.
450 */
451 size_t slen, wordlen;
452
453 slen = strlen(s);
454 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
455 if ((wordbuf = malloc(wordlen)) == NULL) {
456 (void) fprintf(stderr,
457 gettext("%s: out of memory\n"),
458 cmdname);
459 exit(2);
460 }
461 (void) strcpy(wordbuf, "\\<");
462 (void) strcpy(wordbuf + 2, s);
463 (void) strcpy(wordbuf + 2 + slen, "\\>");
464 } else {
465 if ((wordbuf = strdup(s)) == NULL) {
466 (void) fprintf(stderr,
467 gettext("%s: out of memory\n"),
468 cmdname);
469 exit(2);
470 }
471 }
472 pp->pattern = wordbuf;
473 pp->next = patterns;
474 patterns = pp;
475 if (np == NULL)
476 break;
477 s = np + 1;
478 }
479 }
480
481 /*
482 * Fix patterns.
483 * Must do after all arguments read, in case later -i option.
484 */
485 static void
486 fixpatterns(void)
487 {
488 PATTERN *pp;
489 int rv, fix_pattern, npatterns;
490
491 /*
492 * As REG_ANCHOR flag is not supported in the current Solaris,
493 * need to fix the specified pattern if -x is specified with
494 * grep or egrep
495 */
496 fix_pattern = !Fflag && xflag;
497
498 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
499 npatterns++;
500 if (fix_pattern) {
501 char *cp, *cq;
502 size_t plen, nplen;
503
504 plen = strlen(pp->pattern);
505 /* '^' pattern '$' */
506 nplen = 1 + plen + 1 + 1;
507 if ((cp = malloc(nplen)) == NULL) {
508 (void) fprintf(stderr,
509 gettext("%s: out of memory\n"),
510 cmdname);
511 exit(2);
512 }
513 cq = cp;
514 *cq++ = '^';
515 cq = strcpy(cq, pp->pattern) + plen;
516 *cq++ = '$';
517 *cq = '\0';
518 free(pp->pattern);
519 pp->pattern = cp;
520 }
521
522 if (Fflag) {
523 if (use_wchar) {
524 /*
525 * Fflag && mblocale && iflag
526 * Fflag && mblocale && !xflag
527 */
528 size_t n;
529 n = strlen(pp->pattern) + 1;
530 if ((pp->wpattern =
531 malloc(sizeof (wchar_t) * n)) == NULL) {
532 (void) fprintf(stderr,
533 gettext("%s: out of memory\n"),
534 cmdname);
535 exit(2);
536 }
537 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
538 (size_t)-1) {
539 (void) fprintf(stderr,
540 gettext("%s: failed to convert "
541 "\"%s\" to wide-characters\n"),
542 cmdname, pp->pattern);
543 exit(2);
544 }
545 if (iflag) {
546 wchar_t *wp;
547 for (wp = pp->wpattern; *wp != L'\0';
548 wp++) {
549 *wp = towlower((wint_t)*wp);
550 }
551 }
552 free(pp->pattern);
553 } else {
554 /*
555 * Fflag && mblocale && !iflag
556 * Fflag && !mblocale && iflag
557 * Fflag && !mblocale && !iflag
558 */
559 if (iflag) {
560 unsigned char *cp;
561 for (cp = (unsigned char *)pp->pattern;
562 *cp != '\0'; cp++) {
563 *cp = tolower(*cp);
564 }
565 }
566 }
567 /*
568 * fgrep: No regular expressions.
569 */
570 continue;
571 }
572
573 /*
574 * For non-fgrep, compile the regular expression,
575 * give an informative error message, and exit if
576 * it didn't compile.
577 */
578 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
579 (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
580 (void) fprintf(stderr,
581 gettext("%s: RE error in %s: %s\n"),
582 cmdname, pp->pattern, errstr);
583 exit(2);
584 }
585 free(pp->pattern);
586 }
587
588 /*
589 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
590 * Use the Boyer-Moore-Gosper algorithm if:
591 * - fgrep (Fflag)
592 * - singlebyte locale (!mblocale)
593 * - no ignoring case (!iflag)
594 * - no printing line numbers (!nflag)
595 * - no negating the output (nvflag)
596 * - only one pattern (npatterns == 1)
597 * - non zero length pattern (strlen(patterns->pattern) != 0)
598 *
599 * It's guaranteed patterns->pattern is still alive
600 * when Fflag && !mblocale.
601 */
602 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
603 (npatterns == 1) && (strlen(patterns->pattern) != 0);
604 }
605
/*
 * Search a newline from the beginning of the string
 *
 * Looks at the len bytes starting at ptr and returns a pointer to the
 * first newline among them, or NULL if there is none (always NULL when
 * len is 0).  NUL bytes are not treated specially.
 */
static char *
find_nl(const char *ptr, size_t len)
{
	return ((char *)memchr(ptr, '\n', len));
}
619
/*
 * Search a newline from the end of the string
 *
 * Looks at the len bytes starting at ptr and returns a pointer to the
 * last newline among them, or NULL if there is none.
 */
static char *
rfind_nl(const char *ptr, size_t len)
{
	size_t remaining;

	/* ptr[remaining - 1] is the next byte to inspect, walking backwards */
	for (remaining = len; remaining > 0; remaining--) {
		if (ptr[remaining - 1] == '\n')
			return ((char *)&ptr[remaining - 1]);
	}
	return (NULL);
}
634
635 /*
636 * Duplicate the specified string converting each character
637 * into a lower case.
638 */
639 static char *
640 istrdup(const char *s1)
641 {
642 static size_t ibuflen = 0;
643 static char *ibuf = NULL;
644 size_t slen;
645 char *p;
646
647 slen = strlen(s1);
648 if (slen >= ibuflen) {
649 /* ibuf does not fit to s1 */
650 ibuflen = slen + 1;
651 ibuf = realloc(ibuf, ibuflen);
652 if (ibuf == NULL) {
653 (void) fprintf(stderr,
654 gettext("%s: out of memory\n"), cmdname);
655 exit(2);
656 }
657 }
658 p = ibuf;
659 do {
660 *p++ = tolower(*s1);
661 } while (*s1++ != '\0');
662 return (ibuf);
663 }
664
665 /*
666 * Do grep on a single file.
667 * Return true in any lines matched.
668 *
669 * We have two strategies:
670 * The fast one is used when we have a single pattern with
671 * a string known to occur in the pattern. We can then
672 * do a BMG match on the whole buffer.
673 * This is an order of magnitude faster.
674 * Otherwise we split the buffer into lines,
675 * and check for a match on each line.
676 */
677 static int
678 grep(int fd, char *fn)
679 {
680 PATTERN *pp;
681 off_t data_len; /* length of the data chunk */
682 off_t line_len; /* length of the current line */
683 off_t line_offset; /* current line's offset from the beginning */
684 long long lineno;
685 long long matches = 0; /* Number of matching lines */
686 int newlinep; /* 0 if the last line of file has no newline */
687 char *ptr, *ptrend;
688
689
690 if (patterns == NULL)
691 return (0); /* no patterns to match -- just return */
692
693 pp = patterns;
694
695 if (use_bmg) {
696 bmgcomp(pp->pattern, strlen(pp->pattern));
697 }
698
699 if (use_wchar && outline == NULL) {
700 outbuflen = BUFSIZE + 1;
701 outline = malloc(sizeof (wchar_t) * outbuflen);
702 if (outline == NULL) {
703 (void) fprintf(stderr, gettext("%s: out of memory\n"),
704 cmdname);
705 exit(2);
706 }
707 }
708
709 if (prntbuf == NULL) {
710 prntbuflen = BUFSIZE;
711 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
712 (void) fprintf(stderr, gettext("%s: out of memory\n"),
713 cmdname);
714 exit(2);
715 }
716 }
717
718 line_offset = 0;
719 lineno = 0;
720 newlinep = 1;
721 data_len = 0;
722 for (; ; ) {
723 long count;
724 off_t offset = 0;
725
726 if (data_len == 0) {
727 /*
728 * If no data in the buffer, reset ptr
729 */
730 ptr = prntbuf;
731 }
732 if (ptr == prntbuf) {
733 /*
734 * The current data chunk starts from prntbuf.
735 * This means either the buffer has no data
736 * or the buffer has no newline.
737 * So, read more data from input.
738 */
739 count = read(fd, ptr + data_len, prntbuflen - data_len);
740 if (count < 0) {
741 /* read error */
742 if (cflag) {
743 if (outfn) {
744 (void) fprintf(stdout,
745 "%s:", fn);
746 }
747 if (!qflag) {
748 (void) fprintf(stdout, "%lld\n",
749 matches);
750 }
751 }
752 return (0);
753 } else if (count == 0) {
754 /* no new data */
755 if (data_len == 0) {
756 /* end of file already reached */
757 break;
758 }
759 /* last line of file has no newline */
760 ptrend = ptr + data_len;
761 newlinep = 0;
762 goto L_start_process;
763 }
764 offset = data_len;
765 data_len += count;
766 }
767
768 /*
769 * Look for newline in the chunk
770 * between ptr + offset and ptr + data_len - offset.
771 */
772 ptrend = find_nl(ptr + offset, data_len - offset);
773 if (ptrend == NULL) {
774 /* no newline found in this chunk */
775 if (ptr > prntbuf) {
776 /*
777 * Move remaining data to the beginning
778 * of the buffer.
779 * Remaining data lie from ptr for
780 * data_len bytes.
781 */
782 (void) memmove(prntbuf, ptr, data_len);
783 }
784 if (data_len == prntbuflen) {
785 /*
786 * No enough room in the buffer
787 */
788 prntbuflen += BUFSIZE;
789 prntbuf = realloc(prntbuf, prntbuflen + 1);
790 if (prntbuf == NULL) {
791 (void) fprintf(stderr,
792 gettext("%s: out of memory\n"),
793 cmdname);
794 exit(2);
795 }
796 }
797 ptr = prntbuf;
798 /* read the next input */
799 continue;
800 }
801 L_start_process:
802
803 /*
804 * Beginning of the chunk: ptr
805 * End of the chunk: ptr + data_len
806 * Beginning of the line: ptr
807 * End of the line: ptrend
808 */
809
810 if (use_bmg) {
811 /*
812 * Use Boyer-Moore-Gosper algorithm to find out if
813 * this chunk (not this line) contains the specified
814 * pattern. If not, restart from the last line
815 * of this chunk.
816 */
817 char *bline;
818 bline = bmgexec(ptr, ptr + data_len);
819 if (bline == NULL) {
820 /*
821 * No pattern found in this chunk.
822 * Need to find the last line
823 * in this chunk.
824 */
825 ptrend = rfind_nl(ptr, data_len);
826
827 /*
828 * When this chunk does not contain newline,
829 * ptrend becomes NULL, which should happen
830 * when the last line of file does not end
831 * with a newline. At such a point,
832 * newlinep should have been set to 0.
833 * Therefore, just after jumping to
834 * L_skip_line, the main for-loop quits,
835 * and the line_len value won't be
836 * used.
837 */
838 line_len = ptrend - ptr;
839 goto L_skip_line;
840 }
841 if (bline > ptrend) {
842 /*
843 * Pattern found not in the first line
844 * of this chunk.
845 * Discard the first line.
846 */
847 line_len = ptrend - ptr;
848 goto L_skip_line;
849 }
850 /*
851 * Pattern found in the first line of this chunk.
852 * Using this result.
853 */
854 *ptrend = '\0';
855 line_len = ptrend - ptr;
856
857 /*
858 * before jumping to L_next_line,
859 * need to handle xflag if specified
860 */
861 if (xflag && (line_len != bmglen ||
862 strcmp(bmgpat, ptr) != 0)) {
863 /* didn't match */
864 pp = NULL;
865 } else {
866 pp = patterns; /* to make it happen */
867 }
868 goto L_next_line;
869 }
870 lineno++;
871 /*
872 * Line starts from ptr and ends at ptrend.
873 * line_len will be the length of the line.
874 */
875 *ptrend = '\0';
876 line_len = ptrend - ptr;
877
878 /*
879 * From now, the process will be performed based
880 * on the line from ptr to ptrend.
881 */
882 if (use_wchar) {
883 size_t len;
884
885 if (line_len >= outbuflen) {
886 outbuflen = line_len + 1;
887 outline = realloc(outline,
888 sizeof (wchar_t) * outbuflen);
889 if (outline == NULL) {
890 (void) fprintf(stderr,
891 gettext("%s: out of memory\n"),
892 cmdname);
893 exit(2);
894 }
895 }
896
897 len = mbstowcs(outline, ptr, line_len);
898 if (len == (size_t)-1) {
899 (void) fprintf(stderr, gettext(
900 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
901 cmdname, fn, lineno);
902 /* never match a line with invalid sequence */
903 goto L_skip_line;
904 }
905 outline[len] = L'\0';
906
907 if (iflag) {
908 wchar_t *cp;
909 for (cp = outline; *cp != '\0'; cp++) {
910 *cp = towlower((wint_t)*cp);
911 }
912 }
913
914 if (xflag) {
915 for (pp = patterns; pp; pp = pp->next) {
916 if (outline[0] == pp->wpattern[0] &&
917 wcscmp(outline,
918 pp->wpattern) == 0) {
919 /* matched */
920 break;
921 }
922 }
923 } else {
924 for (pp = patterns; pp; pp = pp->next) {
925 if (wcswcs(outline, pp->wpattern)
926 != NULL) {
927 /* matched */
928 break;
929 }
930 }
931 }
932 } else if (Fflag) {
933 /* fgrep in byte-oriented handling */
934 char *fptr;
935 if (iflag) {
936 fptr = istrdup(ptr);
937 } else {
938 fptr = ptr;
939 }
940 if (xflag) {
941 /* fgrep -x */
942 for (pp = patterns; pp; pp = pp->next) {
943 if (fptr[0] == pp->pattern[0] &&
944 strcmp(fptr, pp->pattern) == 0) {
945 /* matched */
946 break;
947 }
948 }
949 } else {
950 for (pp = patterns; pp; pp = pp->next) {
951 if (strstr(fptr, pp->pattern) != NULL) {
952 /* matched */
953 break;
954 }
955 }
956 }
957 } else {
958 /* grep or egrep */
959 for (pp = patterns; pp; pp = pp->next) {
960 int rv;
961
962 rv = regexec(&pp->re, ptr, 0, NULL, 0);
963 if (rv == REG_OK) {
964 /* matched */
965 break;
966 }
967
968 switch (rv) {
969 case REG_NOMATCH:
970 break;
971 case REG_ECHAR:
972 (void) fprintf(stderr, gettext(
973 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
974 cmdname, fn, lineno);
975 break;
976 default:
977 (void) regerror(rv, &pp->re, errstr,
978 sizeof (errstr));
979 (void) fprintf(stderr, gettext(
980 "%s: input file \"%s\": line %lld: %s\n"),
981 cmdname, fn, lineno, errstr);
982 exit(2);
983 }
984 }
985 }
986
987 L_next_line:
988 /*
989 * Here, if pp points to non-NULL, something has been matched
990 * to the pattern.
991 */
992 if (nvflag == (pp != NULL)) {
993 matches++;
994 /*
995 * Handle q, l, and c flags.
996 */
997 if (qflag) {
998 /* no need to continue */
999 /*
1000 * End of this line is ptrend.
1001 * We have read up to ptr + data_len.
1002 */
1003 off_t pos;
1004 pos = ptr + data_len - (ptrend + 1);
1005 (void) lseek(fd, -pos, SEEK_CUR);
1006 exit(0);
1007 }
1008 if (lflag) {
1009 (void) printf("%s\n", fn);
1010 break;
1011 }
1012 if (!cflag) {
1013 if (outfn) {
1014 (void) printf("%s:", fn);
1015 }
1016 if (bflag) {
1017 (void) printf("%lld:", (offset_t)
1018 (line_offset / BSIZE));
1019 }
1020 if (nflag) {
1021 (void) printf("%lld:", lineno);
1022 }
1023 *ptrend = '\n';
1024 (void) fwrite(ptr, 1, line_len + 1, stdout);
1025 }
1026 if (ferror(stdout)) {
1027 return (0);
1028 }
1029 }
1030 L_skip_line:
1031 if (!newlinep)
1032 break;
1033
1034 data_len -= line_len + 1;
1035 line_offset += line_len + 1;
1036 ptr = ptrend + 1;
1037 }
1038
1039 if (cflag) {
1040 if (outfn) {
1041 (void) printf("%s:", fn);
1042 }
1043 if (!qflag) {
1044 (void) printf("%lld\n", matches);
1045 }
1046 }
1047 return (matches != 0);
1048 }
1049
1050 /*
1051 * usage message for grep
1052 */
1053 static void
1054 usage(void)
1055 {
1056 if (egrep || fgrep) {
1057 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1058 (void) fprintf(stderr,
1059 gettext(" [-c|-l|-q] [-bhinsvx] "
1060 "pattern_list [file ...]\n"));
1061
1062 (void) fprintf(stderr, "\t%s", cmdname);
1063 (void) fprintf(stderr,
1064 gettext(" [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1065 "[-f pattern_file]... [file...]\n"));
1066 } else {
1067 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1068 (void) fprintf(stderr,
1069 gettext(" [-c|-l|-q] [-bhinsvwx] "
1070 "pattern_list [file ...]\n"));
1071
1072 (void) fprintf(stderr, "\t%s", cmdname);
1073 (void) fprintf(stderr,
1074 gettext(" [-c|-l|-q] [-bhinsvwx] [-e pattern_list]... "
1075 "[-f pattern_file]... [file...]\n"));
1076
1077 (void) fprintf(stderr, "\t%s", cmdname);
1078 (void) fprintf(stderr,
1079 gettext(" -E [-c|-l|-q] [-bhinsvx] "
1080 "pattern_list [file ...]\n"));
1081
1082 (void) fprintf(stderr, "\t%s", cmdname);
1083 (void) fprintf(stderr,
1084 gettext(" -E [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1085 "[-f pattern_file]... [file...]\n"));
1086
1087 (void) fprintf(stderr, "\t%s", cmdname);
1088 (void) fprintf(stderr,
1089 gettext(" -F [-c|-l|-q] [-bhinsvx] "
1090 "pattern_list [file ...]\n"));
1091
1092 (void) fprintf(stderr, "\t%s", cmdname);
1093 (void) fprintf(stderr,
1094 gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1095 "[-f pattern_file]... [file...]\n"));
1096 }
1097 exit(2);
1098 /* NOTREACHED */
1099 }
1100
1101 /*
1102 * Compile literal pattern into BMG tables
1103 */
1104 static void
1105 bmgcomp(char *pat, int len)
1106 {
1107 int i;
1108 int tlen;
1109 unsigned char *uc = (unsigned char *)pat;
1110
1111 bmglen = len;
1112 bmgpat = pat;
1113
1114 for (i = 0; i < M_CSETSIZE; i++) {
1115 bmgtab[i] = len;
1116 }
1117
1118 len--;
1119 for (tlen = len, i = 0; i <= len; i++, tlen--) {
1120 bmgtab[*uc++] = tlen;
1121 }
1122 }
1123
1124 /*
1125 * BMG search.
1126 */
1127 static char *
1128 bmgexec(char *str, char *end)
1129 {
1130 int t;
1131 char *k, *s, *p;
1132
1133 k = str + bmglen - 1;
1134 if (bmglen == 1) {
1135 return (memchr(str, bmgpat[0], end - str));
1136 }
1137 for (; ; ) {
1138 /* inner loop, should be most optimized */
1139 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1140 k += t;
1141 }
1142 if (k >= end) {
1143 return (NULL);
1144 }
1145 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1146 if (p == bmgpat) {
1147 return (s);
1148 }
1149 }
1150 k++;
1151 }
1152 /* NOTREACHED */
1153 }