Print this page
3047 grep support for -r would be useful
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/grep_xpg4/grep.c
+++ new/usr/src/cmd/grep_xpg4/grep.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License, Version 1.0 only
6 6 * (the "License"). You may not use this file except in compliance
7 7 * with the License.
8 8 *
9 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 10 * or http://www.opensolaris.org/os/licensing.
11 11 * See the License for the specific language governing permissions
12 12 * and limitations under the License.
13 13 *
14 14 * When distributing Covered Code, include this CDDL HEADER in each
15 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 16 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
17 17 * fields enclosed by brackets "[]" replaced with your own identifying
18 18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 19 *
20 20 * CDDL HEADER END
21 21 */
22 22 /*
23 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 -#pragma ident "%Z%%M% %I% %E% SMI"
28 -
29 27 /*
30 28 * grep - pattern matching program - combined grep, egrep, and fgrep.
31 29 * Based on MKS grep command, with XCU & Solaris mods.
32 30 */
33 31
34 32 /*
35 33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
36 34 *
37 35 */
38 36
37 +/* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
38 +
39 39 #include <string.h>
40 40 #include <stdlib.h>
41 41 #include <ctype.h>
42 42 #include <stdarg.h>
43 43 #include <regex.h>
44 44 #include <limits.h>
45 45 #include <sys/types.h>
46 46 #include <sys/stat.h>
47 47 #include <fcntl.h>
48 48 #include <stdio.h>
49 49 #include <locale.h>
50 50 #include <wchar.h>
51 51 #include <errno.h>
52 52 #include <unistd.h>
53 53 #include <wctype.h>
54 +#include <ftw.h>
55 +#include <sys/param.h>
54 56
55 57 #define BSIZE 512 /* Size of block for -b */
56 58 #define BUFSIZE 8192 /* Input buffer size */
59 +#define MAX_DEPTH 1000 /* how deep to recurse */
57 60
58 61 #define M_CSETSIZE 256 /* singlebyte chars */
59 62 static int bmglen; /* length of BMG pattern */
60 63 static char *bmgpat; /* BMG pattern */
61 64 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */
62 65
63 66 typedef struct _PATTERN {
64 67 char *pattern; /* original pattern */
65 68 wchar_t *wpattern; /* wide, lowercased pattern */
66 69 struct _PATTERN *next;
67 70 regex_t re; /* compiled pattern */
68 71 } PATTERN;
69 72
70 73 static PATTERN *patterns;
71 74 static char errstr[128]; /* regerror string buffer */
72 75 static int regflags = 0; /* regcomp options */
76 +static int matched = 0; /* return of the grep() */
77 +static int errors = 0; /* count of errors */
73 78 static uchar_t fgrep = 0; /* Invoked as fgrep */
74 79 static uchar_t egrep = 0; /* Invoked as egrep */
75 80 static uchar_t nvflag = 1; /* Print matching lines */
76 81 static uchar_t cflag; /* Count of matches */
77 82 static uchar_t iflag; /* Case insensitive matching */
78 83 static uchar_t hflag; /* Suppress printing of filename */
79 84 static uchar_t lflag; /* Print file names of matches */
80 85 static uchar_t nflag; /* Precede lines by line number */
86 +static uchar_t rflag; /* Search directories recursively */
81 87 static uchar_t bflag; /* Precede matches by block number */
82 88 static uchar_t sflag; /* Suppress file error messages */
83 89 static uchar_t qflag; /* Suppress standard output */
84 90 static uchar_t wflag; /* Search for expression as a word */
85 91 static uchar_t xflag; /* Anchoring */
86 92 static uchar_t Eflag; /* Egrep or -E flag */
87 93 static uchar_t Fflag; /* Fgrep or -F flag */
94 +static uchar_t Rflag; /* Like rflag, but follow symlinks */
88 95 static uchar_t outfn; /* Put out file name */
89 96 static char *cmdname;
90 97
91 98 static int use_wchar, use_bmg, mblocale;
92 99
93 100 static size_t outbuflen, prntbuflen;
94 101 static char *prntbuf;
95 102 static wchar_t *outline;
96 103
97 -static void addfile(char *fn);
104 +static void addfile(const char *fn);
98 105 static void addpattern(char *s);
99 106 static void fixpatterns(void);
100 107 static void usage(void);
101 -static int grep(int, char *);
108 +static int grep(int, const char *);
102 109 static void bmgcomp(char *, int);
103 110 static char *bmgexec(char *, char *);
111 +static int recursive(const char *, const struct stat *, int, struct FTW *);
112 +static void process_path(const char *);
113 +static void process_file(const char *, int);
104 114
105 115 /*
106 116 * mainline for grep
107 117 */
108 118 int
109 119 main(int argc, char **argv)
110 120 {
111 121 char *ap;
112 - int matched = 0;
113 122 int c;
114 123 int fflag = 0;
115 - int errors = 0;
116 124 int i, n_pattern = 0, n_file = 0;
117 125 char **pattern_list = NULL;
118 126 char **file_list = NULL;
119 127
120 128 (void) setlocale(LC_ALL, "");
121 129 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
122 130 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
123 131 #endif
124 132 (void) textdomain(TEXT_DOMAIN);
125 133
126 134 /*
127 135 * true if this is running on the multibyte locale
128 136 */
129 137 mblocale = (MB_CUR_MAX > 1);
130 138 /*
131 139 * Strip any leading directory components from the command name
132 140 */
133 141 cmdname = argv[0];
134 142 if (ap = strrchr(cmdname, '/'))
135 143 cmdname = ap + 1;
136 144
137 145 ap = cmdname;
138 146 /*
139 147 * Detect egrep/fgrep via command name, map to -E and -F options.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
140 148 */
141 149 if (*ap == 'e' || *ap == 'E') {
142 150 regflags |= REG_EXTENDED;
143 151 egrep++;
144 152 } else {
145 153 if (*ap == 'f' || *ap == 'F') {
146 154 fgrep++;
147 155 }
148 156 }
149 157
150 - while ((c = getopt(argc, argv, "vwchilnbse:f:qxEFI")) != EOF) {
158 + while ((c = getopt(argc, argv, "vwchilnrbse:f:qxEFIR")) != EOF) {
151 159 switch (c) {
152 160 case 'v': /* POSIX: negate matches */
153 161 nvflag = 0;
154 162 break;
155 163
156 164 case 'c': /* POSIX: write count */
157 165 cflag++;
158 166 break;
159 167
160 168 case 'i': /* POSIX: ignore case */
161 169 iflag++;
162 170 regflags |= REG_ICASE;
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
163 171 break;
164 172
165 173 case 'l': /* POSIX: Write filenames only */
166 174 lflag++;
167 175 break;
168 176
169 177 case 'n': /* POSIX: Write line numbers */
170 178 nflag++;
171 179 break;
172 180
181 + case 'r': /* Solaris: search recursively */
182 + rflag++;
183 + break;
184 +
173 185 case 'b': /* Solaris: Write file block numbers */
174 186 bflag++;
175 187 break;
176 188
177 189 case 's': /* POSIX: No error msgs for files */
178 190 sflag++;
179 191 break;
180 192
181 193 case 'e': /* POSIX: pattern list */
182 194 n_pattern++;
183 195 pattern_list = realloc(pattern_list,
184 196 sizeof (char *) * n_pattern);
185 197 if (pattern_list == NULL) {
186 198 (void) fprintf(stderr,
187 199 gettext("%s: out of memory\n"),
188 200 cmdname);
189 201 exit(2);
190 202 }
191 203 *(pattern_list + n_pattern - 1) = optarg;
192 204 break;
193 205
194 206 case 'f': /* POSIX: pattern file */
195 207 fflag = 1;
196 208 n_file++;
197 209 file_list = realloc(file_list,
198 210 sizeof (char *) * n_file);
199 211 if (file_list == NULL) {
200 212 (void) fprintf(stderr,
201 213 gettext("%s: out of memory\n"),
202 214 cmdname);
203 215 exit(2);
204 216 }
205 217 *(file_list + n_file - 1) = optarg;
206 218 break;
207 219 case 'h': /* Solaris: suppress printing of file name */
208 220 hflag = 1;
209 221 break;
210 222
211 223 case 'q': /* POSIX: quiet: status only */
212 224 qflag++;
213 225 break;
214 226
215 227 case 'w': /* Solaris: treat pattern as word */
216 228 wflag++;
217 229 break;
218 230
219 231 case 'x': /* POSIX: full line matches */
220 232 xflag++;
221 233 regflags |= REG_ANCHOR;
222 234 break;
↓ open down ↓ |
40 lines elided |
↑ open up ↑ |
223 235
224 236 case 'E': /* POSIX: Extended RE's */
225 237 regflags |= REG_EXTENDED;
226 238 Eflag++;
227 239 break;
228 240
229 241 case 'F': /* POSIX: strings, not RE's */
230 242 Fflag++;
231 243 break;
232 244
245 + case 'R': /* Solaris: like rflag, but follow symlinks */
246 + Rflag++;
247 + rflag++;
248 + break;
249 +
233 250 default:
234 251 usage();
235 252 }
236 253 }
237 254 /*
238 255 * If we're invoked as egrep or fgrep we need to do some checks
239 256 */
240 257
241 258 if (egrep || fgrep) {
242 259 /*
243 260 * Use of -E or -F with egrep or fgrep is illegal
244 261 */
245 262 if (Eflag || Fflag)
246 263 usage();
247 264 /*
248 265 * Don't allow use of wflag with egrep / fgrep
249 266 */
250 267 if (wflag)
251 268 usage();
252 269 /*
253 270 * For Solaris the -s flag is equivalent to XCU -q
254 271 */
255 272 if (sflag)
256 273 qflag++;
257 274 /*
258 275 * done with above checks - set the appropriate flags
259 276 */
260 277 if (egrep)
261 278 Eflag++;
262 279 else /* Else fgrep */
263 280 Fflag++;
264 281 }
265 282
266 283 if (wflag && (Eflag || Fflag)) {
267 284 /*
268 285 * -w cannot be specified with grep -E or grep -F
269 286 */
270 287 usage();
271 288 }
272 289
273 290 /*
274 291 * -E and -F flags are mutually exclusive - check for this
275 292 */
276 293 if (Eflag && Fflag)
277 294 usage();
278 295
279 296 /*
280 297 * -c, -l and -q flags are mutually exclusive
281 298 * We have -c override -l like in Solaris.
282 299 * -q overrides -l & -c programmatically in grep() function.
283 300 */
284 301 if (cflag && lflag)
285 302 lflag = 0;
286 303
287 304 argv += optind - 1;
288 305 argc -= optind - 1;
289 306
290 307 /*
291 308 * Now handling -e and -f option
292 309 */
293 310 if (pattern_list) {
294 311 for (i = 0; i < n_pattern; i++) {
295 312 addpattern(pattern_list[i]);
296 313 }
297 314 free(pattern_list);
298 315 }
299 316 if (file_list) {
300 317 for (i = 0; i < n_file; i++) {
301 318 addfile(file_list[i]);
302 319 }
303 320 free(file_list);
304 321 }
305 322
306 323 /*
307 324 * No -e or -f? Make sure there is one more arg, use it as the pattern.
308 325 */
309 326 if (patterns == NULL && !fflag) {
310 327 if (argc < 2)
311 328 usage();
312 329 addpattern(argv[1]);
313 330 argc--;
314 331 argv++;
315 332 }
316 333
317 334 /*
318 335 * If -x flag is not specified or -i flag is specified
319 336 * with fgrep in a multibyte locale, need to use
320 337 * the wide character APIs. Otherwise, byte-oriented
321 338 * process will be done.
322 339 */
323 340 use_wchar = Fflag && mblocale && (!xflag || iflag);
324 341
325 342 /*
326 343 * Compile Patterns and also decide if BMG can be used
↓ open down ↓ |
84 lines elided |
↑ open up ↑ |
327 344 */
328 345 fixpatterns();
329 346
330 347 /* Process all files: stdin, or rest of arg list */
331 348 if (argc < 2) {
332 349 matched = grep(0, gettext("(standard input)"));
333 350 } else {
334 351 if (argc > 2 && hflag == 0)
335 352 outfn = 1; /* Print filename on match line */
336 353 for (argv++; *argv != NULL; argv++) {
337 - int fd;
338 -
339 - if ((fd = open(*argv, O_RDONLY)) == -1) {
340 - errors = 1;
341 - if (sflag)
342 - continue;
343 - (void) fprintf(stderr, gettext(
344 - "%s: can't open \"%s\"\n"),
345 - cmdname, *argv);
346 - continue;
347 - }
348 - matched |= grep(fd, *argv);
349 - (void) close(fd);
350 - if (ferror(stdout))
351 - break;
354 + process_path(*argv);
352 355 }
353 356 }
354 357 /*
355 358 * Return() here is used instead of exit
356 359 */
357 360
358 361 (void) fflush(stdout);
359 362
360 363 if (errors)
361 364 return (2);
362 365 return (matched ? 0 : 1);
363 366 }
364 367
368 +static void
369 +process_path(const char *path)
370 +{
371 + struct stat st;
372 + int walkflags = FTW_CHDIR;
373 + char *buf = NULL;
374 +
375 + if (rflag) {
376 + if (stat(path, &st) != -1 &&
377 + (st.st_mode & S_IFMT) == S_IFDIR) {
378 + outfn = 1; /* Print filename */
379 +
380 + /*
381 + * Add trailing slash if arg
382 + * is directory, to resolve symlinks.
383 + */
384 + if (path[strlen(path) - 1] != '/') {
385 + (void) asprintf(&buf, "%s/", path);
386 + if (buf != NULL)
387 + path = buf;
388 + }
389 +
390 + /*
391 + * Search through subdirs if path is directory.
392 + * Don't follow symlinks if Rflag is not set.
393 + */
394 + if (!Rflag)
395 + walkflags |= FTW_PHYS;
396 +
397 + if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
398 + if (!sflag)
399 + (void) fprintf(stderr,
400 + gettext("%s: can't open \"%s\"\n"),
401 + cmdname, path);
402 + errors = 1;
403 + }
404 + return;
405 + }
406 + }
407 + process_file(path, 0);
408 +}
409 +
365 410 /*
411 + * Read and process all files in directory recursively.
412 + */
413 +static int
414 +recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
415 +{
416 + /*
417 + * Process files and follow symlinks if Rflag set.
418 + */
419 + if (info != FTW_F) {
420 + /* Report broken symlinks and unreadable files */
421 + if (!sflag &&
422 + (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
423 + (void) fprintf(stderr,
424 + gettext("%s: can't open \"%s\"\n"), cmdname, name);
425 + }
426 + return (0);
427 + }
428 +
429 +
430 + /* Skip devices and pipes if Rflag is not set */
431 + if (!Rflag && !S_ISREG(statp->st_mode))
432 + return (0);
433 + /* Pass offset to relative name from FTW_CHDIR */
434 + process_file(name, ftw->base);
435 + return (0);
436 +}
437 +
438 +/*
439 + * Opens the file and calls the grep function.
440 + */
441 +static void
442 +process_file(const char *name, int base)
443 +{
444 + int fd;
445 +
446 + if ((fd = open(name + base, O_RDONLY)) == -1) {
447 + errors = 1;
448 + if (!sflag) /* Silent mode */
449 + (void) fprintf(stderr, gettext(
450 + "%s: can't open \"%s\"\n"),
451 + cmdname, name);
452 + return;
453 + }
454 + matched |= grep(fd, name);
455 + (void) close(fd);
456 +
457 + if (ferror(stdout)) {
458 + (void) fprintf(stderr, gettext(
459 + "%s: error writing to stdout\n"),
460 + cmdname);
461 + (void) fflush(stdout);
462 + exit(2);
463 + }
464 +
465 +}
466 +
467 +/*
366 468 * Add a file of strings to the pattern list.
367 469 */
368 470 static void
369 -addfile(char *fn)
471 +addfile(const char *fn)
370 472 {
371 473 FILE *fp;
372 474 char *inbuf;
373 475 char *bufp;
374 476 size_t bufsiz, buflen, bufused;
375 477
376 478 /*
377 479 * Open the pattern file
378 480 */
379 481 if ((fp = fopen(fn, "r")) == NULL) {
380 482 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
381 483 cmdname, fn);
382 484 exit(2);
383 485 }
384 486 bufsiz = BUFSIZE;
385 487 if ((inbuf = malloc(bufsiz)) == NULL) {
386 488 (void) fprintf(stderr,
387 489 gettext("%s: out of memory\n"), cmdname);
388 490 exit(2);
389 491 }
390 492 bufp = inbuf;
391 493 bufused = 0;
392 494 /*
393 495 * Read in the file, reallocing as we need more memory
394 496 */
395 497 while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
396 498 buflen = strlen(bufp);
397 499 bufused += buflen;
398 500 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
399 501 /*
400 502 * if this line does not fit to the buffer,
401 503 * realloc larger buffer
402 504 */
403 505 bufsiz += BUFSIZE;
404 506 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
405 507 (void) fprintf(stderr,
406 508 gettext("%s: out of memory\n"),
407 509 cmdname);
408 510 exit(2);
409 511 }
410 512 bufp = inbuf + bufused;
411 513 continue;
412 514 }
413 515 if (bufp[buflen - 1] == '\n') {
414 516 bufp[--buflen] = '\0';
415 517 }
416 518 addpattern(inbuf);
417 519
418 520 bufp = inbuf;
419 521 bufused = 0;
420 522 }
421 523 free(inbuf);
422 524 (void) fclose(fp);
423 525 }
424 526
425 527 /*
426 528 * Add a string to the pattern list.
427 529 */
428 530 static void
429 531 addpattern(char *s)
430 532 {
431 533 PATTERN *pp;
432 534 char *wordbuf;
433 535 char *np;
434 536
435 537 for (; ; ) {
436 538 np = strchr(s, '\n');
437 539 if (np != NULL)
438 540 *np = '\0';
439 541 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
440 542 (void) fprintf(stderr, gettext(
441 543 "%s: out of memory\n"),
442 544 cmdname);
443 545 exit(2);
444 546 }
445 547 if (wflag) {
446 548 /*
447 549 * Solaris wflag support: Add '<' '>' to pattern to
448 550 * select it as a word. Doesn't make sense with -F
449 551 * but we're Libertarian.
450 552 */
451 553 size_t slen, wordlen;
452 554
453 555 slen = strlen(s);
454 556 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
455 557 if ((wordbuf = malloc(wordlen)) == NULL) {
456 558 (void) fprintf(stderr,
457 559 gettext("%s: out of memory\n"),
458 560 cmdname);
459 561 exit(2);
460 562 }
461 563 (void) strcpy(wordbuf, "\\<");
462 564 (void) strcpy(wordbuf + 2, s);
463 565 (void) strcpy(wordbuf + 2 + slen, "\\>");
464 566 } else {
465 567 if ((wordbuf = strdup(s)) == NULL) {
466 568 (void) fprintf(stderr,
467 569 gettext("%s: out of memory\n"),
468 570 cmdname);
469 571 exit(2);
470 572 }
471 573 }
472 574 pp->pattern = wordbuf;
473 575 pp->next = patterns;
474 576 patterns = pp;
475 577 if (np == NULL)
476 578 break;
477 579 s = np + 1;
478 580 }
479 581 }
480 582
481 583 /*
482 584 * Fix patterns.
483 585 * Must do after all arguments read, in case later -i option.
484 586 */
485 587 static void
486 588 fixpatterns(void)
487 589 {
488 590 PATTERN *pp;
489 591 int rv, fix_pattern, npatterns;
490 592
491 593 /*
492 594 * As REG_ANCHOR flag is not supported in the current Solaris,
493 595 * need to fix the specified pattern if -x is specified with
494 596 * grep or egrep
495 597 */
496 598 fix_pattern = !Fflag && xflag;
497 599
498 600 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
499 601 npatterns++;
500 602 if (fix_pattern) {
501 603 char *cp, *cq;
502 604 size_t plen, nplen;
503 605
504 606 plen = strlen(pp->pattern);
505 607 /* '^' pattern '$' */
506 608 nplen = 1 + plen + 1 + 1;
507 609 if ((cp = malloc(nplen)) == NULL) {
508 610 (void) fprintf(stderr,
509 611 gettext("%s: out of memory\n"),
510 612 cmdname);
511 613 exit(2);
512 614 }
513 615 cq = cp;
514 616 *cq++ = '^';
515 617 cq = strcpy(cq, pp->pattern) + plen;
516 618 *cq++ = '$';
517 619 *cq = '\0';
518 620 free(pp->pattern);
519 621 pp->pattern = cp;
520 622 }
↓ open down ↓ |
141 lines elided |
↑ open up ↑ |
521 623
522 624 if (Fflag) {
523 625 if (use_wchar) {
524 626 /*
525 627 * Fflag && mblocale && iflag
526 628 * Fflag && mblocale && !xflag
527 629 */
528 630 size_t n;
529 631 n = strlen(pp->pattern) + 1;
530 632 if ((pp->wpattern =
531 - malloc(sizeof (wchar_t) * n)) == NULL) {
633 + malloc(sizeof (wchar_t) * n)) == NULL) {
532 634 (void) fprintf(stderr,
533 635 gettext("%s: out of memory\n"),
534 636 cmdname);
535 637 exit(2);
536 638 }
537 639 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
538 640 (size_t)-1) {
539 641 (void) fprintf(stderr,
540 642 gettext("%s: failed to convert "
541 - "\"%s\" to wide-characters\n"),
643 + "\"%s\" to wide-characters\n"),
542 644 cmdname, pp->pattern);
543 645 exit(2);
544 646 }
545 647 if (iflag) {
546 648 wchar_t *wp;
547 649 for (wp = pp->wpattern; *wp != L'\0';
548 650 wp++) {
549 651 *wp = towlower((wint_t)*wp);
550 652 }
551 653 }
552 654 free(pp->pattern);
553 655 } else {
554 656 /*
555 657 * Fflag && mblocale && !iflag
556 658 * Fflag && !mblocale && iflag
557 659 * Fflag && !mblocale && !iflag
558 660 */
559 661 if (iflag) {
560 662 unsigned char *cp;
561 663 for (cp = (unsigned char *)pp->pattern;
562 664 *cp != '\0'; cp++) {
563 665 *cp = tolower(*cp);
564 666 }
565 667 }
566 668 }
567 669 /*
568 670 * fgrep: No regular expressions.
569 671 */
570 672 continue;
571 673 }
↓ open down ↓ |
20 lines elided |
↑ open up ↑ |
572 674
573 675 /*
574 676 * For non-fgrep, compile the regular expression,
575 677 * give an informative error message, and exit if
576 678 * it didn't compile.
577 679 */
578 680 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
579 681 (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
580 682 (void) fprintf(stderr,
581 683 gettext("%s: RE error in %s: %s\n"),
582 - cmdname, pp->pattern, errstr);
684 + cmdname, pp->pattern, errstr);
583 685 exit(2);
584 686 }
585 687 free(pp->pattern);
586 688 }
587 689
588 690 /*
589 691 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
590 692 * Use the Boyer-Moore-Gosper algorithm if:
591 693 * - fgrep (Fflag)
592 694 * - singlebyte locale (!mblocale)
593 695 * - no ignoring case (!iflag)
594 696 * - no printing line numbers (!nflag)
595 697 * - no negating the output (nvflag)
596 698 * - only one pattern (npatterns == 1)
597 699 * - non zero length pattern (strlen(patterns->pattern) != 0)
598 700 *
599 701 * It's guaranteed patterns->pattern is still alive
600 702 * when Fflag && !mblocale.
601 703 */
602 704 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
603 705 (npatterns == 1) && (strlen(patterns->pattern) != 0);
604 706 }
605 707
606 708 /*
607 709 * Search a newline from the beginning of the string
608 710 */
609 711 static char *
610 712 find_nl(const char *ptr, size_t len)
611 713 {
612 714 while (len-- != 0) {
613 715 if (*ptr++ == '\n') {
614 716 return ((char *)--ptr);
615 717 }
616 718 }
617 719 return (NULL);
618 720 }
619 721
620 722 /*
621 723 * Search a newline from the end of the string
622 724 */
623 725 static char *
624 726 rfind_nl(const char *ptr, size_t len)
625 727 {
626 728 const char *uptr = ptr + len;
627 729 while (len--) {
628 730 if (*--uptr == '\n') {
629 731 return ((char *)uptr);
630 732 }
631 733 }
632 734 return (NULL);
633 735 }
634 736
635 737 /*
636 738 * Duplicate the specified string converting each character
637 739 * into a lower case.
638 740 */
639 741 static char *
640 742 istrdup(const char *s1)
641 743 {
642 744 static size_t ibuflen = 0;
643 745 static char *ibuf = NULL;
644 746 size_t slen;
645 747 char *p;
646 748
647 749 slen = strlen(s1);
648 750 if (slen >= ibuflen) {
649 751 /* s1 does not fit in ibuf */
650 752 ibuflen = slen + 1;
651 753 ibuf = realloc(ibuf, ibuflen);
652 754 if (ibuf == NULL) {
653 755 (void) fprintf(stderr,
654 756 gettext("%s: out of memory\n"), cmdname);
655 757 exit(2);
656 758 }
657 759 }
658 760 p = ibuf;
659 761 do {
660 762 *p++ = tolower(*s1);
661 763 } while (*s1++ != '\0');
662 764 return (ibuf);
663 765 }
664 766
665 767 /*
666 768 * Do grep on a single file.
667 769 * Return true if any lines matched.
↓ open down ↓ |
75 lines elided |
↑ open up ↑ |
668 770 *
669 771 * We have two strategies:
670 772 * The fast one is used when we have a single pattern with
671 773 * a string known to occur in the pattern. We can then
672 774 * do a BMG match on the whole buffer.
673 775 * This is an order of magnitude faster.
674 776 * Otherwise we split the buffer into lines,
675 777 * and check for a match on each line.
676 778 */
677 779 static int
678 -grep(int fd, char *fn)
780 +grep(int fd, const char *fn)
679 781 {
680 782 PATTERN *pp;
681 783 off_t data_len; /* length of the data chunk */
682 784 off_t line_len; /* length of the current line */
683 785 off_t line_offset; /* current line's offset from the beginning */
684 786 long long lineno;
685 787 long long matches = 0; /* Number of matching lines */
686 788 int newlinep; /* 0 if the last line of file has no newline */
687 789 char *ptr, *ptrend;
688 790
689 791
690 792 if (patterns == NULL)
691 793 return (0); /* no patterns to match -- just return */
692 794
693 795 pp = patterns;
694 796
695 797 if (use_bmg) {
696 798 bmgcomp(pp->pattern, strlen(pp->pattern));
697 799 }
698 800
699 801 if (use_wchar && outline == NULL) {
700 802 outbuflen = BUFSIZE + 1;
701 803 outline = malloc(sizeof (wchar_t) * outbuflen);
702 804 if (outline == NULL) {
703 805 (void) fprintf(stderr, gettext("%s: out of memory\n"),
704 806 cmdname);
705 807 exit(2);
706 808 }
707 809 }
708 810
709 811 if (prntbuf == NULL) {
710 812 prntbuflen = BUFSIZE;
711 813 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
712 814 (void) fprintf(stderr, gettext("%s: out of memory\n"),
713 815 cmdname);
714 816 exit(2);
715 817 }
716 818 }
717 819
718 820 line_offset = 0;
719 821 lineno = 0;
720 822 newlinep = 1;
721 823 data_len = 0;
722 824 for (; ; ) {
723 825 long count;
724 826 off_t offset = 0;
725 827
726 828 if (data_len == 0) {
727 829 /*
728 830 * If no data in the buffer, reset ptr
729 831 */
730 832 ptr = prntbuf;
731 833 }
732 834 if (ptr == prntbuf) {
↓ open down ↓ |
44 lines elided |
↑ open up ↑ |
733 835 /*
734 836 * The current data chunk starts from prntbuf.
735 837 * This means either the buffer has no data
736 838 * or the buffer has no newline.
737 839 * So, read more data from input.
738 840 */
739 841 count = read(fd, ptr + data_len, prntbuflen - data_len);
740 842 if (count < 0) {
741 843 /* read error */
742 844 if (cflag) {
743 - if (outfn) {
845 + if (outfn && !rflag) {
744 846 (void) fprintf(stdout,
745 847 "%s:", fn);
746 848 }
747 - if (!qflag) {
849 + if (!qflag && !rflag) {
748 850 (void) fprintf(stdout, "%lld\n",
749 851 matches);
750 852 }
751 853 }
752 854 return (0);
753 855 } else if (count == 0) {
754 856 /* no new data */
755 857 if (data_len == 0) {
756 858 /* end of file already reached */
757 859 break;
758 860 }
759 861 /* last line of file has no newline */
760 862 ptrend = ptr + data_len;
761 863 newlinep = 0;
762 864 goto L_start_process;
763 865 }
764 866 offset = data_len;
765 867 data_len += count;
766 868 }
767 869
768 870 /*
769 871 * Look for newline in the chunk
770 872 * between ptr + offset and ptr + data_len - offset.
771 873 */
772 874 ptrend = find_nl(ptr + offset, data_len - offset);
773 875 if (ptrend == NULL) {
774 876 /* no newline found in this chunk */
775 877 if (ptr > prntbuf) {
776 878 /*
777 879 * Move remaining data to the beginning
778 880 * of the buffer.
779 881 * Remaining data lie from ptr for
780 882 * data_len bytes.
781 883 */
782 884 (void) memmove(prntbuf, ptr, data_len);
783 885 }
784 886 if (data_len == prntbuflen) {
785 887 /*
786 888 * Not enough room in the buffer
787 889 */
788 890 prntbuflen += BUFSIZE;
789 891 prntbuf = realloc(prntbuf, prntbuflen + 1);
790 892 if (prntbuf == NULL) {
791 893 (void) fprintf(stderr,
792 894 gettext("%s: out of memory\n"),
793 895 cmdname);
794 896 exit(2);
795 897 }
796 898 }
797 899 ptr = prntbuf;
798 900 /* read the next input */
799 901 continue;
800 902 }
801 903 L_start_process:
802 904
803 905 /*
804 906 * Beginning of the chunk: ptr
805 907 * End of the chunk: ptr + data_len
806 908 * Beginning of the line: ptr
807 909 * End of the line: ptrend
808 910 */
809 911
810 912 if (use_bmg) {
811 913 /*
812 914 * Use Boyer-Moore-Gosper algorithm to find out if
813 915 * this chunk (not this line) contains the specified
814 916 * pattern. If not, restart from the last line
815 917 * of this chunk.
816 918 */
817 919 char *bline;
818 920 bline = bmgexec(ptr, ptr + data_len);
819 921 if (bline == NULL) {
820 922 /*
821 923 * No pattern found in this chunk.
822 924 * Need to find the last line
823 925 * in this chunk.
824 926 */
825 927 ptrend = rfind_nl(ptr, data_len);
826 928
827 929 /*
828 930 * When this chunk does not contain newline,
829 931 * ptrend becomes NULL, which should happen
830 932 * when the last line of file does not end
831 933 * with a newline. At such a point,
832 934 * newlinep should have been set to 0.
833 935 * Therefore, just after jumping to
834 936 * L_skip_line, the main for-loop quits,
835 937 * and the line_len value won't be
836 938 * used.
837 939 */
838 940 line_len = ptrend - ptr;
839 941 goto L_skip_line;
840 942 }
841 943 if (bline > ptrend) {
842 944 /*
843 945 * Pattern found not in the first line
844 946 * of this chunk.
845 947 * Discard the first line.
846 948 */
847 949 line_len = ptrend - ptr;
848 950 goto L_skip_line;
849 951 }
850 952 /*
851 953 * Pattern found in the first line of this chunk.
↓ open down ↓ |
94 lines elided |
↑ open up ↑ |
852 954 * Using this result.
853 955 */
854 956 *ptrend = '\0';
855 957 line_len = ptrend - ptr;
856 958
857 959 /*
858 960 * before jumping to L_next_line,
859 961 * need to handle xflag if specified
860 962 */
861 963 if (xflag && (line_len != bmglen ||
862 - strcmp(bmgpat, ptr) != 0)) {
964 + strcmp(bmgpat, ptr) != 0)) {
863 965 /* didn't match */
864 966 pp = NULL;
865 967 } else {
866 968 pp = patterns; /* to make it happen */
867 969 }
868 970 goto L_next_line;
869 971 }
870 972 lineno++;
871 973 /*
872 974 * Line starts from ptr and ends at ptrend.
873 975 * line_len will be the length of the line.
874 976 */
875 977 *ptrend = '\0';
876 978 line_len = ptrend - ptr;
877 979
878 980 /*
879 981 * From now, the process will be performed based
880 982 * on the line from ptr to ptrend.
881 983 */
882 984 if (use_wchar) {
883 985 size_t len;
884 986
885 987 if (line_len >= outbuflen) {
886 988 outbuflen = line_len + 1;
887 989 outline = realloc(outline,
888 990 sizeof (wchar_t) * outbuflen);
889 991 if (outline == NULL) {
890 992 (void) fprintf(stderr,
891 993 gettext("%s: out of memory\n"),
892 994 cmdname);
893 995 exit(2);
894 996 }
895 997 }
896 998
897 999 len = mbstowcs(outline, ptr, line_len);
898 1000 if (len == (size_t)-1) {
899 1001 (void) fprintf(stderr, gettext(
900 1002 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
901 1003 cmdname, fn, lineno);
902 1004 /* never match a line with invalid sequence */
903 1005 goto L_skip_line;
904 1006 }
905 1007 outline[len] = L'\0';
906 1008
907 1009 if (iflag) {
↓ open down ↓ |
35 lines elided |
↑ open up ↑ |
908 1010 wchar_t *cp;
909 1011 for (cp = outline; *cp != '\0'; cp++) {
910 1012 *cp = towlower((wint_t)*cp);
911 1013 }
912 1014 }
913 1015
914 1016 if (xflag) {
915 1017 for (pp = patterns; pp; pp = pp->next) {
916 1018 if (outline[0] == pp->wpattern[0] &&
917 1019 wcscmp(outline,
918 - pp->wpattern) == 0) {
1020 + pp->wpattern) == 0) {
919 1021 /* matched */
920 1022 break;
921 1023 }
922 1024 }
923 1025 } else {
924 1026 for (pp = patterns; pp; pp = pp->next) {
925 1027 if (wcswcs(outline, pp->wpattern)
926 1028 != NULL) {
927 1029 /* matched */
928 1030 break;
929 1031 }
930 1032 }
931 1033 }
932 1034 } else if (Fflag) {
933 1035 /* fgrep in byte-oriented handling */
934 1036 char *fptr;
935 1037 if (iflag) {
936 1038 fptr = istrdup(ptr);
937 1039 } else {
938 1040 fptr = ptr;
939 1041 }
940 1042 if (xflag) {
941 1043 /* fgrep -x */
942 1044 for (pp = patterns; pp; pp = pp->next) {
943 1045 if (fptr[0] == pp->pattern[0] &&
944 1046 strcmp(fptr, pp->pattern) == 0) {
945 1047 /* matched */
946 1048 break;
947 1049 }
948 1050 }
949 1051 } else {
950 1052 for (pp = patterns; pp; pp = pp->next) {
951 1053 if (strstr(fptr, pp->pattern) != NULL) {
952 1054 /* matched */
953 1055 break;
954 1056 }
955 1057 }
956 1058 }
957 1059 } else {
958 1060 /* grep or egrep */
959 1061 for (pp = patterns; pp; pp = pp->next) {
960 1062 int rv;
961 1063
962 1064 rv = regexec(&pp->re, ptr, 0, NULL, 0);
963 1065 if (rv == REG_OK) {
964 1066 /* matched */
965 1067 break;
966 1068 }
967 1069
968 1070 switch (rv) {
969 1071 case REG_NOMATCH:
970 1072 break;
971 1073 case REG_ECHAR:
972 1074 (void) fprintf(stderr, gettext(
973 1075 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
974 1076 cmdname, fn, lineno);
975 1077 break;
976 1078 default:
977 1079 (void) regerror(rv, &pp->re, errstr,
978 1080 sizeof (errstr));
979 1081 (void) fprintf(stderr, gettext(
980 1082 "%s: input file \"%s\": line %lld: %s\n"),
981 1083 cmdname, fn, lineno, errstr);
982 1084 exit(2);
983 1085 }
984 1086 }
985 1087 }
986 1088
987 1089 L_next_line:
988 1090 /*
989 1091 * Here, if pp points to non-NULL, something has been matched
990 1092 * to the pattern.
991 1093 */
992 1094 if (nvflag == (pp != NULL)) {
993 1095 matches++;
994 1096 /*
995 1097 * Handle q, l, and c flags.
996 1098 */
997 1099 if (qflag) {
998 1100 /* no need to continue */
999 1101 /*
1000 1102 * End of this line is ptrend.
1001 1103 * We have read up to ptr + data_len.
1002 1104 */
1003 1105 off_t pos;
1004 1106 pos = ptr + data_len - (ptrend + 1);
1005 1107 (void) lseek(fd, -pos, SEEK_CUR);
1006 1108 exit(0);
1007 1109 }
1008 1110 if (lflag) {
1009 1111 (void) printf("%s\n", fn);
1010 1112 break;
1011 1113 }
1012 1114 if (!cflag) {
1013 1115 if (outfn) {
1014 1116 (void) printf("%s:", fn);
1015 1117 }
1016 1118 if (bflag) {
1017 1119 (void) printf("%lld:", (offset_t)
1018 1120 (line_offset / BSIZE));
1019 1121 }
1020 1122 if (nflag) {
1021 1123 (void) printf("%lld:", lineno);
1022 1124 }
1023 1125 *ptrend = '\n';
1024 1126 (void) fwrite(ptr, 1, line_len + 1, stdout);
1025 1127 }
1026 1128 if (ferror(stdout)) {
1027 1129 return (0);
1028 1130 }
1029 1131 }
1030 1132 L_skip_line:
1031 1133 if (!newlinep)
1032 1134 break;
1033 1135
1034 1136 data_len -= line_len + 1;
1035 1137 line_offset += line_len + 1;
1036 1138 ptr = ptrend + 1;
1037 1139 }
1038 1140
1039 1141 if (cflag) {
1040 1142 if (outfn) {
1041 1143 (void) printf("%s:", fn);
1042 1144 }
1043 1145 if (!qflag) {
1044 1146 (void) printf("%lld\n", matches);
1045 1147 }
1046 1148 }
1047 1149 return (matches != 0);
1048 1150 }
↓ open down ↓ |
120 lines elided |
↑ open up ↑ |
1049 1151
1050 1152 /*
1051 1153 * usage message for grep
1052 1154 */
1053 1155 static void
1054 1156 usage(void)
1055 1157 {
1056 1158 if (egrep || fgrep) {
1057 1159 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1058 1160 (void) fprintf(stderr,
1059 - gettext(" [-c|-l|-q] [-bhinsvx] "
1060 - "pattern_list [file ...]\n"));
1161 + gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1162 + "pattern_list [file ...]\n"));
1061 1163
1062 1164 (void) fprintf(stderr, "\t%s", cmdname);
1063 1165 (void) fprintf(stderr,
1064 - gettext(" [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1065 - "[-f pattern_file]... [file...]\n"));
1166 + gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1167 + "[-e pattern_list]... "
1168 + "[-f pattern_file]... [file...]\n"));
1066 1169 } else {
1067 1170 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1068 1171 (void) fprintf(stderr,
1069 - gettext(" [-c|-l|-q] [-bhinsvwx] "
1070 - "pattern_list [file ...]\n"));
1172 + gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1173 + "pattern_list [file ...]\n"));
1071 1174
1072 1175 (void) fprintf(stderr, "\t%s", cmdname);
1073 1176 (void) fprintf(stderr,
1074 - gettext(" [-c|-l|-q] [-bhinsvwx] [-e pattern_list]... "
1075 - "[-f pattern_file]... [file...]\n"));
1177 + gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1178 + "[-e pattern_list]... "
1179 + "[-f pattern_file]... [file...]\n"));
1076 1180
1077 1181 (void) fprintf(stderr, "\t%s", cmdname);
1078 1182 (void) fprintf(stderr,
1079 - gettext(" -E [-c|-l|-q] [-bhinsvx] "
1080 - "pattern_list [file ...]\n"));
1183 + gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1184 + "pattern_list [file ...]\n"));
1081 1185
1082 1186 (void) fprintf(stderr, "\t%s", cmdname);
1083 1187 (void) fprintf(stderr,
1084 - gettext(" -E [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1085 - "[-f pattern_file]... [file...]\n"));
1188 + gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1189 + "[-e pattern_list]... "
1190 + "[-f pattern_file]... [file...]\n"));
1086 1191
1087 1192 (void) fprintf(stderr, "\t%s", cmdname);
1088 1193 (void) fprintf(stderr,
1089 - gettext(" -F [-c|-l|-q] [-bhinsvx] "
1090 - "pattern_list [file ...]\n"));
1194 + gettext(" -F [-c|-l|-q] [-r|-R] [-bhinsvx] "
1195 + "pattern_list [file ...]\n"));
1091 1196
1092 1197 (void) fprintf(stderr, "\t%s", cmdname);
1093 1198 (void) fprintf(stderr,
1094 1199 gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1095 - "[-f pattern_file]... [file...]\n"));
1200 + "[-f pattern_file]... [file...]\n"));
1096 1201 }
1097 1202 exit(2);
1098 1203 /* NOTREACHED */
1099 1204 }
1100 1205
1101 1206 /*
1102 1207 * Compile literal pattern into BMG tables
1103 1208 */
1104 1209 static void
1105 1210 bmgcomp(char *pat, int len)
1106 1211 {
1107 1212 int i;
1108 1213 int tlen;
1109 1214 unsigned char *uc = (unsigned char *)pat;
1110 1215
1111 1216 bmglen = len;
1112 1217 bmgpat = pat;
1113 1218
1114 1219 for (i = 0; i < M_CSETSIZE; i++) {
1115 1220 bmgtab[i] = len;
1116 1221 }
1117 1222
1118 1223 len--;
1119 1224 for (tlen = len, i = 0; i <= len; i++, tlen--) {
1120 1225 bmgtab[*uc++] = tlen;
1121 1226 }
1122 1227 }
1123 1228
1124 1229 /*
1125 1230 * BMG search.
1126 1231 */
1127 1232 static char *
1128 1233 bmgexec(char *str, char *end)
1129 1234 {
1130 1235 int t;
1131 1236 char *k, *s, *p;
1132 1237
1133 1238 k = str + bmglen - 1;
1134 1239 if (bmglen == 1) {
1135 1240 return (memchr(str, bmgpat[0], end - str));
1136 1241 }
1137 1242 for (; ; ) {
1138 1243 /* inner loop, should be most optimized */
1139 1244 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1140 1245 k += t;
1141 1246 }
1142 1247 if (k >= end) {
1143 1248 return (NULL);
1144 1249 }
1145 1250 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1146 1251 if (p == bmgpat) {
1147 1252 return (s);
1148 1253 }
1149 1254 }
1150 1255 k++;
1151 1256 }
1152 1257 /* NOTREACHED */
1153 1258 }
↓ open down ↓ |
48 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX