Print this page
3737 grep does not support -H option
3759 egrep(1) and fgrep(1) -s flag does not hide -c output
Reviewed by: Albert Lee <trisk@nexenta.com>
Reviewed by: Andy Stormont <andyjstormont@gmail.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/grep_xpg4/grep.c
+++ new/usr/src/cmd/grep_xpg4/grep.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License, Version 1.0 only
6 6 * (the "License"). You may not use this file except in compliance
7 7 * with the License.
8 8 *
9 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 10 * or http://www.opensolaris.org/os/licensing.
11 11 * See the License for the specific language governing permissions
12 12 * and limitations under the License.
13 13 *
14 14 * When distributing Covered Code, include this CDDL HEADER in each
15 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 16 * If applicable, add the following below this CDDL HEADER, with the
17 17 * fields enclosed by brackets "[]" replaced with your own identifying
18 18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 19 *
20 20 * CDDL HEADER END
21 21 */
22 22 /*
23 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * grep - pattern matching program - combined grep, egrep, and fgrep.
↓ open down ↓ |
28 lines elided |
↑ open up ↑ |
29 29 * Based on MKS grep command, with XCU & Solaris mods.
30 30 */
31 31
32 32 /*
33 33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
34 34 *
35 35 */
36 36
37 37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
38 38
39 +/*
40 + * Copyright 2013 Damian Bogel. All rights reserved.
41 + */
42 +
39 43 #include <string.h>
40 44 #include <stdlib.h>
41 45 #include <ctype.h>
42 46 #include <stdarg.h>
43 47 #include <regex.h>
44 48 #include <limits.h>
45 49 #include <sys/types.h>
46 50 #include <sys/stat.h>
47 51 #include <fcntl.h>
48 52 #include <stdio.h>
49 53 #include <locale.h>
50 54 #include <wchar.h>
51 55 #include <errno.h>
52 56 #include <unistd.h>
53 57 #include <wctype.h>
54 58 #include <ftw.h>
55 59 #include <sys/param.h>
56 60
61 +#define STDIN_FILENAME gettext("(standard input)")
62 +
57 63 #define BSIZE 512 /* Size of block for -b */
58 64 #define BUFSIZE 8192 /* Input buffer size */
59 65 #define MAX_DEPTH 1000 /* how deep to recurse */
60 66
61 67 #define M_CSETSIZE 256 /* singlebyte chars */
62 68 static int bmglen; /* length of BMG pattern */
63 69 static char *bmgpat; /* BMG pattern */
64 70 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */
65 71
66 72 typedef struct _PATTERN {
67 73 char *pattern; /* original pattern */
68 74 wchar_t *wpattern; /* wide, lowercased pattern */
69 75 struct _PATTERN *next;
70 76 regex_t re; /* compiled pattern */
71 77 } PATTERN;
72 78
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
73 79 static PATTERN *patterns;
74 80 static char errstr[128]; /* regerror string buffer */
75 81 static int regflags = 0; /* regcomp options */
76 82 static int matched = 0; /* return of the grep() */
77 83 static int errors = 0; /* count of errors */
78 84 static uchar_t fgrep = 0; /* Invoked as fgrep */
79 85 static uchar_t egrep = 0; /* Invoked as egrep */
80 86 static uchar_t nvflag = 1; /* Print matching lines */
81 87 static uchar_t cflag; /* Count of matches */
82 88 static uchar_t iflag; /* Case insensitve matching */
89 +static uchar_t Hflag; /* Precede lines by file name */
83 90 static uchar_t hflag; /* Supress printing of filename */
84 91 static uchar_t lflag; /* Print file names of matches */
85 92 static uchar_t nflag; /* Precede lines by line number */
86 93 static uchar_t rflag; /* Search directories recursively */
87 94 static uchar_t bflag; /* Preccede matches by block number */
88 95 static uchar_t sflag; /* Suppress file error messages */
89 96 static uchar_t qflag; /* Suppress standard output */
90 97 static uchar_t wflag; /* Search for expression as a word */
91 98 static uchar_t xflag; /* Anchoring */
92 99 static uchar_t Eflag; /* Egrep or -E flag */
93 100 static uchar_t Fflag; /* Fgrep or -F flag */
94 101 static uchar_t Rflag; /* Like rflag, but follow symlinks */
95 102 static uchar_t outfn; /* Put out file name */
96 103 static char *cmdname;
97 104
98 105 static int use_wchar, use_bmg, mblocale;
99 106
100 107 static size_t outbuflen, prntbuflen;
101 108 static char *prntbuf;
102 109 static wchar_t *outline;
103 110
104 111 static void addfile(const char *fn);
105 112 static void addpattern(char *s);
106 113 static void fixpatterns(void);
107 114 static void usage(void);
108 115 static int grep(int, const char *);
109 116 static void bmgcomp(char *, int);
110 117 static char *bmgexec(char *, char *);
111 118 static int recursive(const char *, const struct stat *, int, struct FTW *);
112 119 static void process_path(const char *);
113 120 static void process_file(const char *, int);
114 121
115 122 /*
116 123 * mainline for grep
117 124 */
118 125 int
119 126 main(int argc, char **argv)
120 127 {
121 128 char *ap;
122 129 int c;
123 130 int fflag = 0;
124 131 int i, n_pattern = 0, n_file = 0;
125 132 char **pattern_list = NULL;
126 133 char **file_list = NULL;
127 134
128 135 (void) setlocale(LC_ALL, "");
129 136 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
130 137 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
131 138 #endif
132 139 (void) textdomain(TEXT_DOMAIN);
133 140
134 141 /*
135 142 * true if this is running on the multibyte locale
136 143 */
137 144 mblocale = (MB_CUR_MAX > 1);
138 145 /*
139 146 * Skip leading slashes
140 147 */
141 148 cmdname = argv[0];
142 149 if (ap = strrchr(cmdname, '/'))
143 150 cmdname = ap + 1;
144 151
145 152 ap = cmdname;
146 153 /*
147 154 * Detect egrep/fgrep via command name, map to -E and -F options.
↓ open down ↓ |
55 lines elided |
↑ open up ↑ |
148 155 */
149 156 if (*ap == 'e' || *ap == 'E') {
150 157 regflags |= REG_EXTENDED;
151 158 egrep++;
152 159 } else {
153 160 if (*ap == 'f' || *ap == 'F') {
154 161 fgrep++;
155 162 }
156 163 }
157 164
158 - while ((c = getopt(argc, argv, "vwchilnrbse:f:qxEFIR")) != EOF) {
165 + while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) {
159 166 switch (c) {
160 167 case 'v': /* POSIX: negate matches */
161 168 nvflag = 0;
162 169 break;
163 170
164 171 case 'c': /* POSIX: write count */
165 172 cflag++;
166 173 break;
167 174
168 175 case 'i': /* POSIX: ignore case */
169 176 iflag++;
170 177 regflags |= REG_ICASE;
171 178 break;
172 179
173 180 case 'l': /* POSIX: Write filenames only */
174 181 lflag++;
175 182 break;
176 183
177 184 case 'n': /* POSIX: Write line numbers */
178 185 nflag++;
179 186 break;
180 187
181 188 case 'r': /* Solaris: search recursively */
182 189 rflag++;
183 190 break;
184 191
185 192 case 'b': /* Solaris: Write file block numbers */
186 193 bflag++;
187 194 break;
188 195
189 196 case 's': /* POSIX: No error msgs for files */
190 197 sflag++;
191 198 break;
192 199
193 200 case 'e': /* POSIX: pattern list */
194 201 n_pattern++;
195 202 pattern_list = realloc(pattern_list,
196 203 sizeof (char *) * n_pattern);
197 204 if (pattern_list == NULL) {
198 205 (void) fprintf(stderr,
199 206 gettext("%s: out of memory\n"),
200 207 cmdname);
201 208 exit(2);
202 209 }
203 210 *(pattern_list + n_pattern - 1) = optarg;
204 211 break;
205 212
206 213 case 'f': /* POSIX: pattern file */
207 214 fflag = 1;
208 215 n_file++;
↓ open down ↓ |
40 lines elided |
↑ open up ↑ |
209 216 file_list = realloc(file_list,
210 217 sizeof (char *) * n_file);
211 218 if (file_list == NULL) {
212 219 (void) fprintf(stderr,
213 220 gettext("%s: out of memory\n"),
214 221 cmdname);
215 222 exit(2);
216 223 }
217 224 *(file_list + n_file - 1) = optarg;
218 225 break;
226 +
227 + /* based on options order h or H is set as in GNU grep */
219 228 case 'h': /* Solaris: supress printing of file name */
220 229 hflag = 1;
230 + Hflag = 0;
221 231 break;
232 + /* Solaris: precede every matching with file name */
233 + case 'H':
234 + Hflag = 1;
235 + hflag = 0;
236 + break;
222 237
223 238 case 'q': /* POSIX: quiet: status only */
224 239 qflag++;
225 240 break;
226 241
227 242 case 'w': /* Solaris: treat pattern as word */
228 243 wflag++;
229 244 break;
230 245
231 246 case 'x': /* POSIX: full line matches */
232 247 xflag++;
233 248 regflags |= REG_ANCHOR;
234 249 break;
235 250
236 251 case 'E': /* POSIX: Extended RE's */
237 252 regflags |= REG_EXTENDED;
238 253 Eflag++;
239 254 break;
240 255
241 256 case 'F': /* POSIX: strings, not RE's */
242 257 Fflag++;
243 258 break;
244 259
245 260 case 'R': /* Solaris: like rflag, but follow symlinks */
246 261 Rflag++;
247 262 rflag++;
248 263 break;
249 264
250 265 default:
251 266 usage();
252 267 }
253 268 }
254 269 /*
255 270 * If we're invoked as egrep or fgrep we need to do some checks
256 271 */
257 272
258 273 if (egrep || fgrep) {
259 274 /*
260 275 * Use of -E or -F with egrep or fgrep is illegal
261 276 */
262 277 if (Eflag || Fflag)
263 278 usage();
264 279 /*
265 280 * Don't allow use of wflag with egrep / fgrep
266 281 */
267 282 if (wflag)
268 283 usage();
269 284 /*
270 285 * For Solaris the -s flag is equivalent to XCU -q
271 286 */
272 287 if (sflag)
273 288 qflag++;
274 289 /*
275 290 * done with above checks - set the appropriate flags
276 291 */
277 292 if (egrep)
278 293 Eflag++;
279 294 else /* Else fgrep */
280 295 Fflag++;
281 296 }
282 297
283 298 if (wflag && (Eflag || Fflag)) {
284 299 /*
285 300 * -w cannot be specified with grep -F
286 301 */
↓ open down ↓ |
55 lines elided |
↑ open up ↑ |
287 302 usage();
288 303 }
289 304
290 305 /*
291 306 * -E and -F flags are mutually exclusive - check for this
292 307 */
293 308 if (Eflag && Fflag)
294 309 usage();
295 310
296 311 /*
312 + * -l overrides -H like in GNU grep
313 + */
314 + if (lflag)
315 + Hflag = 0;
316 +
317 + /*
297 318 * -c, -l and -q flags are mutually exclusive
298 319 * We have -c override -l like in Solaris.
299 320 * -q overrides -l & -c programmatically in grep() function.
300 321 */
301 322 if (cflag && lflag)
302 323 lflag = 0;
303 324
304 325 argv += optind - 1;
305 326 argc -= optind - 1;
306 327
307 328 /*
308 329 * Now handling -e and -f option
309 330 */
310 331 if (pattern_list) {
311 332 for (i = 0; i < n_pattern; i++) {
312 333 addpattern(pattern_list[i]);
313 334 }
314 335 free(pattern_list);
315 336 }
316 337 if (file_list) {
317 338 for (i = 0; i < n_file; i++) {
318 339 addfile(file_list[i]);
319 340 }
320 341 free(file_list);
321 342 }
322 343
323 344 /*
324 345 * No -e or -f? Make sure there is one more arg, use it as the pattern.
325 346 */
326 347 if (patterns == NULL && !fflag) {
327 348 if (argc < 2)
328 349 usage();
329 350 addpattern(argv[1]);
330 351 argc--;
331 352 argv++;
332 353 }
333 354
334 355 /*
335 356 * If -x flag is not specified or -i flag is specified
336 357 * with fgrep in a multibyte locale, need to use
337 358 * the wide character APIs. Otherwise, byte-oriented
338 359 * process will be done.
↓ open down ↓ |
32 lines elided |
↑ open up ↑ |
339 360 */
340 361 use_wchar = Fflag && mblocale && (!xflag || iflag);
341 362
342 363 /*
343 364 * Compile Patterns and also decide if BMG can be used
344 365 */
345 366 fixpatterns();
346 367
347 368 /* Process all files: stdin, or rest of arg list */
348 369 if (argc < 2) {
349 - matched = grep(0, gettext("(standard input)"));
370 + matched = grep(0, STDIN_FILENAME);
350 371 } else {
351 - if (argc > 2 && hflag == 0)
372 + if (Hflag || (argc > 2 && hflag == 0))
352 373 outfn = 1; /* Print filename on match line */
353 374 for (argv++; *argv != NULL; argv++) {
354 375 process_path(*argv);
355 376 }
356 377 }
357 378 /*
358 379 * Return() here is used instead of exit
359 380 */
360 381
361 382 (void) fflush(stdout);
362 383
363 384 if (errors)
364 385 return (2);
365 386 return (matched ? 0 : 1);
366 387 }
367 388
368 389 static void
369 390 process_path(const char *path)
370 391 {
371 392 struct stat st;
372 393 int walkflags = FTW_CHDIR;
373 394 char *buf = NULL;
374 395
375 396 if (rflag) {
376 397 if (stat(path, &st) != -1 &&
377 398 (st.st_mode & S_IFMT) == S_IFDIR) {
378 399 outfn = 1; /* Print filename */
379 400
380 401 /*
381 402 * Add trailing slash if arg
382 403 * is directory, to resolve symlinks.
383 404 */
384 405 if (path[strlen(path) - 1] != '/') {
385 406 (void) asprintf(&buf, "%s/", path);
386 407 if (buf != NULL)
387 408 path = buf;
388 409 }
389 410
390 411 /*
391 412 * Search through subdirs if path is directory.
392 413 * Don't follow symlinks if Rflag is not set.
393 414 */
394 415 if (!Rflag)
395 416 walkflags |= FTW_PHYS;
396 417
397 418 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
398 419 if (!sflag)
399 420 (void) fprintf(stderr,
400 421 gettext("%s: can't open \"%s\"\n"),
401 422 cmdname, path);
402 423 errors = 1;
403 424 }
404 425 return;
405 426 }
406 427 }
407 428 process_file(path, 0);
408 429 }
409 430
410 431 /*
411 432 * Read and process all files in directory recursively.
412 433 */
413 434 static int
414 435 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
415 436 {
416 437 /*
417 438 * Process files and follow symlinks if Rflag set.
418 439 */
419 440 if (info != FTW_F) {
420 441 /* Report broken symlinks and unreadable files */
421 442 if (!sflag &&
422 443 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
423 444 (void) fprintf(stderr,
424 445 gettext("%s: can't open \"%s\"\n"), cmdname, name);
425 446 }
426 447 return (0);
427 448 }
428 449
429 450
430 451 /* Skip devices and pipes if Rflag is not set */
431 452 if (!Rflag && !S_ISREG(statp->st_mode))
432 453 return (0);
433 454 /* Pass offset to relative name from FTW_CHDIR */
434 455 process_file(name, ftw->base);
435 456 return (0);
436 457 }
437 458
438 459 /*
439 460 * Opens file and call grep function.
440 461 */
441 462 static void
442 463 process_file(const char *name, int base)
443 464 {
444 465 int fd;
445 466
446 467 if ((fd = open(name + base, O_RDONLY)) == -1) {
447 468 errors = 1;
448 469 if (!sflag) /* Silent mode */
449 470 (void) fprintf(stderr, gettext(
450 471 "%s: can't open \"%s\"\n"),
451 472 cmdname, name);
452 473 return;
453 474 }
454 475 matched |= grep(fd, name);
455 476 (void) close(fd);
456 477
457 478 if (ferror(stdout)) {
458 479 (void) fprintf(stderr, gettext(
459 480 "%s: error writing to stdout\n"),
460 481 cmdname);
461 482 (void) fflush(stdout);
462 483 exit(2);
463 484 }
464 485
465 486 }
466 487
467 488 /*
468 489 * Add a file of strings to the pattern list.
469 490 */
470 491 static void
471 492 addfile(const char *fn)
472 493 {
473 494 FILE *fp;
474 495 char *inbuf;
475 496 char *bufp;
476 497 size_t bufsiz, buflen, bufused;
477 498
478 499 /*
479 500 * Open the pattern file
480 501 */
481 502 if ((fp = fopen(fn, "r")) == NULL) {
482 503 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
483 504 cmdname, fn);
484 505 exit(2);
485 506 }
486 507 bufsiz = BUFSIZE;
487 508 if ((inbuf = malloc(bufsiz)) == NULL) {
488 509 (void) fprintf(stderr,
489 510 gettext("%s: out of memory\n"), cmdname);
490 511 exit(2);
491 512 }
492 513 bufp = inbuf;
493 514 bufused = 0;
494 515 /*
495 516 * Read in the file, reallocing as we need more memory
496 517 */
497 518 while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
498 519 buflen = strlen(bufp);
499 520 bufused += buflen;
500 521 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
501 522 /*
502 523 * if this line does not fit to the buffer,
503 524 * realloc larger buffer
504 525 */
505 526 bufsiz += BUFSIZE;
506 527 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
507 528 (void) fprintf(stderr,
508 529 gettext("%s: out of memory\n"),
509 530 cmdname);
510 531 exit(2);
511 532 }
512 533 bufp = inbuf + bufused;
513 534 continue;
514 535 }
515 536 if (bufp[buflen - 1] == '\n') {
516 537 bufp[--buflen] = '\0';
517 538 }
518 539 addpattern(inbuf);
519 540
520 541 bufp = inbuf;
521 542 bufused = 0;
522 543 }
523 544 free(inbuf);
524 545 (void) fclose(fp);
525 546 }
526 547
527 548 /*
528 549 * Add a string to the pattern list.
529 550 */
530 551 static void
531 552 addpattern(char *s)
532 553 {
533 554 PATTERN *pp;
534 555 char *wordbuf;
535 556 char *np;
536 557
537 558 for (; ; ) {
538 559 np = strchr(s, '\n');
539 560 if (np != NULL)
540 561 *np = '\0';
541 562 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
542 563 (void) fprintf(stderr, gettext(
543 564 "%s: out of memory\n"),
544 565 cmdname);
545 566 exit(2);
546 567 }
547 568 if (wflag) {
548 569 /*
549 570 * Solaris wflag support: Add '<' '>' to pattern to
550 571 * select it as a word. Doesn't make sense with -F
551 572 * but we're Libertarian.
552 573 */
553 574 size_t slen, wordlen;
554 575
555 576 slen = strlen(s);
556 577 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
557 578 if ((wordbuf = malloc(wordlen)) == NULL) {
558 579 (void) fprintf(stderr,
559 580 gettext("%s: out of memory\n"),
560 581 cmdname);
561 582 exit(2);
562 583 }
563 584 (void) strcpy(wordbuf, "\\<");
564 585 (void) strcpy(wordbuf + 2, s);
565 586 (void) strcpy(wordbuf + 2 + slen, "\\>");
566 587 } else {
567 588 if ((wordbuf = strdup(s)) == NULL) {
568 589 (void) fprintf(stderr,
569 590 gettext("%s: out of memory\n"),
570 591 cmdname);
571 592 exit(2);
572 593 }
573 594 }
574 595 pp->pattern = wordbuf;
575 596 pp->next = patterns;
576 597 patterns = pp;
577 598 if (np == NULL)
578 599 break;
579 600 s = np + 1;
580 601 }
581 602 }
582 603
583 604 /*
584 605 * Fix patterns.
585 606 * Must do after all arguments read, in case later -i option.
586 607 */
587 608 static void
588 609 fixpatterns(void)
589 610 {
590 611 PATTERN *pp;
591 612 int rv, fix_pattern, npatterns;
592 613
593 614 /*
594 615 * As REG_ANCHOR flag is not supported in the current Solaris,
595 616 * need to fix the specified pattern if -x is specified with
596 617 * grep or egrep
597 618 */
598 619 fix_pattern = !Fflag && xflag;
599 620
600 621 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
601 622 npatterns++;
602 623 if (fix_pattern) {
603 624 char *cp, *cq;
604 625 size_t plen, nplen;
605 626
606 627 plen = strlen(pp->pattern);
607 628 /* '^' pattern '$' */
608 629 nplen = 1 + plen + 1 + 1;
609 630 if ((cp = malloc(nplen)) == NULL) {
610 631 (void) fprintf(stderr,
611 632 gettext("%s: out of memory\n"),
612 633 cmdname);
613 634 exit(2);
614 635 }
615 636 cq = cp;
616 637 *cq++ = '^';
617 638 cq = strcpy(cq, pp->pattern) + plen;
618 639 *cq++ = '$';
619 640 *cq = '\0';
620 641 free(pp->pattern);
621 642 pp->pattern = cp;
622 643 }
623 644
624 645 if (Fflag) {
625 646 if (use_wchar) {
626 647 /*
627 648 * Fflag && mblocale && iflag
628 649 * Fflag && mblocale && !xflag
629 650 */
630 651 size_t n;
631 652 n = strlen(pp->pattern) + 1;
632 653 if ((pp->wpattern =
633 654 malloc(sizeof (wchar_t) * n)) == NULL) {
634 655 (void) fprintf(stderr,
635 656 gettext("%s: out of memory\n"),
636 657 cmdname);
637 658 exit(2);
638 659 }
639 660 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
640 661 (size_t)-1) {
641 662 (void) fprintf(stderr,
642 663 gettext("%s: failed to convert "
643 664 "\"%s\" to wide-characters\n"),
644 665 cmdname, pp->pattern);
645 666 exit(2);
646 667 }
647 668 if (iflag) {
648 669 wchar_t *wp;
649 670 for (wp = pp->wpattern; *wp != L'\0';
650 671 wp++) {
651 672 *wp = towlower((wint_t)*wp);
652 673 }
653 674 }
654 675 free(pp->pattern);
655 676 } else {
656 677 /*
657 678 * Fflag && mblocale && !iflag
658 679 * Fflag && !mblocale && iflag
659 680 * Fflag && !mblocale && !iflag
660 681 */
661 682 if (iflag) {
662 683 unsigned char *cp;
663 684 for (cp = (unsigned char *)pp->pattern;
664 685 *cp != '\0'; cp++) {
665 686 *cp = tolower(*cp);
666 687 }
667 688 }
668 689 }
669 690 /*
670 691 * fgrep: No regular expressions.
671 692 */
672 693 continue;
673 694 }
674 695
675 696 /*
676 697 * For non-fgrep, compile the regular expression,
677 698 * give an informative error message, and exit if
678 699 * it didn't compile.
679 700 */
680 701 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
681 702 (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
682 703 (void) fprintf(stderr,
683 704 gettext("%s: RE error in %s: %s\n"),
684 705 cmdname, pp->pattern, errstr);
685 706 exit(2);
686 707 }
687 708 free(pp->pattern);
688 709 }
689 710
690 711 /*
691 712 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
692 713 * Use the Boyer-Moore-Gosper algorithm if:
693 714 * - fgrep (Fflag)
694 715 * - singlebyte locale (!mblocale)
695 716 * - no ignoring case (!iflag)
696 717 * - no printing line numbers (!nflag)
697 718 * - no negating the output (nvflag)
698 719 * - only one pattern (npatterns == 1)
699 720 * - non zero length pattern (strlen(patterns->pattern) != 0)
700 721 *
701 722 * It's guaranteed patterns->pattern is still alive
702 723 * when Fflag && !mblocale.
703 724 */
704 725 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
705 726 (npatterns == 1) && (strlen(patterns->pattern) != 0);
706 727 }
707 728
/*
 * Search a newline from the beginning of the string
 *
 * Returns a pointer to the first '\n' within the `len' bytes starting at
 * `ptr', or NULL if there is none (including when `len' is 0).
 */
static char *
find_nl(const char *ptr, size_t len)
{
	/* Nothing to scan; also keeps memchr() away from an unused ptr */
	if (len == 0)
		return (NULL);

	return (memchr(ptr, '\n', len));
}
721 742
/*
 * Search a newline from the end of the string
 *
 * Returns a pointer to the last '\n' within the `len' bytes starting at
 * `ptr', or NULL if there is none.
 */
static char *
rfind_nl(const char *ptr, size_t len)
{
	size_t	idx;

	/* idx is one past the byte being examined */
	for (idx = len; idx > 0; idx--) {
		if (ptr[idx - 1] == '\n')
			return ((char *)&ptr[idx - 1]);
	}
	return (NULL);
}
736 757
737 758 /*
738 759 * Duplicate the specified string converting each character
739 760 * into a lower case.
740 761 */
741 762 static char *
742 763 istrdup(const char *s1)
743 764 {
744 765 static size_t ibuflen = 0;
745 766 static char *ibuf = NULL;
746 767 size_t slen;
747 768 char *p;
748 769
749 770 slen = strlen(s1);
750 771 if (slen >= ibuflen) {
751 772 /* ibuf does not fit to s1 */
752 773 ibuflen = slen + 1;
753 774 ibuf = realloc(ibuf, ibuflen);
754 775 if (ibuf == NULL) {
755 776 (void) fprintf(stderr,
756 777 gettext("%s: out of memory\n"), cmdname);
757 778 exit(2);
758 779 }
759 780 }
760 781 p = ibuf;
761 782 do {
762 783 *p++ = tolower(*s1);
763 784 } while (*s1++ != '\0');
764 785 return (ibuf);
765 786 }
766 787
767 788 /*
768 789 * Do grep on a single file.
769 790 * Return true in any lines matched.
770 791 *
771 792 * We have two strategies:
772 793 * The fast one is used when we have a single pattern with
773 794 * a string known to occur in the pattern. We can then
774 795 * do a BMG match on the whole buffer.
775 796 * This is an order of magnitude faster.
776 797 * Otherwise we split the buffer into lines,
777 798 * and check for a match on each line.
778 799 */
779 800 static int
780 801 grep(int fd, const char *fn)
781 802 {
782 803 PATTERN *pp;
783 804 off_t data_len; /* length of the data chunk */
784 805 off_t line_len; /* length of the current line */
785 806 off_t line_offset; /* current line's offset from the beginning */
786 807 long long lineno;
787 808 long long matches = 0; /* Number of matching lines */
788 809 int newlinep; /* 0 if the last line of file has no newline */
789 810 char *ptr, *ptrend;
790 811
791 812
792 813 if (patterns == NULL)
793 814 return (0); /* no patterns to match -- just return */
794 815
795 816 pp = patterns;
796 817
797 818 if (use_bmg) {
798 819 bmgcomp(pp->pattern, strlen(pp->pattern));
799 820 }
800 821
801 822 if (use_wchar && outline == NULL) {
802 823 outbuflen = BUFSIZE + 1;
803 824 outline = malloc(sizeof (wchar_t) * outbuflen);
804 825 if (outline == NULL) {
805 826 (void) fprintf(stderr, gettext("%s: out of memory\n"),
806 827 cmdname);
807 828 exit(2);
808 829 }
809 830 }
810 831
811 832 if (prntbuf == NULL) {
812 833 prntbuflen = BUFSIZE;
813 834 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
814 835 (void) fprintf(stderr, gettext("%s: out of memory\n"),
815 836 cmdname);
816 837 exit(2);
817 838 }
818 839 }
819 840
820 841 line_offset = 0;
821 842 lineno = 0;
822 843 newlinep = 1;
823 844 data_len = 0;
824 845 for (; ; ) {
825 846 long count;
826 847 off_t offset = 0;
827 848
828 849 if (data_len == 0) {
829 850 /*
830 851 * If no data in the buffer, reset ptr
831 852 */
832 853 ptr = prntbuf;
833 854 }
834 855 if (ptr == prntbuf) {
835 856 /*
836 857 * The current data chunk starts from prntbuf.
837 858 * This means either the buffer has no data
838 859 * or the buffer has no newline.
839 860 * So, read more data from input.
840 861 */
841 862 count = read(fd, ptr + data_len, prntbuflen - data_len);
842 863 if (count < 0) {
843 864 /* read error */
844 865 if (cflag) {
845 866 if (outfn && !rflag) {
846 867 (void) fprintf(stdout,
847 868 "%s:", fn);
848 869 }
849 870 if (!qflag && !rflag) {
850 871 (void) fprintf(stdout, "%lld\n",
851 872 matches);
852 873 }
853 874 }
854 875 return (0);
855 876 } else if (count == 0) {
856 877 /* no new data */
857 878 if (data_len == 0) {
858 879 /* end of file already reached */
859 880 break;
860 881 }
861 882 /* last line of file has no newline */
862 883 ptrend = ptr + data_len;
863 884 newlinep = 0;
864 885 goto L_start_process;
865 886 }
866 887 offset = data_len;
867 888 data_len += count;
868 889 }
869 890
870 891 /*
871 892 * Look for newline in the chunk
872 893 * between ptr + offset and ptr + data_len - offset.
873 894 */
874 895 ptrend = find_nl(ptr + offset, data_len - offset);
875 896 if (ptrend == NULL) {
876 897 /* no newline found in this chunk */
877 898 if (ptr > prntbuf) {
878 899 /*
879 900 * Move remaining data to the beginning
880 901 * of the buffer.
881 902 * Remaining data lie from ptr for
882 903 * data_len bytes.
883 904 */
884 905 (void) memmove(prntbuf, ptr, data_len);
885 906 }
886 907 if (data_len == prntbuflen) {
887 908 /*
888 909 * No enough room in the buffer
889 910 */
890 911 prntbuflen += BUFSIZE;
891 912 prntbuf = realloc(prntbuf, prntbuflen + 1);
892 913 if (prntbuf == NULL) {
893 914 (void) fprintf(stderr,
894 915 gettext("%s: out of memory\n"),
895 916 cmdname);
896 917 exit(2);
897 918 }
898 919 }
899 920 ptr = prntbuf;
900 921 /* read the next input */
901 922 continue;
902 923 }
903 924 L_start_process:
904 925
905 926 /*
906 927 * Beginning of the chunk: ptr
907 928 * End of the chunk: ptr + data_len
908 929 * Beginning of the line: ptr
909 930 * End of the line: ptrend
910 931 */
911 932
912 933 if (use_bmg) {
913 934 /*
914 935 * Use Boyer-Moore-Gosper algorithm to find out if
915 936 * this chunk (not this line) contains the specified
916 937 * pattern. If not, restart from the last line
917 938 * of this chunk.
918 939 */
919 940 char *bline;
920 941 bline = bmgexec(ptr, ptr + data_len);
921 942 if (bline == NULL) {
922 943 /*
923 944 * No pattern found in this chunk.
924 945 * Need to find the last line
925 946 * in this chunk.
926 947 */
927 948 ptrend = rfind_nl(ptr, data_len);
928 949
929 950 /*
930 951 * When this chunk does not contain newline,
931 952 * ptrend becomes NULL, which should happen
932 953 * when the last line of file does not end
933 954 * with a newline. At such a point,
934 955 * newlinep should have been set to 0.
935 956 * Therefore, just after jumping to
936 957 * L_skip_line, the main for-loop quits,
937 958 * and the line_len value won't be
938 959 * used.
939 960 */
940 961 line_len = ptrend - ptr;
941 962 goto L_skip_line;
942 963 }
943 964 if (bline > ptrend) {
944 965 /*
945 966 * Pattern found not in the first line
946 967 * of this chunk.
947 968 * Discard the first line.
948 969 */
949 970 line_len = ptrend - ptr;
950 971 goto L_skip_line;
951 972 }
952 973 /*
953 974 * Pattern found in the first line of this chunk.
954 975 * Using this result.
955 976 */
956 977 *ptrend = '\0';
957 978 line_len = ptrend - ptr;
958 979
959 980 /*
960 981 * before jumping to L_next_line,
961 982 * need to handle xflag if specified
962 983 */
963 984 if (xflag && (line_len != bmglen ||
964 985 strcmp(bmgpat, ptr) != 0)) {
965 986 /* didn't match */
966 987 pp = NULL;
967 988 } else {
968 989 pp = patterns; /* to make it happen */
969 990 }
970 991 goto L_next_line;
971 992 }
972 993 lineno++;
973 994 /*
974 995 * Line starts from ptr and ends at ptrend.
975 996 * line_len will be the length of the line.
976 997 */
977 998 *ptrend = '\0';
978 999 line_len = ptrend - ptr;
979 1000
980 1001 /*
981 1002 * From now, the process will be performed based
982 1003 * on the line from ptr to ptrend.
983 1004 */
984 1005 if (use_wchar) {
985 1006 size_t len;
986 1007
987 1008 if (line_len >= outbuflen) {
988 1009 outbuflen = line_len + 1;
989 1010 outline = realloc(outline,
990 1011 sizeof (wchar_t) * outbuflen);
991 1012 if (outline == NULL) {
992 1013 (void) fprintf(stderr,
993 1014 gettext("%s: out of memory\n"),
994 1015 cmdname);
995 1016 exit(2);
996 1017 }
997 1018 }
998 1019
999 1020 len = mbstowcs(outline, ptr, line_len);
1000 1021 if (len == (size_t)-1) {
1001 1022 (void) fprintf(stderr, gettext(
1002 1023 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1003 1024 cmdname, fn, lineno);
1004 1025 /* never match a line with invalid sequence */
1005 1026 goto L_skip_line;
1006 1027 }
1007 1028 outline[len] = L'\0';
1008 1029
1009 1030 if (iflag) {
1010 1031 wchar_t *cp;
1011 1032 for (cp = outline; *cp != '\0'; cp++) {
1012 1033 *cp = towlower((wint_t)*cp);
1013 1034 }
1014 1035 }
1015 1036
1016 1037 if (xflag) {
1017 1038 for (pp = patterns; pp; pp = pp->next) {
1018 1039 if (outline[0] == pp->wpattern[0] &&
1019 1040 wcscmp(outline,
1020 1041 pp->wpattern) == 0) {
1021 1042 /* matched */
1022 1043 break;
1023 1044 }
1024 1045 }
1025 1046 } else {
1026 1047 for (pp = patterns; pp; pp = pp->next) {
1027 1048 if (wcswcs(outline, pp->wpattern)
1028 1049 != NULL) {
1029 1050 /* matched */
1030 1051 break;
1031 1052 }
1032 1053 }
1033 1054 }
1034 1055 } else if (Fflag) {
1035 1056 /* fgrep in byte-oriented handling */
1036 1057 char *fptr;
1037 1058 if (iflag) {
1038 1059 fptr = istrdup(ptr);
1039 1060 } else {
1040 1061 fptr = ptr;
1041 1062 }
1042 1063 if (xflag) {
1043 1064 /* fgrep -x */
1044 1065 for (pp = patterns; pp; pp = pp->next) {
1045 1066 if (fptr[0] == pp->pattern[0] &&
1046 1067 strcmp(fptr, pp->pattern) == 0) {
1047 1068 /* matched */
1048 1069 break;
1049 1070 }
1050 1071 }
1051 1072 } else {
1052 1073 for (pp = patterns; pp; pp = pp->next) {
1053 1074 if (strstr(fptr, pp->pattern) != NULL) {
1054 1075 /* matched */
1055 1076 break;
1056 1077 }
1057 1078 }
1058 1079 }
1059 1080 } else {
1060 1081 /* grep or egrep */
1061 1082 for (pp = patterns; pp; pp = pp->next) {
1062 1083 int rv;
1063 1084
1064 1085 rv = regexec(&pp->re, ptr, 0, NULL, 0);
1065 1086 if (rv == REG_OK) {
1066 1087 /* matched */
1067 1088 break;
1068 1089 }
1069 1090
1070 1091 switch (rv) {
1071 1092 case REG_NOMATCH:
1072 1093 break;
1073 1094 case REG_ECHAR:
1074 1095 (void) fprintf(stderr, gettext(
1075 1096 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1076 1097 cmdname, fn, lineno);
1077 1098 break;
1078 1099 default:
1079 1100 (void) regerror(rv, &pp->re, errstr,
1080 1101 sizeof (errstr));
1081 1102 (void) fprintf(stderr, gettext(
1082 1103 "%s: input file \"%s\": line %lld: %s\n"),
1083 1104 cmdname, fn, lineno, errstr);
1084 1105 exit(2);
1085 1106 }
1086 1107 }
1087 1108 }
1088 1109
1089 1110 L_next_line:
1090 1111 /*
1091 1112 * Here, if pp points to non-NULL, something has been matched
1092 1113 * to the pattern.
1093 1114 */
1094 1115 if (nvflag == (pp != NULL)) {
1095 1116 matches++;
1096 1117 /*
1097 1118 * Handle q, l, and c flags.
1098 1119 */
1099 1120 if (qflag) {
1100 1121 /* no need to continue */
1101 1122 /*
1102 1123 * End of this line is ptrend.
1103 1124 * We have read up to ptr + data_len.
1104 1125 */
↓ open down ↓ |
743 lines elided |
↑ open up ↑ |
1105 1126 off_t pos;
1106 1127 pos = ptr + data_len - (ptrend + 1);
1107 1128 (void) lseek(fd, -pos, SEEK_CUR);
1108 1129 exit(0);
1109 1130 }
1110 1131 if (lflag) {
1111 1132 (void) printf("%s\n", fn);
1112 1133 break;
1113 1134 }
1114 1135 if (!cflag) {
1115 - if (outfn) {
1136 + if (Hflag || outfn) {
1116 1137 (void) printf("%s:", fn);
1117 1138 }
1118 1139 if (bflag) {
1119 1140 (void) printf("%lld:", (offset_t)
1120 1141 (line_offset / BSIZE));
1121 1142 }
1122 1143 if (nflag) {
1123 1144 (void) printf("%lld:", lineno);
1124 1145 }
1125 1146 *ptrend = '\n';
1126 1147 (void) fwrite(ptr, 1, line_len + 1, stdout);
1127 1148 }
1128 1149 if (ferror(stdout)) {
1129 1150 return (0);
1130 1151 }
1131 1152 }
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
1132 1153 L_skip_line:
1133 1154 if (!newlinep)
1134 1155 break;
1135 1156
1136 1157 data_len -= line_len + 1;
1137 1158 line_offset += line_len + 1;
1138 1159 ptr = ptrend + 1;
1139 1160 }
1140 1161
1141 1162 if (cflag) {
1142 - if (outfn) {
1163 + if (Hflag || outfn) {
1143 1164 (void) printf("%s:", fn);
1144 1165 }
1145 1166 if (!qflag) {
1146 1167 (void) printf("%lld\n", matches);
1147 1168 }
1148 1169 }
1149 1170 return (matches != 0);
1150 1171 }
1151 1172
1152 1173 /*
1153 1174 * usage message for grep
1154 1175 */
1155 1176 static void
1156 1177 usage(void)
1157 1178 {
1158 1179 if (egrep || fgrep) {
1159 1180 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1160 1181 (void) fprintf(stderr,
1161 - gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1182 + gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1162 1183 "pattern_list [file ...]\n"));
1163 1184
1164 1185 (void) fprintf(stderr, "\t%s", cmdname);
1165 1186 (void) fprintf(stderr,
1166 - gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1187 + gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1167 1188 "[-e pattern_list]... "
1168 1189 "[-f pattern_file]... [file...]\n"));
1169 1190 } else {
1170 1191 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1171 1192 (void) fprintf(stderr,
1172 - gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1193 + gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1173 1194 "pattern_list [file ...]\n"));
1174 1195
1175 1196 (void) fprintf(stderr, "\t%s", cmdname);
1176 1197 (void) fprintf(stderr,
1177 - gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1198 + gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1178 1199 "[-e pattern_list]... "
1179 1200 "[-f pattern_file]... [file...]\n"));
1180 1201
1181 1202 (void) fprintf(stderr, "\t%s", cmdname);
1182 1203 (void) fprintf(stderr,
1183 - gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1204 + gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1184 1205 "pattern_list [file ...]\n"));
1185 1206
1186 1207 (void) fprintf(stderr, "\t%s", cmdname);
1187 1208 (void) fprintf(stderr,
1188 - gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1209 + gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1189 1210 "[-e pattern_list]... "
1190 1211 "[-f pattern_file]... [file...]\n"));
1191 1212
1192 1213 (void) fprintf(stderr, "\t%s", cmdname);
1193 1214 (void) fprintf(stderr,
1194 - gettext(" -F [-c|-l|-q] [-r|-R] [-bhinsvx] "
1215 + gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1195 1216 "pattern_list [file ...]\n"));
1196 1217
1197 1218 (void) fprintf(stderr, "\t%s", cmdname);
1198 1219 (void) fprintf(stderr,
1199 - gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1220 + gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
1200 1221 "[-f pattern_file]... [file...]\n"));
1201 1222 }
1202 1223 exit(2);
1203 1224 /* NOTREACHED */
1204 1225 }
1205 1226
1206 1227 /*
1207 1228 * Compile literal pattern into BMG tables
1208 1229 */
1209 1230 static void
1210 1231 bmgcomp(char *pat, int len)
1211 1232 {
1212 1233 int i;
1213 1234 int tlen;
1214 1235 unsigned char *uc = (unsigned char *)pat;
1215 1236
1216 1237 bmglen = len;
1217 1238 bmgpat = pat;
1218 1239
1219 1240 for (i = 0; i < M_CSETSIZE; i++) {
1220 1241 bmgtab[i] = len;
1221 1242 }
1222 1243
1223 1244 len--;
1224 1245 for (tlen = len, i = 0; i <= len; i++, tlen--) {
1225 1246 bmgtab[*uc++] = tlen;
1226 1247 }
1227 1248 }
1228 1249
1229 1250 /*
1230 1251 * BMG search.
1231 1252 */
1232 1253 static char *
1233 1254 bmgexec(char *str, char *end)
1234 1255 {
1235 1256 int t;
1236 1257 char *k, *s, *p;
1237 1258
1238 1259 k = str + bmglen - 1;
1239 1260 if (bmglen == 1) {
1240 1261 return (memchr(str, bmgpat[0], end - str));
1241 1262 }
1242 1263 for (; ; ) {
1243 1264 /* inner loop, should be most optimized */
1244 1265 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1245 1266 k += t;
1246 1267 }
1247 1268 if (k >= end) {
1248 1269 return (NULL);
1249 1270 }
1250 1271 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1251 1272 if (p == bmgpat) {
1252 1273 return (s);
1253 1274 }
1254 1275 }
1255 1276 k++;
1256 1277 }
1257 1278 /* NOTREACHED */
1258 1279 }
↓ open down ↓ |
49 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX