Print this page
3737 grep does not support -H option
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/grep_xpg4/grep.c
+++ new/usr/src/cmd/grep_xpg4/grep.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License, Version 1.0 only
6 6 * (the "License"). You may not use this file except in compliance
7 7 * with the License.
8 8 *
9 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 10 * or http://www.opensolaris.org/os/licensing.
11 11 * See the License for the specific language governing permissions
12 12 * and limitations under the License.
13 13 *
14 14 * When distributing Covered Code, include this CDDL HEADER in each
15 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 16 * If applicable, add the following below this CDDL HEADER, with the
17 17 * fields enclosed by brackets "[]" replaced with your own identifying
18 18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 19 *
20 20 * CDDL HEADER END
21 21 */
22 22 /*
23 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * grep - pattern matching program - combined grep, egrep, and fgrep.
29 29 * Based on MKS grep command, with XCU & Solaris mods.
30 30 */
31 31
32 32 /*
33 33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
34 34 *
35 35 */
36 36
37 37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
38 38
39 39 #include <string.h>
40 40 #include <stdlib.h>
41 41 #include <ctype.h>
42 42 #include <stdarg.h>
43 43 #include <regex.h>
44 44 #include <limits.h>
45 45 #include <sys/types.h>
46 46 #include <sys/stat.h>
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
47 47 #include <fcntl.h>
48 48 #include <stdio.h>
49 49 #include <locale.h>
50 50 #include <wchar.h>
51 51 #include <errno.h>
52 52 #include <unistd.h>
53 53 #include <wctype.h>
54 54 #include <ftw.h>
55 55 #include <sys/param.h>
56 56
/* File name passed to grep() when input is read from standard input. */
57 +#define STDIN_FILENAME gettext("(standard input)")
58 +
57 59 #define BSIZE 512 /* Block size used to compute -b block numbers */
58 60 #define BUFSIZE 8192 /* Initial size and growth step of input buffers */
59 61 #define MAX_DEPTH 1000 /* Passed to nftw() as its depth argument */
60 62
61 63 #define M_CSETSIZE 256 /* Number of single-byte character values */
62 64 static int bmglen; /* length of BMG pattern */
63 65 static char *bmgpat; /* BMG pattern */
64 66 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */
65 67
/*
 * One search pattern.  Entries are linked into the list headed by
 * `patterns'; addpattern() fills in `pattern', fixpatterns() fills in
 * either `wpattern' (wide-character fixed-string matching) or `re'
 * (regular expression matching).
 */
66 68 typedef struct _PATTERN {
67 69 char *pattern; /* pattern text; freed by fixpatterns() once compiled or converted */
68 70 wchar_t *wpattern; /* wide-character pattern, lowercased under -i */
69 71 struct _PATTERN *next; /* next entry in the list */
70 72 regex_t re; /* compiled pattern */
71 73 } PATTERN;
72 74
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
73 75 static PATTERN *patterns; /* head of the pattern list */
74 76 static char errstr[128]; /* regerror string buffer */
75 77 static int regflags = 0; /* regcomp options */
76 78 static int matched = 0; /* nonzero once any grep() call reported a match */
77 79 static int errors = 0; /* set to 1 once an error has been seen */
78 80 static uchar_t fgrep = 0; /* Invoked as fgrep */
79 81 static uchar_t egrep = 0; /* Invoked as egrep */
80 82 static uchar_t nvflag = 1; /* Print matching lines (cleared by -v) */
81 83 static uchar_t cflag; /* Count of matches */
82 84 static uchar_t iflag; /* Case insensitive matching */
85 +static uchar_t Hflag; /* Precede lines by file name */
83 86 static uchar_t hflag; /* Suppress printing of filename */
84 87 static uchar_t lflag; /* Print file names of matches */
85 88 static uchar_t nflag; /* Precede lines by line number */
86 89 static uchar_t rflag; /* Search directories recursively */
87 90 static uchar_t bflag; /* Precede matches by block number */
88 91 static uchar_t sflag; /* Suppress file error messages */
89 92 static uchar_t qflag; /* Suppress standard output */
90 93 static uchar_t wflag; /* Search for expression as a word */
91 94 static uchar_t xflag; /* Anchoring */
92 95 static uchar_t Eflag; /* Egrep or -E flag */
93 96 static uchar_t Fflag; /* Fgrep or -F flag */
94 97 static uchar_t Rflag; /* Like rflag, but follow symlinks */
95 98 static uchar_t outfn; /* Put out file name */
96 99 static char *cmdname; /* last path component of argv[0], used in messages */
97 100
/*
 * use_wchar: fixed-string matching is done on wide characters.
 * use_bmg:   the Boyer-Moore-Gosper fast path is usable (see fixpatterns()).
 * mblocale:  the current locale is multibyte (MB_CUR_MAX > 1).
 */
98 101 static int use_wchar, use_bmg, mblocale;
99 102
/*
 * prntbuf/prntbuflen: input buffer used by grep() and its capacity.
 * outline/outbuflen:  wide-character copy of the current line and its
 *                     capacity in wchar_t units.
 */
100 103 static size_t outbuflen, prntbuflen;
101 104 static char *prntbuf;
102 105 static wchar_t *outline;
103 106
104 107 static void addfile(const char *fn);
105 108 static void addpattern(char *s);
106 109 static void fixpatterns(void);
107 110 static void usage(void);
108 111 static int grep(int, const char *);
109 112 static void bmgcomp(char *, int);
110 113 static char *bmgexec(char *, char *);
111 114 static int recursive(const char *, const struct stat *, int, struct FTW *);
112 115 static void process_path(const char *);
113 116 static void process_file(const char *, int);
114 117
115 118 /*
116 119 * mainline for grep
117 120 */
118 121 int
119 122 main(int argc, char **argv)
120 123 {
121 124 char *ap;
122 125 int c;
123 126 int fflag = 0;
124 127 int i, n_pattern = 0, n_file = 0;
125 128 char **pattern_list = NULL;
126 129 char **file_list = NULL;
127 130
128 131 (void) setlocale(LC_ALL, "");
129 132 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
130 133 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
131 134 #endif
132 135 (void) textdomain(TEXT_DOMAIN);
133 136
134 137 /*
135 138 * true if this is running on the multibyte locale
136 139 */
137 140 mblocale = (MB_CUR_MAX > 1);
138 141 /*
139 142 * Skip leading slashes
140 143 */
141 144 cmdname = argv[0];
142 145 if (ap = strrchr(cmdname, '/'))
143 146 cmdname = ap + 1;
144 147
145 148 ap = cmdname;
146 149 /*
147 150 * Detect egrep/fgrep via command name, map to -E and -F options.
↓ open down ↓ |
55 lines elided |
↑ open up ↑ |
148 151 */
149 152 if (*ap == 'e' || *ap == 'E') {
150 153 regflags |= REG_EXTENDED;
151 154 egrep++;
152 155 } else {
153 156 if (*ap == 'f' || *ap == 'F') {
154 157 fgrep++;
155 158 }
156 159 }
157 160
158 - while ((c = getopt(argc, argv, "vwchilnrbse:f:qxEFIR")) != EOF) {
161 + while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) {
159 162 switch (c) {
160 163 case 'v': /* POSIX: negate matches */
161 164 nvflag = 0;
162 165 break;
163 166
164 167 case 'c': /* POSIX: write count */
165 168 cflag++;
166 169 break;
167 170
168 171 case 'i': /* POSIX: ignore case */
169 172 iflag++;
170 173 regflags |= REG_ICASE;
171 174 break;
172 175
173 176 case 'l': /* POSIX: Write filenames only */
174 177 lflag++;
175 178 break;
176 179
177 180 case 'n': /* POSIX: Write line numbers */
178 181 nflag++;
179 182 break;
180 183
181 184 case 'r': /* Solaris: search recursively */
182 185 rflag++;
183 186 break;
184 187
185 188 case 'b': /* Solaris: Write file block numbers */
186 189 bflag++;
187 190 break;
188 191
189 192 case 's': /* POSIX: No error msgs for files */
190 193 sflag++;
191 194 break;
192 195
193 196 case 'e': /* POSIX: pattern list */
194 197 n_pattern++;
195 198 pattern_list = realloc(pattern_list,
196 199 sizeof (char *) * n_pattern);
197 200 if (pattern_list == NULL) {
198 201 (void) fprintf(stderr,
199 202 gettext("%s: out of memory\n"),
200 203 cmdname);
201 204 exit(2);
202 205 }
203 206 *(pattern_list + n_pattern - 1) = optarg;
204 207 break;
205 208
206 209 case 'f': /* POSIX: pattern file */
207 210 fflag = 1;
208 211 n_file++;
↓ open down ↓ |
40 lines elided |
↑ open up ↑ |
209 212 file_list = realloc(file_list,
210 213 sizeof (char *) * n_file);
211 214 if (file_list == NULL) {
212 215 (void) fprintf(stderr,
213 216 gettext("%s: out of memory\n"),
214 217 cmdname);
215 218 exit(2);
216 219 }
217 220 *(file_list + n_file - 1) = optarg;
218 221 break;
222 +
223 + /* based on options order h or H is set as in GNU grep */
219 224 case 'h': /* Solaris: supress printing of file name */
220 225 hflag = 1;
226 + Hflag = 0;
227 + break;
228 + /* Solaris: precede every matching with file name */
229 + case 'H':
230 + Hflag = 1;
231 + hflag = 0;
221 232 break;
222 233
223 234 case 'q': /* POSIX: quiet: status only */
224 235 qflag++;
225 236 break;
226 237
227 238 case 'w': /* Solaris: treat pattern as word */
228 239 wflag++;
229 240 break;
230 241
231 242 case 'x': /* POSIX: full line matches */
232 243 xflag++;
233 244 regflags |= REG_ANCHOR;
234 245 break;
235 246
236 247 case 'E': /* POSIX: Extended RE's */
237 248 regflags |= REG_EXTENDED;
238 249 Eflag++;
239 250 break;
240 251
241 252 case 'F': /* POSIX: strings, not RE's */
242 253 Fflag++;
243 254 break;
244 255
245 256 case 'R': /* Solaris: like rflag, but follow symlinks */
246 257 Rflag++;
247 258 rflag++;
248 259 break;
249 260
250 261 default:
251 262 usage();
252 263 }
253 264 }
254 265 /*
255 266 * If we're invoked as egrep or fgrep we need to do some checks
256 267 */
257 268
258 269 if (egrep || fgrep) {
259 270 /*
260 271 * Use of -E or -F with egrep or fgrep is illegal
261 272 */
262 273 if (Eflag || Fflag)
263 274 usage();
264 275 /*
265 276 * Don't allow use of wflag with egrep / fgrep
266 277 */
267 278 if (wflag)
268 279 usage();
269 280 /*
270 281 * For Solaris the -s flag is equivalent to XCU -q
271 282 */
272 283 if (sflag)
273 284 qflag++;
274 285 /*
275 286 * done with above checks - set the appropriate flags
276 287 */
277 288 if (egrep)
278 289 Eflag++;
279 290 else /* Else fgrep */
280 291 Fflag++;
281 292 }
282 293
283 294 if (wflag && (Eflag || Fflag)) {
284 295 /*
285 296 * -w cannot be specified with grep -F
286 297 */
↓ open down ↓ |
56 lines elided |
↑ open up ↑ |
287 298 usage();
288 299 }
289 300
290 301 /*
291 302 * -E and -F flags are mutually exclusive - check for this
292 303 */
293 304 if (Eflag && Fflag)
294 305 usage();
295 306
296 307 /*
308 + * -l overrides -H like in GNU grep
309 + */
310 + if (lflag)
311 + Hflag = 0;
312 +
313 + /*
297 314 * -c, -l and -q flags are mutually exclusive
298 315 * We have -c override -l like in Solaris.
299 316 * -q overrides -l & -c programmatically in grep() function.
300 317 */
301 318 if (cflag && lflag)
302 319 lflag = 0;
303 320
304 321 argv += optind - 1;
305 322 argc -= optind - 1;
306 323
307 324 /*
308 325 * Now handling -e and -f option
309 326 */
310 327 if (pattern_list) {
311 328 for (i = 0; i < n_pattern; i++) {
312 329 addpattern(pattern_list[i]);
313 330 }
314 331 free(pattern_list);
315 332 }
316 333 if (file_list) {
317 334 for (i = 0; i < n_file; i++) {
318 335 addfile(file_list[i]);
319 336 }
320 337 free(file_list);
321 338 }
322 339
323 340 /*
324 341 * No -e or -f? Make sure there is one more arg, use it as the pattern.
325 342 */
326 343 if (patterns == NULL && !fflag) {
327 344 if (argc < 2)
328 345 usage();
329 346 addpattern(argv[1]);
330 347 argc--;
331 348 argv++;
332 349 }
333 350
334 351 /*
335 352 * If -x flag is not specified or -i flag is specified
336 353 * with fgrep in a multibyte locale, need to use
337 354 * the wide character APIs. Otherwise, byte-oriented
338 355 * process will be done.
↓ open down ↓ |
32 lines elided |
↑ open up ↑ |
339 356 */
340 357 use_wchar = Fflag && mblocale && (!xflag || iflag);
341 358
342 359 /*
343 360 * Compile Patterns and also decide if BMG can be used
344 361 */
345 362 fixpatterns();
346 363
347 364 /* Process all files: stdin, or rest of arg list */
348 365 if (argc < 2) {
349 - matched = grep(0, gettext("(standard input)"));
366 + matched = grep(0, STDIN_FILENAME);
350 367 } else {
351 - if (argc > 2 && hflag == 0)
368 + if (Hflag || (argc > 2 && hflag == 0))
352 369 outfn = 1; /* Print filename on match line */
353 370 for (argv++; *argv != NULL; argv++) {
354 371 process_path(*argv);
355 372 }
356 373 }
357 374 /*
358 375 * Return() here is used instead of exit
359 376 */
360 377
361 378 (void) fflush(stdout);
362 379
363 380 if (errors)
364 381 return (2);
365 382 return (matched ? 0 : 1);
366 383 }
367 384
368 385 static void
369 386 process_path(const char *path)
370 387 {
371 388 struct stat st;
372 389 int walkflags = FTW_CHDIR;
373 390 char *buf = NULL;
374 391
375 392 if (rflag) {
376 393 if (stat(path, &st) != -1 &&
377 394 (st.st_mode & S_IFMT) == S_IFDIR) {
378 395 outfn = 1; /* Print filename */
379 396
380 397 /*
381 398 * Add trailing slash if arg
382 399 * is directory, to resolve symlinks.
383 400 */
384 401 if (path[strlen(path) - 1] != '/') {
385 402 (void) asprintf(&buf, "%s/", path);
386 403 if (buf != NULL)
387 404 path = buf;
388 405 }
389 406
390 407 /*
391 408 * Search through subdirs if path is directory.
392 409 * Don't follow symlinks if Rflag is not set.
393 410 */
394 411 if (!Rflag)
395 412 walkflags |= FTW_PHYS;
396 413
397 414 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
398 415 if (!sflag)
399 416 (void) fprintf(stderr,
400 417 gettext("%s: can't open \"%s\"\n"),
401 418 cmdname, path);
402 419 errors = 1;
403 420 }
404 421 return;
405 422 }
406 423 }
407 424 process_file(path, 0);
408 425 }
409 426
410 427 /*
411 428 * Read and process all files in directory recursively.
412 429 */
413 430 static int
414 431 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
415 432 {
416 433 /*
417 434 * Process files and follow symlinks if Rflag set.
418 435 */
419 436 if (info != FTW_F) {
420 437 /* Report broken symlinks and unreadable files */
421 438 if (!sflag &&
422 439 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
423 440 (void) fprintf(stderr,
424 441 gettext("%s: can't open \"%s\"\n"), cmdname, name);
425 442 }
426 443 return (0);
427 444 }
428 445
429 446
430 447 /* Skip devices and pipes if Rflag is not set */
431 448 if (!Rflag && !S_ISREG(statp->st_mode))
432 449 return (0);
433 450 /* Pass offset to relative name from FTW_CHDIR */
434 451 process_file(name, ftw->base);
435 452 return (0);
436 453 }
437 454
438 455 /*
439 456 * Opens file and call grep function.
440 457 */
441 458 static void
442 459 process_file(const char *name, int base)
443 460 {
444 461 int fd;
445 462
446 463 if ((fd = open(name + base, O_RDONLY)) == -1) {
447 464 errors = 1;
448 465 if (!sflag) /* Silent mode */
449 466 (void) fprintf(stderr, gettext(
450 467 "%s: can't open \"%s\"\n"),
451 468 cmdname, name);
452 469 return;
453 470 }
454 471 matched |= grep(fd, name);
455 472 (void) close(fd);
456 473
457 474 if (ferror(stdout)) {
458 475 (void) fprintf(stderr, gettext(
459 476 "%s: error writing to stdout\n"),
460 477 cmdname);
461 478 (void) fflush(stdout);
462 479 exit(2);
463 480 }
464 481
465 482 }
466 483
467 484 /*
468 485 * Add a file of strings to the pattern list.
469 486 */
470 487 static void
471 488 addfile(const char *fn)
472 489 {
473 490 FILE *fp;
474 491 char *inbuf;
475 492 char *bufp;
476 493 size_t bufsiz, buflen, bufused;
477 494
478 495 /*
479 496 * Open the pattern file
480 497 */
481 498 if ((fp = fopen(fn, "r")) == NULL) {
482 499 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
483 500 cmdname, fn);
484 501 exit(2);
485 502 }
486 503 bufsiz = BUFSIZE;
487 504 if ((inbuf = malloc(bufsiz)) == NULL) {
488 505 (void) fprintf(stderr,
489 506 gettext("%s: out of memory\n"), cmdname);
490 507 exit(2);
491 508 }
492 509 bufp = inbuf;
493 510 bufused = 0;
494 511 /*
495 512 * Read in the file, reallocing as we need more memory
496 513 */
497 514 while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
498 515 buflen = strlen(bufp);
499 516 bufused += buflen;
500 517 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
501 518 /*
502 519 * if this line does not fit to the buffer,
503 520 * realloc larger buffer
504 521 */
505 522 bufsiz += BUFSIZE;
506 523 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
507 524 (void) fprintf(stderr,
508 525 gettext("%s: out of memory\n"),
509 526 cmdname);
510 527 exit(2);
511 528 }
512 529 bufp = inbuf + bufused;
513 530 continue;
514 531 }
515 532 if (bufp[buflen - 1] == '\n') {
516 533 bufp[--buflen] = '\0';
517 534 }
518 535 addpattern(inbuf);
519 536
520 537 bufp = inbuf;
521 538 bufused = 0;
522 539 }
523 540 free(inbuf);
524 541 (void) fclose(fp);
525 542 }
526 543
527 544 /*
528 545 * Add a string to the pattern list.
529 546 */
530 547 static void
531 548 addpattern(char *s)
532 549 {
533 550 PATTERN *pp;
534 551 char *wordbuf;
535 552 char *np;
536 553
537 554 for (; ; ) {
538 555 np = strchr(s, '\n');
539 556 if (np != NULL)
540 557 *np = '\0';
541 558 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
542 559 (void) fprintf(stderr, gettext(
543 560 "%s: out of memory\n"),
544 561 cmdname);
545 562 exit(2);
546 563 }
547 564 if (wflag) {
548 565 /*
549 566 * Solaris wflag support: Add '<' '>' to pattern to
550 567 * select it as a word. Doesn't make sense with -F
551 568 * but we're Libertarian.
552 569 */
553 570 size_t slen, wordlen;
554 571
555 572 slen = strlen(s);
556 573 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
557 574 if ((wordbuf = malloc(wordlen)) == NULL) {
558 575 (void) fprintf(stderr,
559 576 gettext("%s: out of memory\n"),
560 577 cmdname);
561 578 exit(2);
562 579 }
563 580 (void) strcpy(wordbuf, "\\<");
564 581 (void) strcpy(wordbuf + 2, s);
565 582 (void) strcpy(wordbuf + 2 + slen, "\\>");
566 583 } else {
567 584 if ((wordbuf = strdup(s)) == NULL) {
568 585 (void) fprintf(stderr,
569 586 gettext("%s: out of memory\n"),
570 587 cmdname);
571 588 exit(2);
572 589 }
573 590 }
574 591 pp->pattern = wordbuf;
575 592 pp->next = patterns;
576 593 patterns = pp;
577 594 if (np == NULL)
578 595 break;
579 596 s = np + 1;
580 597 }
581 598 }
582 599
583 600 /*
584 601 * Fix patterns.
585 602 * Must do after all arguments read, in case later -i option.
586 603 */
587 604 static void
588 605 fixpatterns(void)
589 606 {
590 607 PATTERN *pp;
591 608 int rv, fix_pattern, npatterns;
592 609
593 610 /*
594 611 * As REG_ANCHOR flag is not supported in the current Solaris,
595 612 * need to fix the specified pattern if -x is specified with
596 613 * grep or egrep
597 614 */
598 615 fix_pattern = !Fflag && xflag;
599 616
600 617 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
601 618 npatterns++;
602 619 if (fix_pattern) {
603 620 char *cp, *cq;
604 621 size_t plen, nplen;
605 622
606 623 plen = strlen(pp->pattern);
607 624 /* '^' pattern '$' */
608 625 nplen = 1 + plen + 1 + 1;
609 626 if ((cp = malloc(nplen)) == NULL) {
610 627 (void) fprintf(stderr,
611 628 gettext("%s: out of memory\n"),
612 629 cmdname);
613 630 exit(2);
614 631 }
615 632 cq = cp;
616 633 *cq++ = '^';
617 634 cq = strcpy(cq, pp->pattern) + plen;
618 635 *cq++ = '$';
619 636 *cq = '\0';
620 637 free(pp->pattern);
621 638 pp->pattern = cp;
622 639 }
623 640
624 641 if (Fflag) {
625 642 if (use_wchar) {
626 643 /*
627 644 * Fflag && mblocale && iflag
628 645 * Fflag && mblocale && !xflag
629 646 */
630 647 size_t n;
631 648 n = strlen(pp->pattern) + 1;
632 649 if ((pp->wpattern =
633 650 malloc(sizeof (wchar_t) * n)) == NULL) {
634 651 (void) fprintf(stderr,
635 652 gettext("%s: out of memory\n"),
636 653 cmdname);
637 654 exit(2);
638 655 }
639 656 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
640 657 (size_t)-1) {
641 658 (void) fprintf(stderr,
642 659 gettext("%s: failed to convert "
643 660 "\"%s\" to wide-characters\n"),
644 661 cmdname, pp->pattern);
645 662 exit(2);
646 663 }
647 664 if (iflag) {
648 665 wchar_t *wp;
649 666 for (wp = pp->wpattern; *wp != L'\0';
650 667 wp++) {
651 668 *wp = towlower((wint_t)*wp);
652 669 }
653 670 }
654 671 free(pp->pattern);
655 672 } else {
656 673 /*
657 674 * Fflag && mblocale && !iflag
658 675 * Fflag && !mblocale && iflag
659 676 * Fflag && !mblocale && !iflag
660 677 */
661 678 if (iflag) {
662 679 unsigned char *cp;
663 680 for (cp = (unsigned char *)pp->pattern;
664 681 *cp != '\0'; cp++) {
665 682 *cp = tolower(*cp);
666 683 }
667 684 }
668 685 }
669 686 /*
670 687 * fgrep: No regular expressions.
671 688 */
672 689 continue;
673 690 }
674 691
675 692 /*
676 693 * For non-fgrep, compile the regular expression,
677 694 * give an informative error message, and exit if
678 695 * it didn't compile.
679 696 */
680 697 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
681 698 (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
682 699 (void) fprintf(stderr,
683 700 gettext("%s: RE error in %s: %s\n"),
684 701 cmdname, pp->pattern, errstr);
685 702 exit(2);
686 703 }
687 704 free(pp->pattern);
688 705 }
689 706
690 707 /*
691 708 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
692 709 * Use the Boyer-Moore-Gosper algorithm if:
693 710 * - fgrep (Fflag)
694 711 * - singlebyte locale (!mblocale)
695 712 * - no ignoring case (!iflag)
696 713 * - no printing line numbers (!nflag)
697 714 * - no negating the output (nvflag)
698 715 * - only one pattern (npatterns == 1)
699 716 * - non zero length pattern (strlen(patterns->pattern) != 0)
700 717 *
701 718 * It's guaranteed patterns->pattern is still alive
702 719 * when Fflag && !mblocale.
703 720 */
704 721 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
705 722 (npatterns == 1) && (strlen(patterns->pattern) != 0);
706 723 }
707 724
/*
 * Search a newline from the beginning of the string.
 *
 * Looks at exactly len bytes starting at ptr (a NUL byte does not end
 * the search) and returns a pointer to the first newline among them,
 * or NULL if there is none, which includes len == 0.
 */
static char *
find_nl(const char *ptr, size_t len)
{
	return (memchr(ptr, '\n', len));
}
721 738
/*
 * Search a newline from the end of the string.
 *
 * Examines the len bytes starting at ptr, last byte first, and returns
 * a pointer to the last newline among them, or NULL if there is none
 * (always the case when len is 0).
 */
static char *
rfind_nl(const char *ptr, size_t len)
{
	size_t remaining;

	for (remaining = len; remaining > 0; remaining--) {
		const char *cur = ptr + (remaining - 1);

		if (*cur == '\n')
			return ((char *)cur);
	}
	return (NULL);
}
736 753
737 754 /*
738 755 * Duplicate the specified string converting each character
739 756 * into a lower case.
740 757 */
741 758 static char *
742 759 istrdup(const char *s1)
743 760 {
744 761 static size_t ibuflen = 0;
745 762 static char *ibuf = NULL;
746 763 size_t slen;
747 764 char *p;
748 765
749 766 slen = strlen(s1);
750 767 if (slen >= ibuflen) {
751 768 /* ibuf does not fit to s1 */
752 769 ibuflen = slen + 1;
753 770 ibuf = realloc(ibuf, ibuflen);
754 771 if (ibuf == NULL) {
755 772 (void) fprintf(stderr,
756 773 gettext("%s: out of memory\n"), cmdname);
757 774 exit(2);
758 775 }
759 776 }
760 777 p = ibuf;
761 778 do {
762 779 *p++ = tolower(*s1);
763 780 } while (*s1++ != '\0');
764 781 return (ibuf);
765 782 }
766 783
767 784 /*
768 785 * Do grep on a single file.
769 786 * Return true in any lines matched.
770 787 *
771 788 * We have two strategies:
772 789 * The fast one is used when we have a single pattern with
773 790 * a string known to occur in the pattern. We can then
774 791 * do a BMG match on the whole buffer.
775 792 * This is an order of magnitude faster.
776 793 * Otherwise we split the buffer into lines,
777 794 * and check for a match on each line.
778 795 */
779 796 static int
780 797 grep(int fd, const char *fn)
781 798 {
782 799 PATTERN *pp;
783 800 off_t data_len; /* length of the data chunk */
784 801 off_t line_len; /* length of the current line */
785 802 off_t line_offset; /* current line's offset from the beginning */
786 803 long long lineno;
787 804 long long matches = 0; /* Number of matching lines */
788 805 int newlinep; /* 0 if the last line of file has no newline */
789 806 char *ptr, *ptrend;
790 807
791 808
792 809 if (patterns == NULL)
793 810 return (0); /* no patterns to match -- just return */
794 811
795 812 pp = patterns;
796 813
797 814 if (use_bmg) {
798 815 bmgcomp(pp->pattern, strlen(pp->pattern));
799 816 }
800 817
801 818 if (use_wchar && outline == NULL) {
802 819 outbuflen = BUFSIZE + 1;
803 820 outline = malloc(sizeof (wchar_t) * outbuflen);
804 821 if (outline == NULL) {
805 822 (void) fprintf(stderr, gettext("%s: out of memory\n"),
806 823 cmdname);
807 824 exit(2);
808 825 }
809 826 }
810 827
811 828 if (prntbuf == NULL) {
812 829 prntbuflen = BUFSIZE;
813 830 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
814 831 (void) fprintf(stderr, gettext("%s: out of memory\n"),
815 832 cmdname);
816 833 exit(2);
817 834 }
818 835 }
819 836
820 837 line_offset = 0;
821 838 lineno = 0;
822 839 newlinep = 1;
823 840 data_len = 0;
824 841 for (; ; ) {
825 842 long count;
826 843 off_t offset = 0;
827 844
828 845 if (data_len == 0) {
829 846 /*
830 847 * If no data in the buffer, reset ptr
831 848 */
832 849 ptr = prntbuf;
833 850 }
834 851 if (ptr == prntbuf) {
835 852 /*
836 853 * The current data chunk starts from prntbuf.
837 854 * This means either the buffer has no data
838 855 * or the buffer has no newline.
839 856 * So, read more data from input.
840 857 */
841 858 count = read(fd, ptr + data_len, prntbuflen - data_len);
842 859 if (count < 0) {
843 860 /* read error */
844 861 if (cflag) {
845 862 if (outfn && !rflag) {
846 863 (void) fprintf(stdout,
847 864 "%s:", fn);
848 865 }
849 866 if (!qflag && !rflag) {
850 867 (void) fprintf(stdout, "%lld\n",
851 868 matches);
852 869 }
853 870 }
854 871 return (0);
855 872 } else if (count == 0) {
856 873 /* no new data */
857 874 if (data_len == 0) {
858 875 /* end of file already reached */
859 876 break;
860 877 }
861 878 /* last line of file has no newline */
862 879 ptrend = ptr + data_len;
863 880 newlinep = 0;
864 881 goto L_start_process;
865 882 }
866 883 offset = data_len;
867 884 data_len += count;
868 885 }
869 886
870 887 /*
871 888 * Look for newline in the chunk
872 889 * between ptr + offset and ptr + data_len - offset.
873 890 */
874 891 ptrend = find_nl(ptr + offset, data_len - offset);
875 892 if (ptrend == NULL) {
876 893 /* no newline found in this chunk */
877 894 if (ptr > prntbuf) {
878 895 /*
879 896 * Move remaining data to the beginning
880 897 * of the buffer.
881 898 * Remaining data lie from ptr for
882 899 * data_len bytes.
883 900 */
884 901 (void) memmove(prntbuf, ptr, data_len);
885 902 }
886 903 if (data_len == prntbuflen) {
887 904 /*
888 905 * No enough room in the buffer
889 906 */
890 907 prntbuflen += BUFSIZE;
891 908 prntbuf = realloc(prntbuf, prntbuflen + 1);
892 909 if (prntbuf == NULL) {
893 910 (void) fprintf(stderr,
894 911 gettext("%s: out of memory\n"),
895 912 cmdname);
896 913 exit(2);
897 914 }
898 915 }
899 916 ptr = prntbuf;
900 917 /* read the next input */
901 918 continue;
902 919 }
903 920 L_start_process:
904 921
905 922 /*
906 923 * Beginning of the chunk: ptr
907 924 * End of the chunk: ptr + data_len
908 925 * Beginning of the line: ptr
909 926 * End of the line: ptrend
910 927 */
911 928
912 929 if (use_bmg) {
913 930 /*
914 931 * Use Boyer-Moore-Gosper algorithm to find out if
915 932 * this chunk (not this line) contains the specified
916 933 * pattern. If not, restart from the last line
917 934 * of this chunk.
918 935 */
919 936 char *bline;
920 937 bline = bmgexec(ptr, ptr + data_len);
921 938 if (bline == NULL) {
922 939 /*
923 940 * No pattern found in this chunk.
924 941 * Need to find the last line
925 942 * in this chunk.
926 943 */
927 944 ptrend = rfind_nl(ptr, data_len);
928 945
929 946 /*
930 947 * When this chunk does not contain newline,
931 948 * ptrend becomes NULL, which should happen
932 949 * when the last line of file does not end
933 950 * with a newline. At such a point,
934 951 * newlinep should have been set to 0.
935 952 * Therefore, just after jumping to
936 953 * L_skip_line, the main for-loop quits,
937 954 * and the line_len value won't be
938 955 * used.
939 956 */
940 957 line_len = ptrend - ptr;
941 958 goto L_skip_line;
942 959 }
943 960 if (bline > ptrend) {
944 961 /*
945 962 * Pattern found not in the first line
946 963 * of this chunk.
947 964 * Discard the first line.
948 965 */
949 966 line_len = ptrend - ptr;
950 967 goto L_skip_line;
951 968 }
952 969 /*
953 970 * Pattern found in the first line of this chunk.
954 971 * Using this result.
955 972 */
956 973 *ptrend = '\0';
957 974 line_len = ptrend - ptr;
958 975
959 976 /*
960 977 * before jumping to L_next_line,
961 978 * need to handle xflag if specified
962 979 */
963 980 if (xflag && (line_len != bmglen ||
964 981 strcmp(bmgpat, ptr) != 0)) {
965 982 /* didn't match */
966 983 pp = NULL;
967 984 } else {
968 985 pp = patterns; /* to make it happen */
969 986 }
970 987 goto L_next_line;
971 988 }
972 989 lineno++;
973 990 /*
974 991 * Line starts from ptr and ends at ptrend.
975 992 * line_len will be the length of the line.
976 993 */
977 994 *ptrend = '\0';
978 995 line_len = ptrend - ptr;
979 996
980 997 /*
981 998 * From now, the process will be performed based
982 999 * on the line from ptr to ptrend.
983 1000 */
984 1001 if (use_wchar) {
985 1002 size_t len;
986 1003
987 1004 if (line_len >= outbuflen) {
988 1005 outbuflen = line_len + 1;
989 1006 outline = realloc(outline,
990 1007 sizeof (wchar_t) * outbuflen);
991 1008 if (outline == NULL) {
992 1009 (void) fprintf(stderr,
993 1010 gettext("%s: out of memory\n"),
994 1011 cmdname);
995 1012 exit(2);
996 1013 }
997 1014 }
998 1015
999 1016 len = mbstowcs(outline, ptr, line_len);
1000 1017 if (len == (size_t)-1) {
1001 1018 (void) fprintf(stderr, gettext(
1002 1019 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1003 1020 cmdname, fn, lineno);
1004 1021 /* never match a line with invalid sequence */
1005 1022 goto L_skip_line;
1006 1023 }
1007 1024 outline[len] = L'\0';
1008 1025
1009 1026 if (iflag) {
1010 1027 wchar_t *cp;
1011 1028 for (cp = outline; *cp != '\0'; cp++) {
1012 1029 *cp = towlower((wint_t)*cp);
1013 1030 }
1014 1031 }
1015 1032
1016 1033 if (xflag) {
1017 1034 for (pp = patterns; pp; pp = pp->next) {
1018 1035 if (outline[0] == pp->wpattern[0] &&
1019 1036 wcscmp(outline,
1020 1037 pp->wpattern) == 0) {
1021 1038 /* matched */
1022 1039 break;
1023 1040 }
1024 1041 }
1025 1042 } else {
1026 1043 for (pp = patterns; pp; pp = pp->next) {
1027 1044 if (wcswcs(outline, pp->wpattern)
1028 1045 != NULL) {
1029 1046 /* matched */
1030 1047 break;
1031 1048 }
1032 1049 }
1033 1050 }
1034 1051 } else if (Fflag) {
1035 1052 /* fgrep in byte-oriented handling */
1036 1053 char *fptr;
1037 1054 if (iflag) {
1038 1055 fptr = istrdup(ptr);
1039 1056 } else {
1040 1057 fptr = ptr;
1041 1058 }
1042 1059 if (xflag) {
1043 1060 /* fgrep -x */
1044 1061 for (pp = patterns; pp; pp = pp->next) {
1045 1062 if (fptr[0] == pp->pattern[0] &&
1046 1063 strcmp(fptr, pp->pattern) == 0) {
1047 1064 /* matched */
1048 1065 break;
1049 1066 }
1050 1067 }
1051 1068 } else {
1052 1069 for (pp = patterns; pp; pp = pp->next) {
1053 1070 if (strstr(fptr, pp->pattern) != NULL) {
1054 1071 /* matched */
1055 1072 break;
1056 1073 }
1057 1074 }
1058 1075 }
1059 1076 } else {
1060 1077 /* grep or egrep */
1061 1078 for (pp = patterns; pp; pp = pp->next) {
1062 1079 int rv;
1063 1080
1064 1081 rv = regexec(&pp->re, ptr, 0, NULL, 0);
1065 1082 if (rv == REG_OK) {
1066 1083 /* matched */
1067 1084 break;
1068 1085 }
1069 1086
1070 1087 switch (rv) {
1071 1088 case REG_NOMATCH:
1072 1089 break;
1073 1090 case REG_ECHAR:
1074 1091 (void) fprintf(stderr, gettext(
1075 1092 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1076 1093 cmdname, fn, lineno);
1077 1094 break;
1078 1095 default:
1079 1096 (void) regerror(rv, &pp->re, errstr,
1080 1097 sizeof (errstr));
1081 1098 (void) fprintf(stderr, gettext(
1082 1099 "%s: input file \"%s\": line %lld: %s\n"),
1083 1100 cmdname, fn, lineno, errstr);
1084 1101 exit(2);
1085 1102 }
1086 1103 }
1087 1104 }
1088 1105
1089 1106 L_next_line:
1090 1107 /*
1091 1108 * Here, if pp points to non-NULL, something has been matched
1092 1109 * to the pattern.
1093 1110 */
1094 1111 if (nvflag == (pp != NULL)) {
1095 1112 matches++;
1096 1113 /*
1097 1114 * Handle q, l, and c flags.
1098 1115 */
1099 1116 if (qflag) {
1100 1117 /* no need to continue */
1101 1118 /*
1102 1119 * End of this line is ptrend.
1103 1120 * We have read up to ptr + data_len.
1104 1121 */
↓ open down ↓ |
743 lines elided |
↑ open up ↑ |
1105 1122 off_t pos;
1106 1123 pos = ptr + data_len - (ptrend + 1);
1107 1124 (void) lseek(fd, -pos, SEEK_CUR);
1108 1125 exit(0);
1109 1126 }
1110 1127 if (lflag) {
1111 1128 (void) printf("%s\n", fn);
1112 1129 break;
1113 1130 }
1114 1131 if (!cflag) {
1115 - if (outfn) {
1132 + if (Hflag || outfn) {
1116 1133 (void) printf("%s:", fn);
1117 1134 }
1118 1135 if (bflag) {
1119 1136 (void) printf("%lld:", (offset_t)
1120 1137 (line_offset / BSIZE));
1121 1138 }
1122 1139 if (nflag) {
1123 1140 (void) printf("%lld:", lineno);
1124 1141 }
1125 1142 *ptrend = '\n';
1126 1143 (void) fwrite(ptr, 1, line_len + 1, stdout);
1127 1144 }
1128 1145 if (ferror(stdout)) {
1129 1146 return (0);
1130 1147 }
1131 1148 }
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
1132 1149 L_skip_line:
1133 1150 if (!newlinep)
1134 1151 break;
1135 1152
1136 1153 data_len -= line_len + 1;
1137 1154 line_offset += line_len + 1;
1138 1155 ptr = ptrend + 1;
1139 1156 }
1140 1157
1141 1158 if (cflag) {
1142 - if (outfn) {
1159 + if (Hflag || outfn) {
1143 1160 (void) printf("%s:", fn);
1144 1161 }
1145 1162 if (!qflag) {
1146 1163 (void) printf("%lld\n", matches);
1147 1164 }
1148 1165 }
1149 1166 return (matches != 0);
1150 1167 }
1151 1168
1152 1169 /*
1153 1170 * usage message for grep
1154 1171 */
1155 1172 static void
1156 1173 usage(void)
1157 1174 {
1158 1175 if (egrep || fgrep) {
1159 1176 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1160 1177 (void) fprintf(stderr,
1161 - gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1178 + gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1162 1179 "pattern_list [file ...]\n"));
1163 1180
1164 1181 (void) fprintf(stderr, "\t%s", cmdname);
1165 1182 (void) fprintf(stderr,
1166 - gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1183 + gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1167 1184 "[-e pattern_list]... "
1168 1185 "[-f pattern_file]... [file...]\n"));
1169 1186 } else {
1170 1187 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1171 1188 (void) fprintf(stderr,
1172 - gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1189 + gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1173 1190 "pattern_list [file ...]\n"));
1174 1191
1175 1192 (void) fprintf(stderr, "\t%s", cmdname);
1176 1193 (void) fprintf(stderr,
1177 - gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1194 + gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1178 1195 "[-e pattern_list]... "
1179 1196 "[-f pattern_file]... [file...]\n"));
1180 1197
1181 1198 (void) fprintf(stderr, "\t%s", cmdname);
1182 1199 (void) fprintf(stderr,
1183 - gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1200 + gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1184 1201 "pattern_list [file ...]\n"));
1185 1202
1186 1203 (void) fprintf(stderr, "\t%s", cmdname);
1187 1204 (void) fprintf(stderr,
1188 - gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1205 + gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1189 1206 "[-e pattern_list]... "
1190 1207 "[-f pattern_file]... [file...]\n"));
1191 1208
1192 1209 (void) fprintf(stderr, "\t%s", cmdname);
1193 1210 (void) fprintf(stderr,
1194 - gettext(" -F [-c|-l|-q] [-r|-R] [-bhinsvx] "
1211 + gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1195 1212 "pattern_list [file ...]\n"));
1196 1213
1197 1214 (void) fprintf(stderr, "\t%s", cmdname);
1198 1215 (void) fprintf(stderr,
1199 - gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1216 + gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
1200 1217 "[-f pattern_file]... [file...]\n"));
1201 1218 }
1202 1219 exit(2);
1203 1220 /* NOTREACHED */
1204 1221 }
1205 1222
1206 1223 /*
1207 1224 * Compile literal pattern into BMG tables
1208 1225 */
1209 1226 static void
1210 1227 bmgcomp(char *pat, int len)
1211 1228 {
1212 1229 int i;
1213 1230 int tlen;
1214 1231 unsigned char *uc = (unsigned char *)pat;
1215 1232
1216 1233 bmglen = len;
1217 1234 bmgpat = pat;
1218 1235
1219 1236 for (i = 0; i < M_CSETSIZE; i++) {
1220 1237 bmgtab[i] = len;
1221 1238 }
1222 1239
1223 1240 len--;
1224 1241 for (tlen = len, i = 0; i <= len; i++, tlen--) {
1225 1242 bmgtab[*uc++] = tlen;
1226 1243 }
1227 1244 }
1228 1245
1229 1246 /*
1230 1247 * BMG search.
1231 1248 */
1232 1249 static char *
1233 1250 bmgexec(char *str, char *end)
1234 1251 {
1235 1252 int t;
1236 1253 char *k, *s, *p;
1237 1254
1238 1255 k = str + bmglen - 1;
1239 1256 if (bmglen == 1) {
1240 1257 return (memchr(str, bmgpat[0], end - str));
1241 1258 }
1242 1259 for (; ; ) {
1243 1260 /* inner loop, should be most optimized */
1244 1261 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1245 1262 k += t;
1246 1263 }
1247 1264 if (k >= end) {
1248 1265 return (NULL);
1249 1266 }
1250 1267 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1251 1268 if (p == bmgpat) {
1252 1269 return (s);
1253 1270 }
1254 1271 }
1255 1272 k++;
1256 1273 }
1257 1274 /* NOTREACHED */
1258 1275 }
↓ open down ↓ |
49 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX