1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 /* Copyright (c) 1987, 1988 Microsoft Corporation */
31 /* All Rights Reserved */
32
33 #pragma ident "%Z%%M% %I% %E% SMI"
34
35 /*
36 * fgrep -- print all lines containing any of a set of keywords
37 *
38 * status returns:
39 * 0 - ok, and some matches
40 * 1 - ok, but no matches
41 * 2 - some error
42 */
43
44 #include <stdio.h>
45 #include <ctype.h>
46 #include <sys/types.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <locale.h>
50 #include <libintl.h>
51 #include <euc.h>
52 #include <sys/stat.h>
53 #include <fcntl.h>
54
55 #include <getwidth.h>
56
/*
 * Character-width information for the current locale, filled in by
 * getwidth() in main().  The WIDTHn macros give the byte width used for
 * each code set; main() adjusts them after getwidth() returns (WIDTH1 is
 * forced to 1 when all three are 0, and WIDTH2/WIDTH3 are incremented).
 */
eucwidth_t WW;
#define	WIDTH1	WW._eucw1
#define	WIDTH2	WW._eucw2
#define	WIDTH3	WW._eucw3
#define	MULTI_BYTE	WW._multibyte

/*
 * GETONE(lc, p) -- fetch the next (possibly multi-byte) character at p
 * into lc, advancing p and decrementing ccount by the bytes consumed.
 *
 * The width is chosen from the first byte: 1 for ASCII, otherwise WIDTH2,
 * WIDTH3 or WIDTH1 according to ISSET2()/ISSET3().  Each following byte is
 * folded in as lc = (lc << 7) | (byte & 0177).
 *
 * If the character straddles the end of the data currently buffered, the
 * bytes that are available are consumed, the buffer is grown (when the
 * current line already starts at buf) or shifted down (otherwise), more
 * input is read from fptr, and the remaining bytes are then consumed.
 *
 * This macro is NOT self-contained:
 *   - it expands to several statements (no do/while wrapper) and the last
 *     one has no trailing semicolon;
 *   - it uses the expansion site's cw, ccount, buf, nlp, fptr and blkno,
 *     plus the file-scope fw_lBufsiz;
 *   - on end of input or read error it executes a bare `break`, which
 *     leaves whatever loop encloses the expansion;
 *   - on realloc failure it calls exit(2).
 */
#define	GETONE(lc, p) \
	cw = ISASCII(lc = (unsigned char)*p++) ? 1 : \
	    (ISSET2(lc) ? WIDTH2 : \
	    (ISSET3(lc) ? WIDTH3 : WIDTH1)); \
	if (--cw > --ccount) { \
		cw -= ccount; \
		while (ccount--) \
			lc = (lc << 7) | ((*p++) & 0177); \
		if (p >= &buf[fw_lBufsiz + BUFSIZ]) { \
			if (nlp == buf) { \
				/* Increase the buffer size */ \
				fw_lBufsiz += BUFSIZ; \
				if ((buf = realloc(buf, \
				    fw_lBufsiz + BUFSIZ)) == NULL) { \
					exit(2); /* out of memory */ \
				} \
				nlp = buf; \
				p = &buf[fw_lBufsiz]; \
			} else { \
				/* shift the buffer contents down */ \
				(void) memmove(buf, nlp, \
				    &buf[fw_lBufsiz + BUFSIZ] - nlp);\
				p -= nlp - buf; \
				nlp = buf; \
			} \
		} \
		if (p > &buf[fw_lBufsiz]) { \
			if ((ccount = fread(p, sizeof (char), \
			    &buf[fw_lBufsiz + BUFSIZ] - p, fptr))\
			    <= 0) break; \
		} else if ((ccount = fread(p, \
		    sizeof (char), BUFSIZ, fptr)) <= 0) \
			break; \
		blkno += (long long)ccount; \
	} \
	ccount -= cw; \
	while (cw--) \
		lc = (lc << 7) | ((*p++) & 0177)
100
/*
 * The same() macro and the letter() function were added so that the -i
 * option works in a multi-byte environment.
 *
 * same(a, b) is true when a and b are equal, or when -i is in effect, the
 * character is eligible for case folding (the locale is not multi-byte, or
 * a is ASCII), a and b differ in exactly the 0x20 bit ((a ^ b) == ' '), and
 * both map to the same value under letter().
 *
 * Note that && binds tighter than ||, so the whole -i clause is the right
 * operand of the ||.  The arguments are not parenthesized and are evaluated
 * more than once; pass only simple, side-effect-free expressions.
 */
wchar_t letter();
#define	same(a, b) \
	(a == b || iflag && (!MULTI_BYTE || ISASCII(a)) && (a ^ b) == ' ' && \
	letter(a) == letter(b))
109
/* Name printed in place of a file name when reading standard input. */
#define	STDIN_FILENAME	gettext("(standard input)")

/* Initial size, and growth increment, of the work queue used by cfail(). */
#define	QSIZE 400

/*
 * One node of the keyword automaton.  Nodes live in a single array
 * allocated in main(); cgotofn() hands them out in order and cfail()
 * fills in the failure links.
 */
struct words {
	wchar_t inp;		/* input character for this transition; 0 if unused */
	char out;		/* non-zero: a keyword ends at this node */
	struct words *nst;	/* node reached when inp matches */
	struct words *link;	/* next alternative transition to try */
	struct words *fail;	/* failure transition set by cfail(); 0 means root */
} *w = NULL, *smax, *q;	/* w: array base / root; smax: last node handed out; */
			/* q: scratch pointer used by cfail() */

FILE *fptr;		/* input currently being scanned by execute() */
long long lnum;		/* current line number in the input, starting at 1 */
/* Option flags; each is incremented when the option letter is seen. */
int bflag, cflag, lflag, fflag, nflag, vflag, xflag, eflag, qflag;
int Hflag, hflag, iflag;	/* -H and -h reset each other in main() */
int retcode = 0;	/* set to 2 when an input file cannot be opened */
int nfile;		/* number of file operands */
long long blkno;	/* running total of bytes read from the current input */
int nsucc;		/* set to 1 once any line has been selected */
long long tln;		/* selected-line count for the current input (-c) */
FILE *wordf;		/* keyword file opened for -f, else NULL */
char *argptr;		/* keyword string from -e or the first operand */
off_t input_size = 0;	/* number of struct words to allocate for w */

void execute(char *);
void cgotofn(void);
void overflo(void);
void cfail(void);

/* execute()'s input buffer holds fw_lBufsiz + BUFSIZ bytes. */
static long fw_lBufsiz = 0;
140
141 int
142 main(int argc, char **argv)
143 {
144 int c;
145 int errflg = 0;
146 struct stat file_stat;
147
148 (void) setlocale(LC_ALL, "");
149 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
150 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
151 #endif
152 (void) textdomain(TEXT_DOMAIN);
153
154 while ((c = getopt(argc, argv, "Hhybcie:f:lnvxqs")) != EOF)
155 switch (c) {
156
157 case 'q':
158 case 's': /* Solaris: legacy option */
159 qflag++;
160 continue;
161 case 'H':
162 Hflag++;
163 hflag = 0;
164 continue;
165 case 'h':
166 hflag++;
167 Hflag = 0;
168 continue;
169 case 'b':
170 bflag++;
171 continue;
172
173 case 'i':
174 case 'y':
175 iflag++;
176 continue;
177
178 case 'c':
179 cflag++;
180 continue;
181
182 case 'e':
183 eflag++;
184 argptr = optarg;
185 input_size = strlen(argptr);
186 continue;
187
188 case 'f':
189 fflag++;
190 wordf = fopen(optarg, "r");
191 if (wordf == NULL) {
192 (void) fprintf(stderr,
193 gettext("fgrep: can't open %s\n"),
194 optarg);
195 exit(2);
196 }
197
198 if (fstat(fileno(wordf), &file_stat) == 0) {
199 input_size = file_stat.st_size;
200 } else {
201 (void) fprintf(stderr,
202 gettext("fgrep: can't fstat %s\n"),
203 optarg);
204 exit(2);
205 }
206
207 continue;
208
209 case 'l':
210 lflag++;
211 continue;
212
213 case 'n':
214 nflag++;
215 continue;
216
217 case 'v':
218 vflag++;
219 continue;
220
221 case 'x':
222 xflag++;
223 continue;
224
225 case '?':
226 errflg++;
227 }
228
229 argc -= optind;
230 if (errflg || ((argc <= 0) && !fflag && !eflag)) {
231 (void) printf(gettext("usage: fgrep [ -bcHhilnqsvx ] "
232 "[ -e exp ] [ -f file ] [ strings ] [ file ] ...\n"));
233 exit(2);
234 }
235 if (!eflag && !fflag) {
236 argptr = argv[optind];
237 input_size = strlen(argptr);
238 input_size++;
239 optind++;
240 argc--;
241 }
242
243 /*
244 * Normally we need one struct words for each letter in the pattern
245 * plus one terminating struct words with outp = 1, but when -x option
246 * is specified we require one more struct words for `\n` character so we
247 * calculate the input_size as below. We add extra 1 because
248 * (input_size/2) rounds off odd numbers
249 */
250
251 if (xflag) {
252 input_size = input_size + (input_size/2) + 1;
253 }
254
255 input_size++;
256
257 w = (struct words *)calloc(input_size, sizeof (struct words));
258 if (w == NULL) {
259 (void) fprintf(stderr,
260 gettext("fgrep: could not allocate "
261 "memory for wordlist\n"));
262 exit(2);
263 }
264
265 getwidth(&WW);
266 if ((WIDTH1 == 0) && (WIDTH2 == 0) &&
267 (WIDTH3 == 0)) {
268 /*
269 * If non EUC-based locale,
270 * assume WIDTH1 is 1.
271 */
272 WIDTH1 = 1;
273 }
274 WIDTH2++;
275 WIDTH3++;
276
277 cgotofn();
278 cfail();
279 nfile = argc;
280 argv = &argv[optind];
281 if (argc <= 0) {
282 execute((char *)NULL);
283 } else
284 while (--argc >= 0) {
285 execute(*argv);
286 argv++;
287 }
288
289 if (w != NULL) {
290 free(w);
291 }
292
293 return (retcode != 0 ? retcode : nsucc == 0);
294 }
295
/*
 * execute -- scan one input and report the lines selected by the keyword
 * automaton rooted at w.
 *
 * file is the path to open, or NULL to read standard input (in which case
 * STDIN_FILENAME is used wherever a name is printed).  If the file cannot
 * be opened a message is written to stderr, retcode is set to 2 and the
 * function returns.
 *
 * Output depends on the option flags: -l prints only the file name and
 * -q prints nothing, and both stop at the first selected line; -c prints
 * a count at the end; otherwise each selected line is printed, optionally
 * prefixed by the file name, the -b value and the line number.  nsucc is
 * set to 1 as soon as any line is selected.
 *
 * The input buffer is static, shared by all calls, and grows as needed to
 * hold the longest line seen; allocation failure exits with status 2.
 * Note that GETONE() contains a hidden `break` that leaves the enclosing
 * loop at end of input.
 */
void
execute(char *file)
{
	char *p;		/* next unread byte in buf */
	struct words *c;	/* current automaton node */
	int ccount;		/* unread bytes left from the last fread() */
	static char *buf = NULL;	/* input buffer, kept across calls */
	int failed;		/* a failure transition was taken on this line */
	char *nlp;		/* start of the current line in buf */
	wchar_t lc;		/* character most recently fetched by GETONE */
	int cw;			/* scratch byte count used by GETONE */

	if (buf == NULL) {
		fw_lBufsiz = BUFSIZ;
		if ((buf = malloc(fw_lBufsiz + BUFSIZ)) == NULL) {
			exit(2);	/* out of memory */
		}
	}

	if (file) {
		if ((fptr = fopen(file, "r")) == NULL) {
			(void) fprintf(stderr,
			    gettext("fgrep: can't open %s\n"), file);
			retcode = 2;
			return;
		}
	} else {
		fptr = stdin;
		file = STDIN_FILENAME;
	}
	ccount = 0;
	failed = 0;
	lnum = 1;
	tln = 0;
	blkno = 0;
	p = buf;
	nlp = p;
	c = w;
	for (;;) {
		if (c == 0)
			break;
		/* No buffered input left: make room if needed, then read. */
		if (ccount <= 0) {
			if (p >= &buf[fw_lBufsiz + BUFSIZ]) {
				if (nlp == buf) {
					/*
					 * The current line already starts at
					 * buf: increase the buffer size.
					 */
					fw_lBufsiz += BUFSIZ;
					if ((buf = realloc(buf,
					    fw_lBufsiz + BUFSIZ)) == NULL) {
						exit(2);	/* out of memory */
					}
					nlp = buf;
					p = &buf[fw_lBufsiz];
				} else {
					/*
					 * shift the buffer down so the
					 * current line starts at buf
					 */
					(void) memmove(buf, nlp,
					    &buf[fw_lBufsiz + BUFSIZ]
					    - nlp);
					p -= nlp - buf;
					nlp = buf;
				}

			}
			/* Read only what fits between p and the buffer end. */
			if (p > &buf[fw_lBufsiz]) {
				if ((ccount = fread(p, sizeof (char),
				    &buf[fw_lBufsiz + BUFSIZ] - p, fptr))
				    <= 0)
					break;
			} else if ((ccount = fread(p, sizeof (char),
			    BUFSIZ, fptr)) <= 0)
				break;
			blkno += (long long)ccount;
		}
		GETONE(lc, p);
		/*
		 * Advance the automaton on lc: take the matching transition,
		 * else try the next alternative (link), else follow the
		 * failure transition and try again.
		 */
nstate:
		if (same(c->inp, lc)) {
			c = c->nst;
		} else if (c->link != 0) {
			c = c->link;
			goto nstate;
		} else {
			c = c->fail;
			failed = 1;
			if (c == 0) {
				/* No failure node: retry from the root. */
				c = w;
istate:
				if (same(c->inp, lc)) {
					c = c->nst;
				} else if (c->link != 0) {
					c = c->link;
					goto istate;
				}
			} else
				goto nstate;
		}

		if (c == 0)
			break;

		if (c->out) {
			/* A keyword ended here: consume the rest of the line. */
			while (lc != '\n') {
				if (ccount <= 0) {
					if (p == &buf[fw_lBufsiz + BUFSIZ]) {
						if (nlp == buf) {
							/* increase buffer size */
							fw_lBufsiz += BUFSIZ;
							if ((buf = realloc(buf, fw_lBufsiz + BUFSIZ)) == NULL) {
								exit(2);	/* out of memory */
							}
							nlp = buf;
							p = &buf[fw_lBufsiz];
						} else {
							/* shift buffer down */
							(void) memmove(buf, nlp, &buf[fw_lBufsiz + BUFSIZ] - nlp);
							p -= nlp - buf;
							nlp = buf;
						}
					}
					if (p > &buf[fw_lBufsiz]) {
						if ((ccount = fread(p, sizeof (char),
						    &buf[fw_lBufsiz + BUFSIZ] - p, fptr)) <= 0) break;
					} else if ((ccount = fread(p, sizeof (char), BUFSIZ,
					    fptr)) <= 0) break;
					blkno += (long long)ccount;
				}
				GETONE(lc, p);
			}
			/*
			 * With -v a keyword hit rejects the line, unless -x
			 * is set and a failure transition was taken on it.
			 * Without -v, -x rejects the line if a failure
			 * transition was taken on it.
			 */
			if ((vflag && (failed == 0 || xflag == 0)) ||
			    (vflag == 0 && xflag && failed))
				goto nomatch;
succeed:
			nsucc = 1;
			if (lflag || qflag) {
				/* Only the first selected line matters. */
				if (!qflag)
					(void) printf("%s\n", file);
				(void) fclose(fptr);
				return;
			}
			if (cflag) {
				tln++;
			} else {
				if (Hflag || (nfile > 1 && !hflag))
					(void) printf("%s:", file);
				if (bflag)
					(void) printf("%lld:",
					    (blkno - (long long)(ccount-1))
					    / BUFSIZ);
				if (nflag)
					(void) printf("%lld:", lnum);
				/*
				 * NOTE(review): this branch prints from nlp
				 * to the end of the buffer and restarts at
				 * buf when the line start is not below p;
				 * confirm whether it can still be reached
				 * now that the buffer is shifted or grown.
				 */
				if (p <= nlp) {
					while (nlp < &buf[fw_lBufsiz + BUFSIZ])
						(void) putchar(*nlp++);
					nlp = buf;
				}
				while (nlp < p)
					(void) putchar(*nlp++);
			}
nomatch:
			/* Start the next line from the root. */
			lnum++;
			nlp = p;
			c = w;
			failed = 0;
			continue;
		}
		/*
		 * End of a line on which no keyword ended: with -v the line
		 * is selected, otherwise just start the next line.
		 */
		if (lc == '\n')
			if (vflag)
				goto succeed;
			else {
				lnum++;
				nlp = p;
				c = w;
				failed = 0;
			}
	}
	(void) fclose(fptr);
	if (cflag && !qflag) {
		if (Hflag || (nfile > 1 && !hflag))
			(void) printf("%s:", file);
		(void) printf("%lld\n", tln);
	}
}
476
477
478 wchar_t
479 getargc(void)
480 {
481 /* appends a newline to shell quoted argument list so */
482 /* the list looks like it came from an ed style file */
483 wchar_t c;
484 int cw;
485 int b;
486 static int endflg;
487
488
489 if (wordf) {
490 if ((b = getc(wordf)) == EOF)
491 return (EOF);
492 cw = ISASCII(c = (wchar_t)b) ? 1 :
493 (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
494 while (--cw) {
495 if ((b = getc(wordf)) == EOF)
496 return (EOF);
497 c = (c << 7) | (b & 0177);
498 }
499 return (iflag ? letter(c) : c);
500 }
501
502 if (endflg)
503 return (EOF);
504
505 {
506 cw = ISASCII(c = (unsigned char)*argptr++) ? 1 :
507 (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
508
509 while (--cw)
510 c = (c << 7) | ((*argptr++) & 0177);
511 if (c == '\0') {
512 endflg++;
513 return ('\n');
514 }
515 }
516 return (iflag ? letter(c) : c);
517
518
519 }
520
/*
 * cgotofn -- build the keyword tree from the characters returned by
 * getargc().
 *
 * Keywords are separated by newlines.  For each keyword the existing
 * nodes are followed for as long as its characters are already present
 * (nst on a match, link to try the next alternative); from the first
 * character that is not present, a fresh chain of nodes is appended.  The
 * node reached at the end of a keyword is marked with out = 1.  With -x a
 * '\n' transition is added after each keyword, and the node after that
 * transition is the one marked.
 *
 * Nodes are taken in order from the array w; smax is the last node handed
 * out.  overflo() is called if the array, sized by main(), is exhausted.
 */
void
cgotofn(void)
{
	int c;			/* current keyword character */
	struct words *s;	/* current position in the tree */

	s = smax = w;
nword:
	/* Follow nodes that already exist for this keyword. */
	for (;;) {
		c = getargc();
		if (c == EOF)
			return;
		if (c == 0)
			goto enter;
		if (c == '\n') {
			/* End of a keyword that is a prefix of existing nodes. */
			if (xflag) {
				/* Find or create the '\n' transition here. */
				for (;;) {
					if (s->inp == c) {
						s = s->nst;
						break;
					}
					if (s->inp == 0)
						goto nenter;
					if (s->link == 0) {
						if (smax >= &w[input_size -1])
							overflo();
						s->link = ++smax;
						s = smax;
						goto nenter;
					}
					s = s->link;
				}
			}
			s->out = 1;
			s = w;
		} else {
loop:
			if (s->inp == c) {
				s = s->nst;
				continue;
			}
			/* Unused node: start the new chain right here. */
			if (s->inp == 0)
				goto enter;
			/* No more alternatives: add one and start the chain. */
			if (s->link == 0) {
				if (smax >= &w[input_size -1])
					overflo();
				s->link = ++smax;
				s = smax;
				goto enter;
			}
			s = s->link;
			goto loop;
		}
	}

enter:
	/* Append one new node per remaining character of the keyword. */
	do {
		s->inp = c;
		if (smax >= &w[input_size -1])
			overflo();
		s->nst = ++smax;
		s = smax;
	} while ((c = getargc()) != '\n' && c != EOF);
	if (xflag) {
		/* Also entered by goto from the loop above. */
nenter:
		s->inp = '\n';
		if (smax >= &w[input_size -1])
			overflo();
		s->nst = ++smax;
	}
	smax->out = 1;
	s = w;
	if (c != EOF)
		goto nword;
}
596
/*
 * overflo -- the keyword tree needs more nodes than were allocated.
 *
 * This is not expected to happen, because input_size is meant to be
 * computed large enough beforehand.  Reports the condition on stderr and
 * exits with status 2; does not return.
 */
void
overflo(void)
{
	(void) fputs(gettext("fgrep: wordlist too large\n"), stderr);
	exit(2);
}
608
609 void
610 cfail(void)
611 {
612 int qsize = QSIZE;
613 struct words **queue = NULL;
614
615 /*
616 * front and rear are pointers used to traverse the global words
617 * structure "w" which contains the data of input pattern file
618 */
619 struct words **front, **rear;
620 struct words *state;
621 unsigned long frontoffset = 0, rearoffset = 0;
622 char c;
623 struct words *s;
624 s = w;
625 if ((queue = (struct words **)calloc(qsize, sizeof (struct words *)))
626 == NULL) {
627 perror("fgrep");
628 exit(2);
629 }
630 front = rear = queue;
631 init:
632 if ((s->inp) != 0) {
633 *rear++ = s->nst;
634 /*
635 * Reallocates the queue if the number of distinct starting
636 * character of patterns exceeds the qsize value
637 */
638 if (rear >= &queue[qsize - 1]) {
639 frontoffset = front - queue;
640 rearoffset = rear - queue;
641 qsize += QSIZE;
642 if ((queue = (struct words **)realloc(queue,
643 qsize * sizeof (struct words *))) == NULL) {
644 perror("fgrep");
645 exit(2);
646 }
647 front = queue + frontoffset;
648 rear = queue + rearoffset;
649 }
650 }
651 if ((s = s->link) != 0) {
652 goto init;
653 }
654
655 while (rear != front) {
656 s = *front++;
657 cloop:
658 if ((c = s->inp) != 0) {
659 *rear++ = (q = s->nst);
660 /*
661 * Reallocate the queue if the rear pointer reaches the end
662 * queue
663 */
664 if (rear >= &queue[qsize - 1]) {
665 frontoffset = front - queue;
666 rearoffset = rear - queue;
667 qsize += QSIZE;
668 if ((queue = (struct words **)realloc(queue,
669 qsize * sizeof (struct words *))) == NULL) {
670 perror("fgrep");
671 exit(2);
672 }
673 front = queue + frontoffset;
674 rear = queue + rearoffset;
675 }
676 state = s->fail;
677 floop:
678 if (state == 0)
679 state = w;
680 if (state->inp == c) {
681 qloop:
682 q->fail = state->nst;
683 if ((state->nst)->out == 1)
684 q->out = 1;
685 if ((q = q->link) != 0)
686 goto qloop;
687 } else if ((state = state->link) != 0)
688 goto floop;
689 }
690 if ((s = s->link) != 0)
691 goto cloop;
692 }
693 }
694
/*
 * letter -- fold an ASCII upper-case letter to lower case.
 *
 * Returns c + ('a' - 'A') when c is in 'A'..'Z'; every other value,
 * including lower-case letters and non-ASCII characters, is returned
 * unchanged.
 */
wchar_t
letter(wchar_t c)
{
	const int is_upper = (c >= 'A' && c <= 'Z');

	return (is_upper ? c + ('a' - 'A') : c);
}