Print this page
3737 grep does not support -H option
Reviewed by: Andy Stormont <andyjstormont@gmail.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/egrep/egrep.y
+++ new/usr/src/cmd/egrep/egrep.y
1 1 %{
2 2 /*
3 3 * CDDL HEADER START
4 4 *
5 5 * The contents of this file are subject to the terms of the
6 6 * Common Development and Distribution License, Version 1.0 only
7 7 * (the "License"). You may not use this file except in compliance
8 8 * with the License.
9 9 *
10 10 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11 11 * or http://www.opensolaris.org/os/licensing.
12 12 * See the License for the specific language governing permissions
13 13 * and limitations under the License.
14 14 *
15 15 * When distributing Covered Code, include this CDDL HEADER in each
16 16 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17 17 * If applicable, add the following below this CDDL HEADER, with the
18 18 * fields enclosed by brackets "[]" replaced with your own identifying
19 19 * information: Portions Copyright [yyyy] [name of copyright owner]
20 20 *
21 21 * CDDL HEADER END
22 22 */
23 23 %}
24 24 /*
↓ open down ↓ |
24 lines elided |
↑ open up ↑ |
25 25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
26 26 * Use is subject to license terms.
27 27 */
28 28
29 29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 30 /* All Rights Reserved */
31 31
32 32 /* Copyright (c) 1987, 1988 Microsoft Corporation */
33 33 /* All Rights Reserved */
34 34
35 -%{
36 -#pragma ident "%Z%%M% %I% %E% SMI"
37 -%}
35 +/*
36 + * Copyright 2013 Damian Bogel. All rights reserved.
37 + */
38 38
39 39 /*
40 40 * egrep -- print lines containing (or not containing) a regular expression
41 41 *
42 42 * status returns:
43 43 * 0 - ok, and some matches
44 44 * 1 - ok, but no matches
45 45 * 2 - some error; matches irrelevant
46 46 */
47 47 %token CHAR MCHAR DOT MDOT CCL NCCL MCCL NMCCL OR CAT STAR PLUS QUEST
48 48 %left OR
49 49 %left CHAR MCHAR DOT CCL NCCL MCCL NMCCL '('
50 50 %left CAT
51 51 %left STAR PLUS QUEST
52 52
53 53 %{
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
54 54 #include <stdio.h>
55 55 #include <ctype.h>
56 56 #include <memory.h>
57 57 #include <wchar.h>
58 58 #include <wctype.h>
59 59 #include <widec.h>
60 60 #include <stdlib.h>
61 61 #include <limits.h>
62 62 #include <locale.h>
63 63
64 +#define STDIN_FILENAME gettext("(standard input)")
65 +
64 66 #define BLKSIZE 512 /* size of reported disk blocks */
65 67 #define EBUFSIZ 8192
66 68 #define MAXLIN 350
67 69 #define NCHARS 256
68 70 #define MAXPOS 4000
69 71 #define NSTATES 64
70 72 #define FINAL -1
71 73 #define RIGHT '\n' /* serves as record separator and as $ */
72 74 #define LEFT '\n' /* beginning of line */
73 75 int gotofn[NSTATES][NCHARS];
74 76 int state[NSTATES];
75 77 int out[NSTATES];
76 78 int line = 1;
77 79 int *name;
78 80 int *left;
79 81 int *right;
80 82 int *parent;
81 83 int *foll;
82 84 int *positions;
83 85 char *chars;
84 86 wchar_t *lower;
85 87 wchar_t *upper;
86 88 int maxlin, maxclin, maxwclin, maxpos;
87 89 int nxtpos = 0;
88 90 int inxtpos;
89 91 int nxtchar = 0;
90 92 int *tmpstat;
91 93 int *initstat;
92 94 int istat;
93 95 int nstate = 1;
94 96 int xstate;
95 97 int count;
96 98 int icount;
97 99 char *input;
98 100
99 101
100 102 wchar_t lyylval;
101 103 wchar_t nextch();
102 104 wchar_t maxmin();
↓ open down ↓ |
29 lines elided |
↑ open up ↑ |
103 105 int compare();
104 106 void overflo();
105 107
106 108 char reinit = 0;
107 109
108 110 long long lnum;
109 111 int bflag;
110 112 int cflag;
111 113 int eflag;
112 114 int fflag;
115 +int Hflag;
113 116 int hflag;
114 117 int iflag;
115 118 int lflag;
116 119 int nflag;
117 -int sflag;
120 +int qflag;
118 121 int vflag;
119 122 int nfile;
120 123 long long blkno;
121 124 long long tln;
122 125 int nsucc;
123 126 int badbotch;
124 127 extern char *optarg;
125 128 extern int optind;
126 129
127 130 int f;
128 131 FILE *expfile;
129 132 %}
130 133
131 134 %%
132 135 s: t
133 136 {
134 137 unary(FINAL, $1);
135 138 line--;
136 139 }
137 140 ;
138 141 t: b r
139 142 { $$ = node(CAT, $1, $2); }
140 143 | OR b r OR
141 144 { $$ = node(CAT, $2, $3); }
142 145 | OR b r
143 146 { $$ = node(CAT, $2, $3); }
144 147 | b r OR
145 148 { $$ = node(CAT, $1, $2); }
146 149 ;
147 150 b:
148 151 { /* if(multibyte)
149 152 $$ = mdotenter();
150 153 else */
151 154 $$ = enter(DOT);
152 155 $$ = unary(STAR, $$);
153 156 }
154 157 ;
155 158 r: CHAR
156 159 { $$ = iflag && isalpha($1) ?
157 160 node(OR, enter(tolower($1)), enter(toupper($1))) : enter($1); }
158 161 | MCHAR
159 162 { $$ = (iflag && iswalpha(lyylval)) ?
160 163 node(OR, mchar(towlower(lyylval)), mchar(towupper(lyylval))) :
161 164 mchar(lyylval); }
162 165 | DOT
163 166 { if(multibyte)
164 167 $$ = mdotenter();
165 168 else
166 169 $$ = enter(DOT);
167 170 }
168 171 | CCL
169 172 { $$ = cclenter(CCL); }
170 173 | NCCL
171 174 { $$ = cclenter(NCCL); }
172 175 | MCCL
173 176 { $$ = ccl(CCL); }
174 177 | NMCCL
175 178 { $$ = ccl(NCCL); }
176 179 ;
177 180
178 181 r: r OR r
179 182 { $$ = node(OR, $1, $3); }
180 183 | r r %prec CAT
181 184 { $$ = node(CAT, $1, $2); }
182 185 | r STAR
183 186 { $$ = unary(STAR, $1); }
184 187 | r PLUS
185 188 { $$ = unary(PLUS, $1); }
186 189 | r QUEST
187 190 { $$ = unary(QUEST, $1); }
188 191 | '(' r ')'
189 192 { $$ = $2; }
190 193 | error
191 194 ;
192 195
193 196 %%
194 197 void add(int *, int);
195 198 void clearg(void);
196 199 void execute(char *);
197 200 void follow(int);
198 201 int mgetc(void);
199 202 void synerror(void);
200 203
201 204
/*
 * Error hook for the yacc parser: write the message to stderr with an
 * "egrep: " prefix and terminate the process with exit status 2.
 */
void
yyerror(char *s)
{
	(void) fprintf(stderr, "egrep: %s\n", s);
	exit(2);
}
208 211
209 212 int
210 213 yylex(void)
211 214 {
212 215 extern int yylval;
213 216 int cclcnt, x, ccount, oldccount;
214 217 wchar_t c, lc;
215 218
216 219 c = nextch();
217 220 switch(c) {
218 221 case '^':
219 222 yylval = LEFT;
220 223 return(CHAR);
221 224 case '$':
222 225 c = RIGHT;
223 226 goto defchar;
224 227 case '|': return (OR);
225 228 case '*': return (STAR);
226 229 case '+': return (PLUS);
227 230 case '?': return (QUEST);
228 231 case '(': return (c);
229 232 case ')': return (c);
230 233 case '.': return(DOT);
231 234 case '\0': return (0);
232 235 case RIGHT: return (OR);
233 236 case '[':
234 237 x = (multibyte ? MCCL : CCL);
235 238 cclcnt = 0;
236 239 count = nxtchar++;
237 240 if ((c = nextch()) == '^') {
238 241 x = (multibyte ? NMCCL : NCCL);
239 242 c = nextch();
240 243 }
241 244 lc = 0;
242 245 do {
243 246 if (iflag && iswalpha(c))
244 247 c = towlower(c);
245 248 if (c == '\0') synerror();
246 249 if (c == '-' && cclcnt > 0 && lc != 0) {
247 250 if ((c = nextch()) != 0) {
248 251 if(c == ']') {
249 252 chars[nxtchar++] = '-';
250 253 cclcnt++;
251 254 break;
252 255 }
253 256 if (iflag && iswalpha(c))
254 257 c = towlower(c);
255 258 if (!multibyte ||
256 259 (c & WCHAR_CSMASK) == (lc & WCHAR_CSMASK) &&
257 260 lc < c &&
258 261 !iswcntrl(c) && !iswcntrl(lc)) {
259 262 if (nxtchar >= maxclin)
260 263 if (allocchars() == 0)
261 264 overflo();
262 265 chars[nxtchar++] = '-';
263 266 cclcnt++;
264 267 }
265 268 }
266 269 }
267 270 ccount = oldccount = nxtchar;
268 271 if(ccount + MB_LEN_MAX >= maxclin)
269 272 if(allocchars() == 0)
270 273 overflo();
271 274 ccount += wctomb(&chars[ccount], c);
272 275 cclcnt += ccount - oldccount;
273 276 nxtchar += ccount - oldccount;
274 277 lc = c;
275 278 } while ((c = nextch()) != ']');
276 279 chars[count] = cclcnt;
277 280 return(x);
278 281
279 282 case '\\':
280 283 if ((c = nextch()) == '\0') synerror();
281 284 defchar:
282 285 default:
283 286 if (c <= 0177) {
284 287 yylval = c;
285 288 return (CHAR);
286 289 } else {
287 290 lyylval = c;
288 291 return (MCHAR);
289 292 }
290 293 }
291 294 }
292 295
293 296 wchar_t
294 297 nextch(void)
295 298 {
296 299 wchar_t lc;
297 300 char multic[MB_LEN_MAX];
298 301 int length, d;
299 302 if (fflag) {
300 303 if ((length = _mbftowc(multic, &lc, mgetc, &d)) < 0)
301 304 synerror();
302 305 if(length == 0)
303 306 lc = '\0';
304 307 }
305 308 else {
306 309 if((length = mbtowc(&lc, input, MB_LEN_MAX)) == -1)
307 310 synerror();
308 311 if(length == 0)
309 312 return(0);
310 313 input += length;
311 314 }
312 315 return(lc);
313 316 }
314 317
315 318 int
316 319 mgetc(void)
317 320 {
318 321 return(getc(expfile));
319 322 }
320 323
321 324 void
322 325 synerror(void)
323 326 {
324 327 fprintf(stderr, gettext("egrep: syntax error\n"));
325 328 exit(2);
326 329 }
327 330
328 331 int
329 332 enter(int x)
330 333 {
331 334 if(line >= maxlin)
332 335 if(alloctree() == 0)
333 336 overflo();
334 337 name[line] = x;
335 338 left[line] = 0;
336 339 right[line] = 0;
337 340 return(line++);
338 341 }
339 342
340 343 int
341 344 cclenter(int x)
342 345 {
343 346 int linno;
344 347 linno = enter(x);
345 348 right[linno] = count;
346 349 return (linno);
347 350 }
348 351
349 352 int
350 353 node(int x, int l, int r)
351 354 {
352 355 if(line >= maxlin)
353 356 if(alloctree() == 0)
354 357 overflo();
355 358 name[line] = x;
356 359 left[line] = l;
357 360 right[line] = r;
358 361 parent[l] = line;
359 362 parent[r] = line;
360 363 return(line++);
361 364 }
362 365
363 366 int
364 367 unary(int x, int d)
365 368 {
366 369 if(line >= maxlin)
367 370 if(alloctree() == 0)
368 371 overflo();
369 372 name[line] = x;
370 373 left[line] = d;
371 374 right[line] = 0;
372 375 parent[d] = line;
373 376 return(line++);
374 377 }
375 378
376 379 int
377 380 allocchars(void)
378 381 {
379 382 maxclin += MAXLIN;
380 383 if((chars = realloc(chars, maxclin)) == (char *)0)
381 384 return 0;
382 385 return 1;
383 386 }
384 387
385 388 int
386 389 alloctree(void)
387 390 {
388 391 maxlin += MAXLIN;
389 392 if((name = (int *)realloc(name, maxlin*sizeof(int))) == (int *)0)
390 393 return 0;
391 394 if((left = (int *)realloc(left, maxlin*sizeof(int))) == (int *)0)
392 395 return 0;
393 396 if((right = (int *)realloc(right, maxlin*sizeof(int))) == (int *)0)
394 397 return 0;
395 398 if((parent = (int *)realloc(parent, maxlin*sizeof(int))) == (int *)0)
396 399 return 0;
397 400 if((foll = (int *)realloc(foll, maxlin*sizeof(int))) == (int *)0)
398 401 return 0;
399 402 if((tmpstat = (int *)realloc(tmpstat, maxlin*sizeof(int))) == (int *)0)
400 403 return 0;
401 404 if((initstat = (int *)realloc(initstat, maxlin*sizeof(int))) == (int *)0)
402 405 return 0;
403 406 return 1;
404 407 }
405 408
406 409 void
407 410 overflo(void)
408 411 {
409 412 fprintf(stderr, gettext("egrep: regular expression too long\n"));
410 413 exit(2);
411 414 }
412 415
413 416 void
414 417 cfoll(int v)
415 418 {
416 419 int i;
417 420 if (left[v] == 0) {
418 421 count = 0;
419 422 for (i=1; i<=line; i++) tmpstat[i] = 0;
420 423 follow(v);
421 424 add(foll, v);
422 425 }
423 426 else if (right[v] == 0) cfoll(left[v]);
424 427 else {
425 428 cfoll(left[v]);
426 429 cfoll(right[v]);
427 430 }
428 431 }
429 432
430 433 void
431 434 cgotofn(void)
432 435 {
433 436 int i;
434 437 count = 0;
435 438 inxtpos = nxtpos;
436 439 for (i=3; i<=line; i++) tmpstat[i] = 0;
437 440 if (cstate(line-1)==0) {
438 441 tmpstat[line] = 1;
439 442 count++;
440 443 out[1] = 1;
441 444 }
442 445 for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
443 446 count--; /*leave out position 1 */
444 447 icount = count;
445 448 tmpstat[1] = 0;
446 449 add(state, 1);
447 450 istat = nxtst(1, LEFT);
448 451 }
449 452
450 453 int
451 454 nxtst(int s, int c)
452 455 {
453 456 int i, num, k;
454 457 int pos, curpos, number, newpos;
455 458 num = positions[state[s]];
456 459 count = icount;
457 460 for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
458 461 pos = state[s] + 1;
459 462 for (i=0; i<num; i++) {
460 463 curpos = positions[pos];
461 464 k = name[curpos];
462 465 if (k >= 0)
463 466 if (
464 467 (k == c)
465 468 || (k == DOT && dot(c))
466 469 || (k == MDOT && mdot(c))
467 470 || (k == CCL && dot(c) && member(c, right[curpos], 1))
468 471 || (k == NCCL && dot(c) && member(c, right[curpos], 0))
469 472 || (k == MCCL && mdot(c) && member(c, right[curpos], 1))
470 473 ) {
471 474 number = positions[foll[curpos]];
472 475 newpos = foll[curpos] + 1;
473 476 for (k=0; k<number; k++) {
474 477 if (tmpstat[positions[newpos]] != 1) {
475 478 tmpstat[positions[newpos]] = 1;
476 479 count++;
477 480 }
478 481 newpos++;
479 482 }
480 483 }
481 484 pos++;
482 485 }
483 486 if (notin(nstate)) {
484 487 if (++nstate >= NSTATES) {
485 488 for (i=1; i<NSTATES; i++)
486 489 out[i] = 0;
487 490 for (i=1; i<NSTATES; i++)
488 491 for (k=0; k<NCHARS; k++)
489 492 gotofn[i][k] = 0;
490 493 nstate = 1;
491 494 nxtpos = inxtpos;
492 495 reinit = 1;
493 496 add(state, nstate);
494 497 if (tmpstat[line] == 1) out[nstate] = 1;
495 498 return nstate;
496 499 }
497 500 add(state, nstate);
498 501 if (tmpstat[line] == 1) out[nstate] = 1;
499 502 gotofn[s][c] = nstate;
500 503 return nstate;
501 504 }
502 505 else {
503 506 gotofn[s][c] = xstate;
504 507 return xstate;
505 508 }
506 509 }
507 510
508 511
509 512 int
510 513 cstate(int v)
511 514 {
512 515 int b;
513 516 if (left[v] == 0) {
514 517 if (tmpstat[v] != 1) {
515 518 tmpstat[v] = 1;
516 519 count++;
517 520 }
518 521 return(1);
519 522 }
520 523 else if (right[v] == 0) {
521 524 if (cstate(left[v]) == 0) return (0);
522 525 else if (name[v] == PLUS) return (1);
523 526 else return (0);
524 527 }
525 528 else if (name[v] == CAT) {
526 529 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
527 530 else return (1);
528 531 }
529 532 else { /* name[v] == OR */
530 533 b = cstate(right[v]);
531 534 if (cstate(left[v]) == 0 || b == 0) return (0);
532 535 else return (1);
533 536 }
534 537 }
535 538
536 539
537 540 int
538 541 dot(int c)
539 542 {
540 543 if(multibyte && c >= 0200 && (!iscntrl(c) || c == SS2 && eucw2 || c == SS3 && eucw3))
541 544 return(0);
542 545 if(c == RIGHT || c == LEFT)
543 546 return(0);
544 547 return(1);
545 548 }
546 549
/*
 * Decide whether byte value c is accepted by an MDOT position:
 * returns 1 when c is >= 0200 and is not a control character,
 * 0 otherwise.
 */
int
mdot(int c)
{
	return ((c >= 0200 && !iscntrl(c)) ? 1 : 0);
}
554 557
555 558 int
556 559 member(int symb, int set, int torf)
557 560 {
558 561 int i, num, pos, c, lc;
559 562 if(symb == RIGHT || symb == LEFT)
560 563 return(0);
561 564 num = chars[set];
562 565 pos = set + 1;
563 566 lc = 0;
564 567 if(iflag)
565 568 symb = tolower(symb);
566 569 for (i=0; i<num; i++) {
567 570 c = (unsigned char)chars[pos++];
568 571 if(c == '-' && lc != 0 && ++i < num) {
569 572 c = (unsigned char)chars[pos++];
570 573 if(lc <= symb && symb <= c)
571 574 return(torf);
572 575 }
573 576 if (symb == c)
574 577 return (torf);
575 578 lc = c;
576 579 }
577 580 return(!torf);
578 581 }
579 582
580 583 int
581 584 notin(int n)
582 585 {
583 586 int i, j, pos;
584 587 for (i=1; i<=n; i++) {
585 588 if (positions[state[i]] == count) {
586 589 pos = state[i] + 1;
587 590 for (j=0; j < count; j++)
588 591 if (tmpstat[positions[pos++]] != 1) goto nxt;
589 592 xstate = i;
590 593 return (0);
591 594 }
592 595 nxt: ;
593 596 }
594 597 return (1);
595 598 }
596 599
597 600 void
598 601 add(int *array, int n)
599 602 {
600 603 int i;
601 604 if (nxtpos + count >= maxpos) {
602 605 maxpos += MAXPOS + count;
603 606 if((positions = (int *)realloc(positions, maxpos *sizeof(int))) == (int *)0)
604 607 overflo();
605 608 }
606 609 array[n] = nxtpos;
607 610 positions[nxtpos++] = count;
608 611 for (i=3; i <= line; i++) {
609 612 if (tmpstat[i] == 1) {
610 613 positions[nxtpos++] = i;
611 614 }
612 615 }
613 616 }
614 617
615 618 void
616 619 follow(int v)
617 620 {
618 621 int p;
619 622 if (v == line) return;
620 623 p = parent[v];
621 624 switch(name[p]) {
622 625 case STAR:
623 626 case PLUS: cstate(v);
624 627 follow(p);
625 628 return;
626 629
627 630 case OR:
628 631 case QUEST: follow(p);
629 632 return;
630 633
631 634 case CAT: if (v == left[p]) {
632 635 if (cstate(right[p]) == 0) {
633 636 follow(p);
634 637 return;
635 638 }
636 639 }
637 640 else follow(p);
638 641 return;
639 642 case FINAL: if (tmpstat[line] != 1) {
640 643 tmpstat[line] = 1;
641 644 count++;
642 645 }
643 646 return;
644 647 }
645 648 }
646 649
647 650 #define USAGE "[ -bchilnsv ] [ -e exp ] [ -f file ] [ strings ] [ file ] ..."
648 651
649 652 int
650 653 main(int argc, char **argv)
651 654 {
652 655 char c;
↓ open down ↓ |
525 lines elided |
↑ open up ↑ |
653 656 char nl = '\n';
654 657 int errflag = 0;
655 658
656 659 (void)setlocale(LC_ALL, "");
657 660
658 661 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
659 662 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't. */
660 663 #endif
661 664 (void) textdomain(TEXT_DOMAIN);
662 665
663 - while((c = getopt(argc, argv, "ybcie:f:hlnvs")) != -1)
666 + while((c = getopt(argc, argv, "ybcie:f:Hhlnvs")) != -1)
664 667 switch(c) {
665 668
666 669 case 'b':
667 670 bflag++;
668 671 continue;
669 672
670 673 case 'c':
671 674 cflag++;
672 675 continue;
673 676
674 677 case 'e':
675 678 eflag++;
676 679 input = optarg;
677 680 continue;
678 681
↓ open down ↓ |
5 lines elided |
↑ open up ↑ |
679 682 case 'f':
680 683 fflag++;
681 684 expfile = fopen(optarg, "r");
682 685 if(expfile == NULL) {
683 686 fprintf(stderr,
684 687 gettext("egrep: can't open %s\n"), optarg);
685 688 exit(2);
686 689 }
687 690 continue;
688 691
692 + case 'H':
693 + if (!lflag) /* H is excluded by l as in GNU grep */
694 + Hflag++;
695 + hflag = 0; /* H excludes h */
696 + continue;
697 +
689 698 case 'h':
690 699 hflag++;
700 + Hflag = 0; /* h excludes H */
691 701 continue;
692 702
693 703 case 'y':
694 704 case 'i':
695 705 iflag++;
696 706 continue;
697 707
698 708 case 'l':
699 709 lflag++;
710 + Hflag = 0; /* l excludes H */
700 711 continue;
701 712
702 713 case 'n':
703 714 nflag++;
704 715 continue;
705 716
706 - case 's':
707 - sflag++;
717 + case 'q':
718 + case 's': /* Solaris: legacy option */
719 + qflag++;
708 720 continue;
709 721
710 722 case 'v':
711 723 vflag++;
712 724 continue;
713 725
714 726 case '?':
715 727 errflag++;
716 728 }
717 729 if (errflag || ((argc <= 0) && !fflag && !eflag)) {
718 730 fprintf(stderr, gettext("usage: egrep %s\n"), gettext(USAGE));
719 731 exit(2);
720 732 }
721 733 if(!eflag && !fflag) {
722 734 input = argv[optind];
723 735 optind++;
724 736 }
725 737
726 738 argc -= optind;
727 739 argv = &argv[optind];
728 740
729 741 /* allocate initial space for arrays */
730 742 if((name = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
731 743 overflo();
732 744 if((left = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
733 745 overflo();
734 746 if((right = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
735 747 overflo();
736 748 if((parent = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
737 749 overflo();
738 750 if((foll = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
739 751 overflo();
740 752 if((tmpstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
741 753 overflo();
742 754 if((initstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
743 755 overflo();
744 756 if((chars = (char *)malloc(MAXLIN)) == (char *)0)
745 757 overflo();
746 758 if((lower = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
747 759 overflo();
748 760 if((upper = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
749 761 overflo();
750 762 if((positions = (int *)malloc(MAXPOS*sizeof(int))) == (int *)0)
751 763 overflo();
752 764 maxlin = MAXLIN;
753 765 maxclin = MAXLIN;
754 766 maxwclin = MAXLIN;
755 767 maxpos = MAXPOS;
756 768
757 769 yyparse();
758 770
759 771 cfoll(line-1);
760 772 cgotofn();
761 773 nfile = argc;
762 774 if (argc<=0) {
763 775 execute(0);
764 776 }
765 777 else while (--argc >= 0) {
766 778 if (reinit == 1) clearg();
767 779 execute(*argv++);
768 780 }
769 781 return (badbotch ? 2 : nsucc==0);
770 782 }
771 783
772 784 void
773 785 execute(char *file)
774 786 {
775 787 char *p;
776 788 int cstat;
777 789 wchar_t c;
778 790 int t;
779 791 long count;
780 792 long count1, count2;
781 793 long nchars;
782 794 int succ;
783 795 char *ptr, *ptrend, *lastptr;
784 796 char *buf;
785 797 long lBufSiz;
786 798 FILE *f;
787 799 int nlflag;
788 800
789 801 lBufSiz = EBUFSIZ;
790 802 if ((buf = malloc (lBufSiz + EBUFSIZ)) == NULL) {
791 803 exit (2); /* out of memory - BAIL */
↓ open down ↓ |
74 lines elided |
↑ open up ↑ |
792 804 }
793 805
794 806 if (file) {
795 807 if ((f = fopen(file, "r")) == NULL) {
796 808 fprintf(stderr,
797 809 gettext("egrep: can't open %s\n"), file);
798 810 badbotch=1;
799 811 return;
800 812 }
801 813 } else {
802 - file = "<stdin>";
803 814 f = stdin;
815 + file = STDIN_FILENAME;
804 816 }
805 817 lnum = 1;
806 818 tln = 0;
807 819 if((count = read(fileno(f), buf, EBUFSIZ)) <= 0) {
808 820 fclose(f);
809 821
810 - if (cflag) {
811 - if (nfile>1 && !hflag)
822 + if (cflag && !qflag) {
823 + if (Hflag || (nfile > 1 && !hflag))
812 824 fprintf(stdout, "%s:", file);
813 825 fprintf(stdout, "%lld\n", tln);
814 826 }
815 827 return;
816 828 }
817 829
818 830 blkno = count;
819 831 ptr = buf;
820 832 for(;;) {
821 833 if((ptrend = memchr(ptr, '\n', buf + count - ptr)) == NULL) {
822 834 /*
823 835 move the unused partial record to the head of the buffer
824 836 */
825 837 if (ptr > buf) {
826 838 count = buf + count - ptr;
827 839 memmove (buf, ptr, count);
828 840 ptr = buf;
829 841 }
830 842
831 843 /*
832 844 Get a bigger buffer if this one is full
833 845 */
834 846 if(count > lBufSiz) {
835 847 /*
836 848 expand the buffer
837 849 */
838 850 lBufSiz += EBUFSIZ;
839 851 if ((buf = realloc (buf, lBufSiz + EBUFSIZ)) == NULL) {
840 852 exit (2); /* out of memory - BAIL */
841 853 }
842 854
843 855 ptr = buf;
844 856 }
845 857
846 858 p = buf + count;
847 859 if((count1 = read(fileno(f), p, EBUFSIZ)) > 0) {
848 860 count += count1;
849 861 blkno += count1;
850 862 continue;
851 863 }
852 864 ptrend = ptr + count;
853 865 nlflag = 0;
854 866 } else
855 867 nlflag = 1;
856 868 *ptrend = '\n';
857 869 p = ptr;
858 870 lastptr = ptr;
859 871 cstat = istat;
860 872 succ = 0;
861 873 for(;;) {
862 874 if(out[cstat]) {
863 875 if(multibyte && p > ptr) {
864 876 wchar_t wchar;
865 877 int length;
866 878 char *endptr = p;
867 879 p = lastptr;
868 880 while(p < endptr) {
869 881 length = mbtowc(&wchar, p, MB_LEN_MAX);
870 882 if(length <= 1)
871 883 p++;
872 884 else
873 885 p += length;
874 886 }
875 887 if(p == endptr) {
876 888 succ = !vflag;
877 889 break;
878 890 }
879 891 cstat = 1;
880 892 length = mbtowc(&wchar, lastptr, MB_LEN_MAX);
881 893 if(length <= 1)
882 894 lastptr++;
883 895 else
884 896 lastptr += length;
885 897 p = lastptr;
886 898 continue;
887 899 }
888 900 succ = !vflag;
889 901 break;
890 902 }
891 903 c = (unsigned char)*p++;
892 904 if ((t = gotofn[cstat][c]) == 0)
893 905 cstat = nxtst(cstat, c);
894 906 else
↓ open down ↓ |
73 lines elided |
↑ open up ↑ |
895 907 cstat = t;
896 908 if(c == RIGHT) {
897 909 if(out[cstat]) {
898 910 succ = !vflag;
899 911 break;
900 912 }
901 913 succ = vflag;
902 914 break;
903 915 }
904 916 }
905 - if(succ) {
917 + if (succ) {
906 918 nsucc = 1;
907 - if (cflag) tln++;
908 - else if (sflag)
909 - ; /* ugh */
910 - else if (lflag) {
911 - printf("%s\n", file);
919 + if (lflag || qflag) {
920 + if (!qflag)
921 + (void) printf("%s\n", file);
912 922 fclose(f);
913 923 return;
914 924 }
915 - else {
916 - if (nfile > 1 && !hflag)
917 - printf(gettext("%s:"), file);
925 + if (cflag) {
926 + tln++;
927 + } else {
928 + if (Hflag || (nfile > 1 && !hflag))
929 + printf("%s:", file);
918 930 if (bflag) {
919 931 nchars = blkno - (buf + count - ptrend) - 2;
920 932 if(nlflag)
921 933 nchars++;
922 934 printf("%lld:", nchars/BLKSIZE);
923 935 }
924 936 if (nflag)
925 937 printf("%lld:", lnum);
926 938 if(nlflag)
927 939 nchars = ptrend - ptr + 1;
928 940 else
929 941 nchars = ptrend - ptr;
930 942 fwrite(ptr, (size_t)1, (size_t)nchars, stdout);
931 943 }
932 944 }
933 945 if(!nlflag)
934 946 break;
935 947 ptr = ptrend + 1;
936 948 if(ptr >= buf + count) {
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
937 949 ptr = buf;
938 950 if((count = read(fileno(f), buf, EBUFSIZ)) <= 0)
939 951 break;
940 952 blkno += count;
941 953 }
942 954 lnum++;
943 955 if (reinit == 1)
944 956 clearg();
945 957 }
946 958 fclose(f);
947 - if (cflag) {
948 - if (nfile > 1 && !hflag)
949 - printf(gettext("%s:"), file);
959 + if (cflag && !qflag) {
960 + if (Hflag || (nfile > 1 && !hflag))
961 + printf("%s:", file);
950 962 printf("%lld\n", tln);
951 963 }
952 964 }
953 965
954 966 void
955 967 clearg(void)
956 968 {
957 969 int i, k;
958 970 for (i=1; i<=nstate; i++)
959 971 out[i] = 0;
960 972 for (i=1; i<=nstate; i++)
961 973 for (k=0; k<NCHARS; k++)
962 974 gotofn[i][k] = 0;
963 975 nstate = 1;
964 976 nxtpos = inxtpos;
965 977 reinit = 0;
966 978 count = 0;
967 979 for (i=3; i<=line; i++) tmpstat[i] = 0;
968 980 if (cstate(line-1)==0) {
969 981 tmpstat[line] = 1;
970 982 count++;
971 983 out[1] = 1;
972 984 }
973 985 for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
974 986 count--; /*leave out position 1 */
975 987 icount = count;
976 988 tmpstat[1] = 0;
977 989 add(state, 1);
978 990 istat = nxtst(1, LEFT);
979 991 }
980 992
981 993 int
982 994 mdotenter(void)
983 995 {
984 996 int i, x1, x2;
985 997 x1 = enter(DOT);
986 998 x2 = enter(MDOT);
987 999 for(i = 1; i < (int) eucw1; i++)
988 1000 x2 = node(CAT, x2, enter(MDOT));
989 1001 x1 = node(OR, x1, x2);
990 1002 if(eucw2) {
991 1003 x2 = enter('\216');
992 1004 for(i = 1; i <= (int) eucw2; i++)
993 1005 x2 = node(CAT, x2, enter(MDOT));
994 1006 x1 = node(OR, x1, x2);
995 1007 }
996 1008 if(eucw3) {
997 1009 x2 = enter('\217');
998 1010 for(i = 1; i <= (int) eucw3; i++)
999 1011 x2 = node(CAT, x2, enter(MDOT));
1000 1012 x1 = node(OR, x1, x2);
1001 1013 }
1002 1014 return(x1);
1003 1015 }
1004 1016
1005 1017 int
1006 1018 mchar(wchar_t c)
1007 1019 {
1008 1020 char multichar[MB_LEN_MAX+1];
1009 1021 char *p;
1010 1022 int x1, lc, length;
1011 1023
1012 1024 length = wctomb(multichar, c);
1013 1025 p = multichar;
1014 1026 *(p + length) = '\0';
1015 1027 x1 = enter((unsigned char)*p++);
1016 1028 while(lc = (unsigned char)*p++)
1017 1029 x1 = node(CAT, x1, enter(lc));
1018 1030 return(x1);
1019 1031 }
1020 1032
1021 1033 int
1022 1034 ccl(int type)
1023 1035 {
1024 1036 wchar_t c, lc;
1025 1037 char multic1[MB_LEN_MAX];
1026 1038 char multic2[MB_LEN_MAX];
1027 1039 int x1, x2, length, current, last, cclcnt;
1028 1040 x2 = 0;
1029 1041 current = 0;
1030 1042 last = genrange(type);
1031 1043 nxtchar = count + 1;
1032 1044 cclcnt = 0;
1033 1045 /* create usual character class for single byte characters */
1034 1046 while(current <= last && (isascii(c = lower[current]) || c <= 0377 && iscntrl(c))) {
1035 1047 cclcnt++;
1036 1048 chars[nxtchar++] = c;
1037 1049 if(lower[current] != upper[current]) {
1038 1050 chars[nxtchar++] = '-';
1039 1051 chars[nxtchar++] = upper[current];
1040 1052 cclcnt += 2;
1041 1053 }
1042 1054 current++;
1043 1055 }
1044 1056
1045 1057 if(cclcnt)
1046 1058 chars[count] = cclcnt;
1047 1059 else
1048 1060 nxtchar = count;
1049 1061 if(current > 0)
1050 1062 /* single byte part of character class */
1051 1063 x2 = cclenter(type);
1052 1064 else if(type == NCCL)
1053 1065 /* all single byte characters match */
1054 1066 x2 = enter(DOT);
1055 1067 while(current <= last) {
1056 1068 if(upper[current] == lower[current])
1057 1069 x1 = mchar(lower[current]);
1058 1070 else {
1059 1071 length = wctomb(multic1, lower[current]);
1060 1072 wctomb(multic2, upper[current]);
1061 1073 x1 = range((unsigned char *)multic1,
1062 1074 (unsigned char *)multic2, length);
1063 1075 }
1064 1076 if(x2)
1065 1077 x2 = node(OR, x2, x1);
1066 1078 else
1067 1079 x2 = x1;
1068 1080 current++;
1069 1081 }
1070 1082 return x2;
1071 1083 }
1072 1084
1073 1085 int
1074 1086 range(unsigned char *p1, unsigned char *p2, int length)
1075 1087 {
1076 1088 char multic[MB_LEN_MAX+1];
1077 1089 char *p;
1078 1090 int i, x1, x2;
1079 1091 if(length == 1)
1080 1092 return(classenter(*p1, *p2));
1081 1093 if(p1[0] == p2[0])
1082 1094 return(node(CAT, enter(p1[0]), range(p1+1, p2+1, length - 1)));
1083 1095 p = multic;
1084 1096 for(i = 1; i < length; i++)
1085 1097 *p++ = 0377;
1086 1098 x1 = node(CAT, enter(p1[0]),
1087 1099 range(p1+1, (unsigned char *)multic, length - 1));
1088 1100 if((unsigned char)(p1[0] + 1) < p2[0]) {
1089 1101 x2 = classenter(p1[0] + 1, p2[0] - 1);
1090 1102 for(i = 1; i < length; i++)
1091 1103 x2 = node(CAT, x2, enter(MDOT));
1092 1104 x1 = node(OR, x1, x2);
1093 1105 }
1094 1106 p = multic;
1095 1107 for(i = 1; i < length; i++)
1096 1108 *p++ = 0200;
1097 1109 x2 = node(CAT, enter(p2[0]),
1098 1110 range((unsigned char *)multic, p2+1, length - 1));
1099 1111 return node(OR, x1, x2);
1100 1112 }
1101 1113
1102 1114 int
1103 1115 classenter(int x1, int x2)
1104 1116 {
1105 1117 static int max, min;
1106 1118 if(!max) {
1107 1119 int i;
1108 1120 for(i = 0200; i <= 0377; i++)
1109 1121 if(!iscntrl(i))
1110 1122 break;
1111 1123 min = i;
1112 1124 for(i = 0377; i >= 0200; i--)
1113 1125 if(!iscntrl(i))
1114 1126 break;
1115 1127 max = i;
1116 1128 }
1117 1129 if(x1 <= min && x2 >= max)
1118 1130 return enter(MDOT);
1119 1131 if(nxtchar + 4 >= maxclin)
1120 1132 if(allocchars() == 0)
1121 1133 overflo();
1122 1134 count = nxtchar++;
1123 1135 chars[nxtchar++] = x1;
1124 1136 chars[nxtchar++] = '-';
1125 1137 chars[nxtchar++] = x2;
1126 1138 chars[count] = 3;
1127 1139 return cclenter(MCCL);
1128 1140 }
1129 1141
1130 1142 int
1131 1143 genrange(int type)
1132 1144 {
1133 1145 char *p, *endp;
1134 1146 int current, nel, i, last, length;
1135 1147 wchar_t c, lc;
1136 1148
1137 1149 current = 0;
1138 1150 p = &chars[count+1];
1139 1151 endp = &chars[count+1] + chars[count];
1140 1152 lc = 0;
1141 1153
1142 1154 /* convert character class into union of ranges */
1143 1155 while(p < endp) {
1144 1156 length = mbtowc(&c, p, MB_LEN_MAX);
1145 1157 p += length;
1146 1158 if(c == '-' && lc != 0) {
1147 1159 length = mbtowc(&c, p, MB_LEN_MAX);
1148 1160 upper[current-1] = c;
1149 1161 p += length;
1150 1162 } else {
1151 1163 lower[current] = c;
1152 1164 upper[current++] = c;
1153 1165 }
1154 1166 lc = c;
1155 1167 }
1156 1168 nel = current;
1157 1169 /* sort lower and upper bounds of ranges */
1158 1170 qsort((char *)lower, nel, sizeof(wchar_t), compare);
1159 1171 qsort((char *)upper, nel, sizeof(wchar_t), compare);
1160 1172 last = current - 1;
1161 1173 current = 0;
1162 1174 /* combine overlapping or adjacent ranges */
1163 1175 for(i = 0; i < last; i++)
1164 1176 if(upper[i] >= lower[i+1] - 1)
1165 1177 upper[current] = upper[i+1];
1166 1178 else {
1167 1179 lower[++current] = lower[i+1];
1168 1180 upper[current] = upper[i+1];
1169 1181 }
1170 1182 if(type == NCCL) {
1171 1183 /* find complement of character class */
1172 1184 int j, next;
1173 1185 i = 0;
1174 1186 while(i <= current && isascii(c=lower[i]) || c <= 0377 && iscntrl(c))
1175 1187 i++;
1176 1188 if(i > current) {
1177 1189 /* match all multibyte characters */
1178 1190 if(eucw2) {
1179 1191 lower[i] = maxmin(WCHAR_CS2, 0);
1180 1192 upper[i++] = maxmin(WCHAR_CS2, 1);
1181 1193 }
1182 1194 if(eucw3) {
1183 1195 lower[i] = maxmin(WCHAR_CS3, 0);
1184 1196 upper[i++] = maxmin(WCHAR_CS3, 1);
1185 1197 }
1186 1198 lower[i] = maxmin(WCHAR_CS1, 0);
1187 1199 upper[i++] = maxmin(WCHAR_CS1, 1);
1188 1200 return i - 1;
1189 1201 }
1190 1202 next = current + 1;
1191 1203 if(next + current + 2 >= maxwclin) {
1192 1204 maxwclin += MAXLIN + next + current + 2;
1193 1205 if((lower = (wchar_t *)realloc(lower, maxwclin *sizeof(wchar_t))) == (wchar_t *)0 ||
1194 1206 (upper = (wchar_t *)realloc(upper, maxwclin * sizeof(wchar_t))) == (wchar_t *)0)
1195 1207 overflo();
1196 1208 }
1197 1209 if(eucw2 && lower[i] > maxmin(WCHAR_CS2, 0)) {
1198 1210 lower[next] = maxmin(WCHAR_CS2, 0);
1199 1211 if((lower[i] & WCHAR_CSMASK) != WCHAR_CS2) {
1200 1212 upper[next++] = maxmin(WCHAR_CS2, 1);
1201 1213 if((lower[i] & WCHAR_CSMASK) == WCHAR_CS1 && eucw3) {
1202 1214 lower[next] = maxmin(WCHAR_CS3, 0);
1203 1215 upper[next++] = maxmin(WCHAR_CS3, 1);
1204 1216 }
1205 1217 if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1206 1218 lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1207 1219 upper[next++] = lower[i] - 1;
1208 1220 }
1209 1221 } else
1210 1222 upper[next++] = lower[i] - 1;
1211 1223 } else if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1212 1224 lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1213 1225 upper[next++] = lower[i] - 1;
1214 1226 }
1215 1227 for(j = i; j < current; j++) {
1216 1228 if(upper[j] < maxmin(upper[j] & WCHAR_CSMASK, 1)) {
1217 1229 lower[next] = upper[j] + 1;
1218 1230 if((upper[j] & WCHAR_CSMASK) != (lower[j+1] & WCHAR_CSMASK)) {
1219 1231 upper[next++] = maxmin(upper[j] & WCHAR_CSMASK, 1);
1220 1232 if(eucw3 && (upper[j] & WCHAR_CSMASK) == WCHAR_CS2 && (lower[j+1] & WCHAR_CSMASK) == WCHAR_CS1) {
1221 1233 lower[next] = maxmin(WCHAR_CS3, 0);
1222 1234 upper[next++] = maxmin(WCHAR_CS3, 1);
1223 1235 }
1224 1236 if(lower[j+1] > maxmin(lower[j+1] & WCHAR_CSMASK, 0)) {
1225 1237 lower[next] = maxmin(lower[j+1] & WCHAR_CSMASK, 0);
1226 1238 upper[next++] = lower[j+1] - 1;
1227 1239 }
1228 1240 } else
1229 1241 upper[next++] = lower[j+1] - 1;
1230 1242 } else if(lower[j+1] > maxmin(lower[j+1], 0)) {
1231 1243 lower[next] = maxmin(lower[j+1], 0);
1232 1244 upper[next++] = lower[j+1] - 1;
1233 1245 }
1234 1246 }
1235 1247 if(upper[current] < maxmin(upper[current] & WCHAR_CSMASK, 1)) {
1236 1248 lower[next] = upper[current] + 1;
1237 1249 upper[next++] = maxmin(upper[current] & WCHAR_CSMASK, 1);
1238 1250 }
1239 1251 if((upper[current] & WCHAR_CSMASK) != WCHAR_CS1) {
1240 1252 if((upper[current] & WCHAR_CSMASK) == WCHAR_CS2 && eucw3) {
1241 1253 lower[next] = maxmin(WCHAR_CS3, 0);
1242 1254 upper[next++] = maxmin(WCHAR_CS3, 1);
1243 1255 }
1244 1256 lower[next] = maxmin(WCHAR_CS1, 0);
1245 1257 upper[next++] = maxmin(WCHAR_CS1, 1);
1246 1258 }
1247 1259 for(j = current + 1; j < next; j++) {
1248 1260 lower[i] = lower[j];
1249 1261 upper[i++] = upper[j];
1250 1262 }
1251 1263 current = i - 1;
1252 1264 }
1253 1265 return(current);
1254 1266 }
1255 1267
/*
 * qsort() comparison callback for arrays of wchar_t.
 * c and d point at the two elements being compared; the result is
 * -1, 0 or 1 as *c is less than, equal to or greater than *d.
 * The parameters are const void * because that is the comparator
 * type qsort() requires.
 */
int
compare(const void *c, const void *d)
{
	const wchar_t *a = c;
	const wchar_t *b = d;

	if (*a < *b)
		return -1;
	if (*a == *b)
		return 0;
	return 1;
}
1265 1277
1266 1278 wchar_t
1267 1279 maxmin(wchar_t c, int flag)
1268 1280 {
1269 1281 static wchar_t minmax1[2], minmax2[2], minmax3[2];
1270 1282
1271 1283 if(!minmax1[0]) {
1272 1284 /* compute min and max process codes for all code sets */
1273 1285 int length, i;
1274 1286 char multic[MB_LEN_MAX], minmax[2];
1275 1287 for(i = 0377; i >= 0200; i--)
1276 1288 if(!iscntrl(i))
1277 1289 break;
1278 1290 minmax[1] = i;
1279 1291 for(i = 0240; i <= 0377; i++)
1280 1292 if(!iscntrl(i))
1281 1293 break;
1282 1294 minmax[0] = i;
1283 1295 for(i = 0; i <= 1; i++) {
1284 1296 length = MB_LEN_MAX;
1285 1297 while(length--)
1286 1298 multic[length] = minmax[i];
1287 1299 mbtowc(&minmax1[i], multic, MB_LEN_MAX);
1288 1300 if(eucw2) {
1289 1301 multic[0] = SS2;
1290 1302 mbtowc(&minmax2[i], multic, MB_LEN_MAX);
1291 1303 }
1292 1304 if(eucw3) {
1293 1305 multic[0] = SS3;
1294 1306 mbtowc(&minmax3[i], multic, MB_LEN_MAX);
1295 1307 }
1296 1308 }
1297 1309 }
1298 1310 switch(c) {
1299 1311 case WCHAR_CS1: return minmax1[flag];
1300 1312 case WCHAR_CS2: return minmax2[flag];
1301 1313 case WCHAR_CS3: return minmax3[flag];
1302 1314 }
1303 1315
1304 1316 /* NOTREACHED */
1305 1317 return (0);
1306 1318 }
↓ open down ↓ |
347 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX