1 %{
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License, Version 1.0 only
7 * (the "License"). You may not use this file except in compliance
8 * with the License.
9 *
10 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11 * or http://www.opensolaris.org/os/licensing.
12 * See the License for the specific language governing permissions
13 * and limitations under the License.
14 *
15 * When distributing Covered Code, include this CDDL HEADER in each
16 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17 * If applicable, add the following below this CDDL HEADER, with the
18 * fields enclosed by brackets "[]" replaced with your own identifying
19 * information: Portions Copyright [yyyy] [name of copyright owner]
20 *
21 * CDDL HEADER END
22 */
23 %}
24 /*
25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
31
32 /* Copyright (c) 1987, 1988 Microsoft Corporation */
33 /* All Rights Reserved */
34
35 %{
36 #pragma ident "%Z%%M% %I% %E% SMI"
37 %}
38
39 /*
40 * egrep -- print lines containing (or not containing) a regular expression
41 *
42 * status returns:
43 * 0 - ok, and some matches
44 * 1 - ok, but no matches
45 * 2 - some error; matches irrelevant
46 */
47 %token CHAR MCHAR DOT MDOT CCL NCCL MCCL NMCCL OR CAT STAR PLUS QUEST
48 %left OR
49 %left CHAR MCHAR DOT CCL NCCL MCCL NMCCL '('
50 %left CAT
51 %left STAR PLUS QUEST
52
53 %{
54 #include <stdio.h>
55 #include <ctype.h>
56 #include <memory.h>
57 #include <wchar.h>
58 #include <wctype.h>
59 #include <widec.h>
60 #include <stdlib.h>
61 #include <limits.h>
62 #include <locale.h>
63
64 #define BLKSIZE 512 /* size of reported disk blocks */
65 #define EBUFSIZ 8192
66 #define MAXLIN 350
67 #define NCHARS 256
68 #define MAXPOS 4000
69 #define NSTATES 64
70 #define FINAL -1
71 #define RIGHT '\n' /* serves as record separator and as $ */
72 #define LEFT '\n' /* beginning of line */
73 int gotofn[NSTATES][NCHARS];
74 int state[NSTATES];
75 int out[NSTATES];
76 int line = 1;
77 int *name;
78 int *left;
79 int *right;
80 int *parent;
81 int *foll;
82 int *positions;
83 char *chars;
84 wchar_t *lower;
85 wchar_t *upper;
86 int maxlin, maxclin, maxwclin, maxpos;
87 int nxtpos = 0;
88 int inxtpos;
89 int nxtchar = 0;
90 int *tmpstat;
91 int *initstat;
92 int istat;
93 int nstate = 1;
94 int xstate;
95 int count;
96 int icount;
97 char *input;
98
99
100 wchar_t lyylval;
101 wchar_t nextch();
102 wchar_t maxmin();
103 int compare();
104 void overflo();
105
106 char reinit = 0;
107
108 long long lnum;
109 int bflag;
110 int cflag;
111 int eflag;
112 int fflag;
113 int hflag;
114 int iflag;
115 int lflag;
116 int nflag;
117 int sflag;
118 int vflag;
119 int nfile;
120 long long blkno;
121 long long tln;
122 int nsucc;
123 int badbotch;
124 extern char *optarg;
125 extern int optind;
126
127 int f;
128 FILE *expfile;
129 %}
130
131 %%
132 s: t
133 {
134 unary(FINAL, $1);
135 line--;
136 }
137 ;
138 t: b r
139 { $$ = node(CAT, $1, $2); }
140 | OR b r OR
141 { $$ = node(CAT, $2, $3); }
142 | OR b r
143 { $$ = node(CAT, $2, $3); }
144 | b r OR
145 { $$ = node(CAT, $1, $2); }
146 ;
147 b:
148 { /* if(multibyte)
149 $$ = mdotenter();
150 else */
151 $$ = enter(DOT);
152 $$ = unary(STAR, $$);
153 }
154 ;
155 r: CHAR
156 { $$ = iflag && isalpha($1) ?
157 node(OR, enter(tolower($1)), enter(toupper($1))) : enter($1); }
158 | MCHAR
159 { $$ = (iflag && iswalpha(lyylval)) ?
160 node(OR, mchar(towlower(lyylval)), mchar(towupper(lyylval))) :
161 mchar(lyylval); }
162 | DOT
163 { if(multibyte)
164 $$ = mdotenter();
165 else
166 $$ = enter(DOT);
167 }
168 | CCL
169 { $$ = cclenter(CCL); }
170 | NCCL
171 { $$ = cclenter(NCCL); }
172 | MCCL
173 { $$ = ccl(CCL); }
174 | NMCCL
175 { $$ = ccl(NCCL); }
176 ;
177
178 r: r OR r
179 { $$ = node(OR, $1, $3); }
180 | r r %prec CAT
181 { $$ = node(CAT, $1, $2); }
182 | r STAR
183 { $$ = unary(STAR, $1); }
184 | r PLUS
185 { $$ = unary(PLUS, $1); }
186 | r QUEST
187 { $$ = unary(QUEST, $1); }
188 | '(' r ')'
189 { $$ = $2; }
190 | error
191 ;
192
193 %%
194 void add(int *, int);
195 void clearg(void);
196 void execute(char *);
197 void follow(int);
198 int mgetc(void);
199 void synerror(void);
200
201
202 void
203 yyerror(char *s)
204 {
205 fprintf(stderr, "egrep: %s\n", s);
206 exit(2);
207 }
208
209 int
210 yylex(void)
211 {
212 extern int yylval;
213 int cclcnt, x, ccount, oldccount;
214 wchar_t c, lc;
215
216 c = nextch();
217 switch(c) {
218 case '^':
219 yylval = LEFT;
220 return(CHAR);
221 case '$':
222 c = RIGHT;
223 goto defchar;
224 case '|': return (OR);
225 case '*': return (STAR);
226 case '+': return (PLUS);
227 case '?': return (QUEST);
228 case '(': return (c);
229 case ')': return (c);
230 case '.': return(DOT);
231 case '\0': return (0);
232 case RIGHT: return (OR);
233 case '[':
234 x = (multibyte ? MCCL : CCL);
235 cclcnt = 0;
236 count = nxtchar++;
237 if ((c = nextch()) == '^') {
238 x = (multibyte ? NMCCL : NCCL);
239 c = nextch();
240 }
241 lc = 0;
242 do {
243 if (iflag && iswalpha(c))
244 c = towlower(c);
245 if (c == '\0') synerror();
246 if (c == '-' && cclcnt > 0 && lc != 0) {
247 if ((c = nextch()) != 0) {
248 if(c == ']') {
249 chars[nxtchar++] = '-';
250 cclcnt++;
251 break;
252 }
253 if (iflag && iswalpha(c))
254 c = towlower(c);
255 if (!multibyte ||
256 (c & WCHAR_CSMASK) == (lc & WCHAR_CSMASK) &&
257 lc < c &&
258 !iswcntrl(c) && !iswcntrl(lc)) {
259 if (nxtchar >= maxclin)
260 if (allocchars() == 0)
261 overflo();
262 chars[nxtchar++] = '-';
263 cclcnt++;
264 }
265 }
266 }
267 ccount = oldccount = nxtchar;
268 if(ccount + MB_LEN_MAX >= maxclin)
269 if(allocchars() == 0)
270 overflo();
271 ccount += wctomb(&chars[ccount], c);
272 cclcnt += ccount - oldccount;
273 nxtchar += ccount - oldccount;
274 lc = c;
275 } while ((c = nextch()) != ']');
276 chars[count] = cclcnt;
277 return(x);
278
279 case '\\':
280 if ((c = nextch()) == '\0') synerror();
281 defchar:
282 default:
283 if (c <= 0177) {
284 yylval = c;
285 return (CHAR);
286 } else {
287 lyylval = c;
288 return (MCHAR);
289 }
290 }
291 }
292
293 wchar_t
294 nextch(void)
295 {
296 wchar_t lc;
297 char multic[MB_LEN_MAX];
298 int length, d;
299 if (fflag) {
300 if ((length = _mbftowc(multic, &lc, mgetc, &d)) < 0)
301 synerror();
302 if(length == 0)
303 lc = '\0';
304 }
305 else {
306 if((length = mbtowc(&lc, input, MB_LEN_MAX)) == -1)
307 synerror();
308 if(length == 0)
309 return(0);
310 input += length;
311 }
312 return(lc);
313 }
314
315 int
316 mgetc(void)
317 {
318 return(getc(expfile));
319 }
320
321 void
322 synerror(void)
323 {
324 fprintf(stderr, gettext("egrep: syntax error\n"));
325 exit(2);
326 }
327
328 int
329 enter(int x)
330 {
331 if(line >= maxlin)
332 if(alloctree() == 0)
333 overflo();
334 name[line] = x;
335 left[line] = 0;
336 right[line] = 0;
337 return(line++);
338 }
339
340 int
341 cclenter(int x)
342 {
343 int linno;
344 linno = enter(x);
345 right[linno] = count;
346 return (linno);
347 }
348
349 int
350 node(int x, int l, int r)
351 {
352 if(line >= maxlin)
353 if(alloctree() == 0)
354 overflo();
355 name[line] = x;
356 left[line] = l;
357 right[line] = r;
358 parent[l] = line;
359 parent[r] = line;
360 return(line++);
361 }
362
363 int
364 unary(int x, int d)
365 {
366 if(line >= maxlin)
367 if(alloctree() == 0)
368 overflo();
369 name[line] = x;
370 left[line] = d;
371 right[line] = 0;
372 parent[d] = line;
373 return(line++);
374 }
375
376 int
377 allocchars(void)
378 {
379 maxclin += MAXLIN;
380 if((chars = realloc(chars, maxclin)) == (char *)0)
381 return 0;
382 return 1;
383 }
384
385 int
386 alloctree(void)
387 {
388 maxlin += MAXLIN;
389 if((name = (int *)realloc(name, maxlin*sizeof(int))) == (int *)0)
390 return 0;
391 if((left = (int *)realloc(left, maxlin*sizeof(int))) == (int *)0)
392 return 0;
393 if((right = (int *)realloc(right, maxlin*sizeof(int))) == (int *)0)
394 return 0;
395 if((parent = (int *)realloc(parent, maxlin*sizeof(int))) == (int *)0)
396 return 0;
397 if((foll = (int *)realloc(foll, maxlin*sizeof(int))) == (int *)0)
398 return 0;
399 if((tmpstat = (int *)realloc(tmpstat, maxlin*sizeof(int))) == (int *)0)
400 return 0;
401 if((initstat = (int *)realloc(initstat, maxlin*sizeof(int))) == (int *)0)
402 return 0;
403 return 1;
404 }
405
406 void
407 overflo(void)
408 {
409 fprintf(stderr, gettext("egrep: regular expression too long\n"));
410 exit(2);
411 }
412
413 void
414 cfoll(int v)
415 {
416 int i;
417 if (left[v] == 0) {
418 count = 0;
419 for (i=1; i<=line; i++) tmpstat[i] = 0;
420 follow(v);
421 add(foll, v);
422 }
423 else if (right[v] == 0) cfoll(left[v]);
424 else {
425 cfoll(left[v]);
426 cfoll(right[v]);
427 }
428 }
429
430 void
431 cgotofn(void)
432 {
433 int i;
434 count = 0;
435 inxtpos = nxtpos;
436 for (i=3; i<=line; i++) tmpstat[i] = 0;
437 if (cstate(line-1)==0) {
438 tmpstat[line] = 1;
439 count++;
440 out[1] = 1;
441 }
442 for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
443 count--; /*leave out position 1 */
444 icount = count;
445 tmpstat[1] = 0;
446 add(state, 1);
447 istat = nxtst(1, LEFT);
448 }
449
450 int
451 nxtst(int s, int c)
452 {
453 int i, num, k;
454 int pos, curpos, number, newpos;
455 num = positions[state[s]];
456 count = icount;
457 for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
458 pos = state[s] + 1;
459 for (i=0; i<num; i++) {
460 curpos = positions[pos];
461 k = name[curpos];
462 if (k >= 0)
463 if (
464 (k == c)
465 || (k == DOT && dot(c))
466 || (k == MDOT && mdot(c))
467 || (k == CCL && dot(c) && member(c, right[curpos], 1))
468 || (k == NCCL && dot(c) && member(c, right[curpos], 0))
469 || (k == MCCL && mdot(c) && member(c, right[curpos], 1))
470 ) {
471 number = positions[foll[curpos]];
472 newpos = foll[curpos] + 1;
473 for (k=0; k<number; k++) {
474 if (tmpstat[positions[newpos]] != 1) {
475 tmpstat[positions[newpos]] = 1;
476 count++;
477 }
478 newpos++;
479 }
480 }
481 pos++;
482 }
483 if (notin(nstate)) {
484 if (++nstate >= NSTATES) {
485 for (i=1; i<NSTATES; i++)
486 out[i] = 0;
487 for (i=1; i<NSTATES; i++)
488 for (k=0; k<NCHARS; k++)
489 gotofn[i][k] = 0;
490 nstate = 1;
491 nxtpos = inxtpos;
492 reinit = 1;
493 add(state, nstate);
494 if (tmpstat[line] == 1) out[nstate] = 1;
495 return nstate;
496 }
497 add(state, nstate);
498 if (tmpstat[line] == 1) out[nstate] = 1;
499 gotofn[s][c] = nstate;
500 return nstate;
501 }
502 else {
503 gotofn[s][c] = xstate;
504 return xstate;
505 }
506 }
507
508
509 int
510 cstate(int v)
511 {
512 int b;
513 if (left[v] == 0) {
514 if (tmpstat[v] != 1) {
515 tmpstat[v] = 1;
516 count++;
517 }
518 return(1);
519 }
520 else if (right[v] == 0) {
521 if (cstate(left[v]) == 0) return (0);
522 else if (name[v] == PLUS) return (1);
523 else return (0);
524 }
525 else if (name[v] == CAT) {
526 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
527 else return (1);
528 }
529 else { /* name[v] == OR */
530 b = cstate(right[v]);
531 if (cstate(left[v]) == 0 || b == 0) return (0);
532 else return (1);
533 }
534 }
535
536
537 int
538 dot(int c)
539 {
540 if(multibyte && c >= 0200 && (!iscntrl(c) || c == SS2 && eucw2 || c == SS3 && eucw3))
541 return(0);
542 if(c == RIGHT || c == LEFT)
543 return(0);
544 return(1);
545 }
546
547 int
548 mdot(int c)
549 {
550 if(c >= 0200 && !iscntrl(c))
551 return(1);
552 return(0);
553 }
554
555 int
556 member(int symb, int set, int torf)
557 {
558 int i, num, pos, c, lc;
559 if(symb == RIGHT || symb == LEFT)
560 return(0);
561 num = chars[set];
562 pos = set + 1;
563 lc = 0;
564 if(iflag)
565 symb = tolower(symb);
566 for (i=0; i<num; i++) {
567 c = (unsigned char)chars[pos++];
568 if(c == '-' && lc != 0 && ++i < num) {
569 c = (unsigned char)chars[pos++];
570 if(lc <= symb && symb <= c)
571 return(torf);
572 }
573 if (symb == c)
574 return (torf);
575 lc = c;
576 }
577 return(!torf);
578 }
579
580 int
581 notin(int n)
582 {
583 int i, j, pos;
584 for (i=1; i<=n; i++) {
585 if (positions[state[i]] == count) {
586 pos = state[i] + 1;
587 for (j=0; j < count; j++)
588 if (tmpstat[positions[pos++]] != 1) goto nxt;
589 xstate = i;
590 return (0);
591 }
592 nxt: ;
593 }
594 return (1);
595 }
596
597 void
598 add(int *array, int n)
599 {
600 int i;
601 if (nxtpos + count >= maxpos) {
602 maxpos += MAXPOS + count;
603 if((positions = (int *)realloc(positions, maxpos *sizeof(int))) == (int *)0)
604 overflo();
605 }
606 array[n] = nxtpos;
607 positions[nxtpos++] = count;
608 for (i=3; i <= line; i++) {
609 if (tmpstat[i] == 1) {
610 positions[nxtpos++] = i;
611 }
612 }
613 }
614
615 void
616 follow(int v)
617 {
618 int p;
619 if (v == line) return;
620 p = parent[v];
621 switch(name[p]) {
622 case STAR:
623 case PLUS: cstate(v);
624 follow(p);
625 return;
626
627 case OR:
628 case QUEST: follow(p);
629 return;
630
631 case CAT: if (v == left[p]) {
632 if (cstate(right[p]) == 0) {
633 follow(p);
634 return;
635 }
636 }
637 else follow(p);
638 return;
639 case FINAL: if (tmpstat[line] != 1) {
640 tmpstat[line] = 1;
641 count++;
642 }
643 return;
644 }
645 }
646
647 #define USAGE "[ -bchilnsv ] [ -e exp ] [ -f file ] [ strings ] [ file ] ..."
648
649 int
650 main(int argc, char **argv)
651 {
652 char c;
653 char nl = '\n';
654 int errflag = 0;
655
656 (void)setlocale(LC_ALL, "");
657
658 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
659 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't. */
660 #endif
661 (void) textdomain(TEXT_DOMAIN);
662
663 while((c = getopt(argc, argv, "ybcie:f:hlnvs")) != -1)
664 switch(c) {
665
666 case 'b':
667 bflag++;
668 continue;
669
670 case 'c':
671 cflag++;
672 continue;
673
674 case 'e':
675 eflag++;
676 input = optarg;
677 continue;
678
679 case 'f':
680 fflag++;
681 expfile = fopen(optarg, "r");
682 if(expfile == NULL) {
683 fprintf(stderr,
684 gettext("egrep: can't open %s\n"), optarg);
685 exit(2);
686 }
687 continue;
688
689 case 'h':
690 hflag++;
691 continue;
692
693 case 'y':
694 case 'i':
695 iflag++;
696 continue;
697
698 case 'l':
699 lflag++;
700 continue;
701
702 case 'n':
703 nflag++;
704 continue;
705
706 case 's':
707 sflag++;
708 continue;
709
710 case 'v':
711 vflag++;
712 continue;
713
714 case '?':
715 errflag++;
716 }
717 if (errflag || ((argc <= 0) && !fflag && !eflag)) {
718 fprintf(stderr, gettext("usage: egrep %s\n"), gettext(USAGE));
719 exit(2);
720 }
721 if(!eflag && !fflag) {
722 input = argv[optind];
723 optind++;
724 }
725
726 argc -= optind;
727 argv = &argv[optind];
728
729 /* allocate initial space for arrays */
730 if((name = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
731 overflo();
732 if((left = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
733 overflo();
734 if((right = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
735 overflo();
736 if((parent = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
737 overflo();
738 if((foll = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
739 overflo();
740 if((tmpstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
741 overflo();
742 if((initstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
743 overflo();
744 if((chars = (char *)malloc(MAXLIN)) == (char *)0)
745 overflo();
746 if((lower = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
747 overflo();
748 if((upper = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
749 overflo();
750 if((positions = (int *)malloc(MAXPOS*sizeof(int))) == (int *)0)
751 overflo();
752 maxlin = MAXLIN;
753 maxclin = MAXLIN;
754 maxwclin = MAXLIN;
755 maxpos = MAXPOS;
756
757 yyparse();
758
759 cfoll(line-1);
760 cgotofn();
761 nfile = argc;
762 if (argc<=0) {
763 execute(0);
764 }
765 else while (--argc >= 0) {
766 if (reinit == 1) clearg();
767 execute(*argv++);
768 }
769 return (badbotch ? 2 : nsucc==0);
770 }
771
772 void
773 execute(char *file)
774 {
775 char *p;
776 int cstat;
777 wchar_t c;
778 int t;
779 long count;
780 long count1, count2;
781 long nchars;
782 int succ;
783 char *ptr, *ptrend, *lastptr;
784 char *buf;
785 long lBufSiz;
786 FILE *f;
787 int nlflag;
788
789 lBufSiz = EBUFSIZ;
790 if ((buf = malloc (lBufSiz + EBUFSIZ)) == NULL) {
791 exit (2); /* out of memory - BAIL */
792 }
793
794 if (file) {
795 if ((f = fopen(file, "r")) == NULL) {
796 fprintf(stderr,
797 gettext("egrep: can't open %s\n"), file);
798 badbotch=1;
799 return;
800 }
801 } else {
802 file = "<stdin>";
803 f = stdin;
804 }
805 lnum = 1;
806 tln = 0;
807 if((count = read(fileno(f), buf, EBUFSIZ)) <= 0) {
808 fclose(f);
809
810 if (cflag) {
811 if (nfile>1 && !hflag)
812 fprintf(stdout, "%s:", file);
813 fprintf(stdout, "%lld\n", tln);
814 }
815 return;
816 }
817
818 blkno = count;
819 ptr = buf;
820 for(;;) {
821 if((ptrend = memchr(ptr, '\n', buf + count - ptr)) == NULL) {
822 /*
823 move the unused partial record to the head of the buffer
824 */
825 if (ptr > buf) {
826 count = buf + count - ptr;
827 memmove (buf, ptr, count);
828 ptr = buf;
829 }
830
831 /*
832 Get a bigger buffer if this one is full
833 */
834 if(count > lBufSiz) {
835 /*
836 expand the buffer
837 */
838 lBufSiz += EBUFSIZ;
839 if ((buf = realloc (buf, lBufSiz + EBUFSIZ)) == NULL) {
840 exit (2); /* out of memory - BAIL */
841 }
842
843 ptr = buf;
844 }
845
846 p = buf + count;
847 if((count1 = read(fileno(f), p, EBUFSIZ)) > 0) {
848 count += count1;
849 blkno += count1;
850 continue;
851 }
852 ptrend = ptr + count;
853 nlflag = 0;
854 } else
855 nlflag = 1;
856 *ptrend = '\n';
857 p = ptr;
858 lastptr = ptr;
859 cstat = istat;
860 succ = 0;
861 for(;;) {
862 if(out[cstat]) {
863 if(multibyte && p > ptr) {
864 wchar_t wchar;
865 int length;
866 char *endptr = p;
867 p = lastptr;
868 while(p < endptr) {
869 length = mbtowc(&wchar, p, MB_LEN_MAX);
870 if(length <= 1)
871 p++;
872 else
873 p += length;
874 }
875 if(p == endptr) {
876 succ = !vflag;
877 break;
878 }
879 cstat = 1;
880 length = mbtowc(&wchar, lastptr, MB_LEN_MAX);
881 if(length <= 1)
882 lastptr++;
883 else
884 lastptr += length;
885 p = lastptr;
886 continue;
887 }
888 succ = !vflag;
889 break;
890 }
891 c = (unsigned char)*p++;
892 if ((t = gotofn[cstat][c]) == 0)
893 cstat = nxtst(cstat, c);
894 else
895 cstat = t;
896 if(c == RIGHT) {
897 if(out[cstat]) {
898 succ = !vflag;
899 break;
900 }
901 succ = vflag;
902 break;
903 }
904 }
905 if(succ) {
906 nsucc = 1;
907 if (cflag) tln++;
908 else if (sflag)
909 ; /* ugh */
910 else if (lflag) {
911 printf("%s\n", file);
912 fclose(f);
913 return;
914 }
915 else {
916 if (nfile > 1 && !hflag)
917 printf(gettext("%s:"), file);
918 if (bflag) {
919 nchars = blkno - (buf + count - ptrend) - 2;
920 if(nlflag)
921 nchars++;
922 printf("%lld:", nchars/BLKSIZE);
923 }
924 if (nflag)
925 printf("%lld:", lnum);
926 if(nlflag)
927 nchars = ptrend - ptr + 1;
928 else
929 nchars = ptrend - ptr;
930 fwrite(ptr, (size_t)1, (size_t)nchars, stdout);
931 }
932 }
933 if(!nlflag)
934 break;
935 ptr = ptrend + 1;
936 if(ptr >= buf + count) {
937 ptr = buf;
938 if((count = read(fileno(f), buf, EBUFSIZ)) <= 0)
939 break;
940 blkno += count;
941 }
942 lnum++;
943 if (reinit == 1)
944 clearg();
945 }
946 fclose(f);
947 if (cflag) {
948 if (nfile > 1 && !hflag)
949 printf(gettext("%s:"), file);
950 printf("%lld\n", tln);
951 }
952 }
953
954 void
955 clearg(void)
956 {
957 int i, k;
958 for (i=1; i<=nstate; i++)
959 out[i] = 0;
960 for (i=1; i<=nstate; i++)
961 for (k=0; k<NCHARS; k++)
962 gotofn[i][k] = 0;
963 nstate = 1;
964 nxtpos = inxtpos;
965 reinit = 0;
966 count = 0;
967 for (i=3; i<=line; i++) tmpstat[i] = 0;
968 if (cstate(line-1)==0) {
969 tmpstat[line] = 1;
970 count++;
971 out[1] = 1;
972 }
973 for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
974 count--; /*leave out position 1 */
975 icount = count;
976 tmpstat[1] = 0;
977 add(state, 1);
978 istat = nxtst(1, LEFT);
979 }
980
981 int
982 mdotenter(void)
983 {
984 int i, x1, x2;
985 x1 = enter(DOT);
986 x2 = enter(MDOT);
987 for(i = 1; i < (int) eucw1; i++)
988 x2 = node(CAT, x2, enter(MDOT));
989 x1 = node(OR, x1, x2);
990 if(eucw2) {
991 x2 = enter('\216');
992 for(i = 1; i <= (int) eucw2; i++)
993 x2 = node(CAT, x2, enter(MDOT));
994 x1 = node(OR, x1, x2);
995 }
996 if(eucw3) {
997 x2 = enter('\217');
998 for(i = 1; i <= (int) eucw3; i++)
999 x2 = node(CAT, x2, enter(MDOT));
1000 x1 = node(OR, x1, x2);
1001 }
1002 return(x1);
1003 }
1004
1005 int
1006 mchar(wchar_t c)
1007 {
1008 char multichar[MB_LEN_MAX+1];
1009 char *p;
1010 int x1, lc, length;
1011
1012 length = wctomb(multichar, c);
1013 p = multichar;
1014 *(p + length) = '\0';
1015 x1 = enter((unsigned char)*p++);
1016 while(lc = (unsigned char)*p++)
1017 x1 = node(CAT, x1, enter(lc));
1018 return(x1);
1019 }
1020
1021 int
1022 ccl(int type)
1023 {
1024 wchar_t c, lc;
1025 char multic1[MB_LEN_MAX];
1026 char multic2[MB_LEN_MAX];
1027 int x1, x2, length, current, last, cclcnt;
1028 x2 = 0;
1029 current = 0;
1030 last = genrange(type);
1031 nxtchar = count + 1;
1032 cclcnt = 0;
1033 /* create usual character class for single byte characters */
1034 while(current <= last && (isascii(c = lower[current]) || c <= 0377 && iscntrl(c))) {
1035 cclcnt++;
1036 chars[nxtchar++] = c;
1037 if(lower[current] != upper[current]) {
1038 chars[nxtchar++] = '-';
1039 chars[nxtchar++] = upper[current];
1040 cclcnt += 2;
1041 }
1042 current++;
1043 }
1044
1045 if(cclcnt)
1046 chars[count] = cclcnt;
1047 else
1048 nxtchar = count;
1049 if(current > 0)
1050 /* single byte part of character class */
1051 x2 = cclenter(type);
1052 else if(type == NCCL)
1053 /* all single byte characters match */
1054 x2 = enter(DOT);
1055 while(current <= last) {
1056 if(upper[current] == lower[current])
1057 x1 = mchar(lower[current]);
1058 else {
1059 length = wctomb(multic1, lower[current]);
1060 wctomb(multic2, upper[current]);
1061 x1 = range((unsigned char *)multic1,
1062 (unsigned char *)multic2, length);
1063 }
1064 if(x2)
1065 x2 = node(OR, x2, x1);
1066 else
1067 x2 = x1;
1068 current++;
1069 }
1070 return x2;
1071 }
1072
1073 int
1074 range(unsigned char *p1, unsigned char *p2, int length)
1075 {
1076 char multic[MB_LEN_MAX+1];
1077 char *p;
1078 int i, x1, x2;
1079 if(length == 1)
1080 return(classenter(*p1, *p2));
1081 if(p1[0] == p2[0])
1082 return(node(CAT, enter(p1[0]), range(p1+1, p2+1, length - 1)));
1083 p = multic;
1084 for(i = 1; i < length; i++)
1085 *p++ = 0377;
1086 x1 = node(CAT, enter(p1[0]),
1087 range(p1+1, (unsigned char *)multic, length - 1));
1088 if((unsigned char)(p1[0] + 1) < p2[0]) {
1089 x2 = classenter(p1[0] + 1, p2[0] - 1);
1090 for(i = 1; i < length; i++)
1091 x2 = node(CAT, x2, enter(MDOT));
1092 x1 = node(OR, x1, x2);
1093 }
1094 p = multic;
1095 for(i = 1; i < length; i++)
1096 *p++ = 0200;
1097 x2 = node(CAT, enter(p2[0]),
1098 range((unsigned char *)multic, p2+1, length - 1));
1099 return node(OR, x1, x2);
1100 }
1101
1102 int
1103 classenter(int x1, int x2)
1104 {
1105 static int max, min;
1106 if(!max) {
1107 int i;
1108 for(i = 0200; i <= 0377; i++)
1109 if(!iscntrl(i))
1110 break;
1111 min = i;
1112 for(i = 0377; i >= 0200; i--)
1113 if(!iscntrl(i))
1114 break;
1115 max = i;
1116 }
1117 if(x1 <= min && x2 >= max)
1118 return enter(MDOT);
1119 if(nxtchar + 4 >= maxclin)
1120 if(allocchars() == 0)
1121 overflo();
1122 count = nxtchar++;
1123 chars[nxtchar++] = x1;
1124 chars[nxtchar++] = '-';
1125 chars[nxtchar++] = x2;
1126 chars[count] = 3;
1127 return cclenter(MCCL);
1128 }
1129
1130 int
1131 genrange(int type)
1132 {
1133 char *p, *endp;
1134 int current, nel, i, last, length;
1135 wchar_t c, lc;
1136
1137 current = 0;
1138 p = &chars[count+1];
1139 endp = &chars[count+1] + chars[count];
1140 lc = 0;
1141
1142 /* convert character class into union of ranges */
1143 while(p < endp) {
1144 length = mbtowc(&c, p, MB_LEN_MAX);
1145 p += length;
1146 if(c == '-' && lc != 0) {
1147 length = mbtowc(&c, p, MB_LEN_MAX);
1148 upper[current-1] = c;
1149 p += length;
1150 } else {
1151 lower[current] = c;
1152 upper[current++] = c;
1153 }
1154 lc = c;
1155 }
1156 nel = current;
1157 /* sort lower and upper bounds of ranges */
1158 qsort((char *)lower, nel, sizeof(wchar_t), compare);
1159 qsort((char *)upper, nel, sizeof(wchar_t), compare);
1160 last = current - 1;
1161 current = 0;
1162 /* combine overlapping or adjacent ranges */
1163 for(i = 0; i < last; i++)
1164 if(upper[i] >= lower[i+1] - 1)
1165 upper[current] = upper[i+1];
1166 else {
1167 lower[++current] = lower[i+1];
1168 upper[current] = upper[i+1];
1169 }
1170 if(type == NCCL) {
1171 /* find complement of character class */
1172 int j, next;
1173 i = 0;
1174 while(i <= current && isascii(c=lower[i]) || c <= 0377 && iscntrl(c))
1175 i++;
1176 if(i > current) {
1177 /* match all multibyte characters */
1178 if(eucw2) {
1179 lower[i] = maxmin(WCHAR_CS2, 0);
1180 upper[i++] = maxmin(WCHAR_CS2, 1);
1181 }
1182 if(eucw3) {
1183 lower[i] = maxmin(WCHAR_CS3, 0);
1184 upper[i++] = maxmin(WCHAR_CS3, 1);
1185 }
1186 lower[i] = maxmin(WCHAR_CS1, 0);
1187 upper[i++] = maxmin(WCHAR_CS1, 1);
1188 return i - 1;
1189 }
1190 next = current + 1;
1191 if(next + current + 2 >= maxwclin) {
1192 maxwclin += MAXLIN + next + current + 2;
1193 if((lower = (wchar_t *)realloc(lower, maxwclin *sizeof(wchar_t))) == (wchar_t *)0 ||
1194 (upper = (wchar_t *)realloc(upper, maxwclin * sizeof(wchar_t))) == (wchar_t *)0)
1195 overflo();
1196 }
1197 if(eucw2 && lower[i] > maxmin(WCHAR_CS2, 0)) {
1198 lower[next] = maxmin(WCHAR_CS2, 0);
1199 if((lower[i] & WCHAR_CSMASK) != WCHAR_CS2) {
1200 upper[next++] = maxmin(WCHAR_CS2, 1);
1201 if((lower[i] & WCHAR_CSMASK) == WCHAR_CS1 && eucw3) {
1202 lower[next] = maxmin(WCHAR_CS3, 0);
1203 upper[next++] = maxmin(WCHAR_CS3, 1);
1204 }
1205 if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1206 lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1207 upper[next++] = lower[i] - 1;
1208 }
1209 } else
1210 upper[next++] = lower[i] - 1;
1211 } else if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1212 lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1213 upper[next++] = lower[i] - 1;
1214 }
1215 for(j = i; j < current; j++) {
1216 if(upper[j] < maxmin(upper[j] & WCHAR_CSMASK, 1)) {
1217 lower[next] = upper[j] + 1;
1218 if((upper[j] & WCHAR_CSMASK) != (lower[j+1] & WCHAR_CSMASK)) {
1219 upper[next++] = maxmin(upper[j] & WCHAR_CSMASK, 1);
1220 if(eucw3 && (upper[j] & WCHAR_CSMASK) == WCHAR_CS2 && (lower[j+1] & WCHAR_CSMASK) == WCHAR_CS1) {
1221 lower[next] = maxmin(WCHAR_CS3, 0);
1222 upper[next++] = maxmin(WCHAR_CS3, 1);
1223 }
1224 if(lower[j+1] > maxmin(lower[j+1] & WCHAR_CSMASK, 0)) {
1225 lower[next] = maxmin(lower[j+1] & WCHAR_CSMASK, 0);
1226 upper[next++] = lower[j+1] - 1;
1227 }
1228 } else
1229 upper[next++] = lower[j+1] - 1;
1230 } else if(lower[j+1] > maxmin(lower[j+1], 0)) {
1231 lower[next] = maxmin(lower[j+1], 0);
1232 upper[next++] = lower[j+1] - 1;
1233 }
1234 }
1235 if(upper[current] < maxmin(upper[current] & WCHAR_CSMASK, 1)) {
1236 lower[next] = upper[current] + 1;
1237 upper[next++] = maxmin(upper[current] & WCHAR_CSMASK, 1);
1238 }
1239 if((upper[current] & WCHAR_CSMASK) != WCHAR_CS1) {
1240 if((upper[current] & WCHAR_CSMASK) == WCHAR_CS2 && eucw3) {
1241 lower[next] = maxmin(WCHAR_CS3, 0);
1242 upper[next++] = maxmin(WCHAR_CS3, 1);
1243 }
1244 lower[next] = maxmin(WCHAR_CS1, 0);
1245 upper[next++] = maxmin(WCHAR_CS1, 1);
1246 }
1247 for(j = current + 1; j < next; j++) {
1248 lower[i] = lower[j];
1249 upper[i++] = upper[j];
1250 }
1251 current = i - 1;
1252 }
1253 return(current);
1254 }
1255
1256 int
1257 compare(wchar_t *c, wchar_t *d)
1258 {
1259 if(*c < *d)
1260 return -1;
1261 if(*c == *d)
1262 return 0;
1263 return 1;
1264 }
1265
1266 wchar_t
1267 maxmin(wchar_t c, int flag)
1268 {
1269 static wchar_t minmax1[2], minmax2[2], minmax3[2];
1270
1271 if(!minmax1[0]) {
1272 /* compute min and max process codes for all code sets */
1273 int length, i;
1274 char multic[MB_LEN_MAX], minmax[2];
1275 for(i = 0377; i >= 0200; i--)
1276 if(!iscntrl(i))
1277 break;
1278 minmax[1] = i;
1279 for(i = 0240; i <= 0377; i++)
1280 if(!iscntrl(i))
1281 break;
1282 minmax[0] = i;
1283 for(i = 0; i <= 1; i++) {
1284 length = MB_LEN_MAX;
1285 while(length--)
1286 multic[length] = minmax[i];
1287 mbtowc(&minmax1[i], multic, MB_LEN_MAX);
1288 if(eucw2) {
1289 multic[0] = SS2;
1290 mbtowc(&minmax2[i], multic, MB_LEN_MAX);
1291 }
1292 if(eucw3) {
1293 multic[0] = SS3;
1294 mbtowc(&minmax3[i], multic, MB_LEN_MAX);
1295 }
1296 }
1297 }
1298 switch(c) {
1299 case WCHAR_CS1: return minmax1[flag];
1300 case WCHAR_CS2: return minmax2[flag];
1301 case WCHAR_CS3: return minmax3[flag];
1302 }
1303
1304 /* NOTREACHED */
1305 return (0);
1306 }