1 %{
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License, Version 1.0 only
7 * (the "License"). You may not use this file except in compliance
8 * with the License.
9 *
10 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11 * or http://www.opensolaris.org/os/licensing.
12 * See the License for the specific language governing permissions
13 * and limitations under the License.
14 *
15 * When distributing Covered Code, include this CDDL HEADER in each
16 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17 * If applicable, add the following below this CDDL HEADER, with the
18 * fields enclosed by brackets "[]" replaced with your own identifying
19 * information: Portions Copyright [yyyy] [name of copyright owner]
20 *
21 * CDDL HEADER END
22 */
23
24 /*
25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
31 %}
32
33 %{
34 #pragma ident "%Z%%M% %I% %E% SMI"
35 %}
36
	/*
	 * Start conditions: A is ordinary program text, str is the inside of a
	 * double-quoted string, reg is the inside of a regular expression
	 * (selected by startreg()), and sc is set after a '}' so that the next
	 * call to yylex() returns the brace itself.  No rule in this file
	 * section uses comment.
	 */
37 %Start A str sc reg comment
38
39 %{
40
41 #include <sys/types.h>
42 #include "awk.h"
43 #include "y.tab.h"
44
45 #undef input /* defeat lex */
46 #undef unput
47
/* Local replacements for the lex macros removed above; defined after the rules. */
48 static void unput(int);
49 static void unputstr(char *);
50
51 extern YYSTYPE yylval;
52 extern int infunc;
53
/* Source line number (advanced by the newline rules) and bracket nesting depths. */
54 off_t lineno = 1;
55 int bracecnt = 0;
56 int brackcnt = 0;
57 int parencnt = 0;
/*
 * DEBUG is defined unconditionally here, so RET always takes the tracing
 * form: when dbg is set it prints the token name and matched text before
 * returning the token.
 */
58 #define DEBUG
59 #ifdef DEBUG
60 # define RET(x) {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
61 #else
62 # define RET(x) return(x)
63 #endif
64
65 /*
66 * The standard (SUSv2) requires that the record size be at least LINE_MAX.
67 * LINE_MAX is a standard constant defined in limits.h.
68 * Though nawk is not standards compliant, we let RECSIZE
69 * grow with LINE_MAX instead of using the magic number 1024.
70 */
71 #define CBUFLEN (3 * LINE_MAX)
72
/*
 * CADD appends the first character of the current token to cbuf.  If that
 * leaves clen at CBUFLEN-1 or beyond, it reports a syntax error and returns
 * the scanner to state A.  The check runs after the store, and clen is not
 * reset here.
 */
73 #define CADD cbuf[clen++] = yytext[0]; \
74 if (clen >= CBUFLEN-1) { \
75 ERROR "string/reg expr %.10s... too long", cbuf SYNTAX; \
76 BEGIN A; \
77 }
78
/* Collection buffer shared by the <str> and <reg> rules; clen is its fill count. */
79 static uchar cbuf[CBUFLEN];
80 static uchar *s;
81 static int clen, cflag;
82 %}
83
	/*
	 * Character classes used by the rules: A starts an identifier, B
	 * continues one, D is a decimal digit, O an octal digit, H a hex digit
	 * and WS a blank or tab.  The class A is distinct from the start
	 * condition of the same name.
	 */
84 A [a-zA-Z_]
85 B [a-zA-Z0-9_]
86 D [0-9]
87 O [0-7]
88 H [0-9a-fA-F]
89 WS [ \t]
90
91 %%
92 switch (yybgin-yysvec-1) { /* start condition on entry to yylex(), recovered from lex's internal tables */
93 case 0: /* no start condition selected yet: begin in A */
94 BEGIN A;
95 break;
96 case sc: /* the previous call saw '}' and returned ';' in its place */
97 BEGIN A;
98 RET('}'); /* now deliver the brace itself */
99 }
100
101 <A>\n { lineno++; RET(NL); }
102 <A>#.* { ; } /* strip comments */
103 <A>{WS}+ { ; }
104 <A>; { RET(';'); }
105
106 <A>"\\"\n { lineno++; } /* backslash-newline: count the line, return no token */
107 <A>BEGIN { RET(XBEGIN); }
108 <A>END { RET(XEND); }
109 <A>func(tion)? { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
110 <A>return { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
111 <A>"&&" { RET(AND); }
112 <A>"||" { RET(BOR); }
113 <A>"!" { RET(NOT); }
114 <A>"!=" { yylval.i = NE; RET(NE); } /* from here on yylval.i records which operator was seen */
115 <A>"~" { yylval.i = MATCH; RET(MATCHOP); }
116 <A>"!~" { yylval.i = NOTMATCH; RET(MATCHOP); }
117 <A>"<" { yylval.i = LT; RET(LT); }
118 <A>"<=" { yylval.i = LE; RET(LE); }
119 <A>"==" { yylval.i = EQ; RET(EQ); }
120 <A>">=" { yylval.i = GE; RET(GE); }
121 <A>">" { yylval.i = GT; RET(GT); }
122 <A>">>" { yylval.i = APPEND; RET(APPEND); }
123 <A>"++" { yylval.i = INCR; RET(INCR); }
124 <A>"--" { yylval.i = DECR; RET(DECR); }
125 <A>"+=" { yylval.i = ADDEQ; RET(ASGNOP); } /* all assignment forms share the ASGNOP token */
126 <A>"-=" { yylval.i = SUBEQ; RET(ASGNOP); }
127 <A>"*=" { yylval.i = MULTEQ; RET(ASGNOP); }
128 <A>"/=" { yylval.i = DIVEQ; RET(ASGNOP); }
129 <A>"%=" { yylval.i = MODEQ; RET(ASGNOP); }
130 <A>"^=" { yylval.i = POWEQ; RET(ASGNOP); }
131 <A>"**=" { yylval.i = POWEQ; RET(ASGNOP); } /* same operator code as ^= */
132 <A>"=" { yylval.i = ASSIGN; RET(ASGNOP); }
133 <A>"**" { RET(POWER); } /* same token as ^ */
134 <A>"^" { RET(POWER); }
135
136 <A>"$"{D}+ { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); } /* $ followed by a literal number */
137 <A>"$NF" { unputstr("(NF)"); return(INDIRECT); } /* push back "(NF)" so the $ is followed by a parenthesized NF */
138 <A>"$"{A}{B}* { int c, n;
139 c = input(); unput(c); /* peek at the character after the name */
140 if (c == '(' || c == '[' || infunc && (n=isarg(yytext+1)) >= 0) {
141 unputstr(yytext+1); /* rescan the name on its own; only the $ is consumed here */
142 return(INDIRECT);
143 } else {
144 yylval.cp = setsymtab((uchar *)yytext+1, /* otherwise enter the name after the $ in symtab */
145 (uchar *)"",0.0,STR|NUM,symtab);
146 RET(IVAR);
147 }
148 }
149 <A>"$" { RET(INDIRECT); }
150 <A>NF { yylval.cp = setsymtab((uchar *)yytext, (uchar *)"", 0.0, NUM, symtab); RET(VARNF); }
151
152 <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? {
153 yylval.cp = setsymtab((uchar *)yytext, tostring((uchar *)yytext), atof(yytext), CON|NUM, symtab); /* constant entered under its own spelling, with its numeric value */
154 RET(NUMBER); }
155
156 <A>while { RET(WHILE); } /* keyword rules are listed before the identifier rule, so they win equal-length matches */
157 <A>for { RET(FOR); }
158 <A>do { RET(DO); }
159 <A>if { RET(IF); }
160 <A>else { RET(ELSE); }
161 <A>next { RET(NEXT); }
162 <A>exit { RET(EXIT); }
163 <A>break { RET(BREAK); }
164 <A>continue { RET(CONTINUE); }
165 <A>print { yylval.i = PRINT; RET(PRINT); }
166 <A>printf { yylval.i = PRINTF; RET(PRINTF); }
167 <A>sprintf { yylval.i = SPRINTF; RET(SPRINTF); }
168 <A>split { yylval.i = SPLIT; RET(SPLIT); }
169 <A>substr { RET(SUBSTR); }
170 <A>sub { yylval.i = SUB; RET(SUB); }
171 <A>gsub { yylval.i = GSUB; RET(GSUB); }
172 <A>index { RET(INDEX); }
173 <A>match { RET(MATCHFCN); }
174 <A>in { RET(IN); }
175 <A>getline { RET(GETLINE); }
176 <A>close { RET(CLOSE); }
177 <A>delete { RET(DELETE); }
178 <A>length { yylval.i = FLENGTH; RET(BLTIN); } /* the rules below share token BLTIN; yylval.i says which builtin */
179 <A>log { yylval.i = FLOG; RET(BLTIN); }
180 <A>int { yylval.i = FINT; RET(BLTIN); }
181 <A>exp { yylval.i = FEXP; RET(BLTIN); }
182 <A>sqrt { yylval.i = FSQRT; RET(BLTIN); }
183 <A>sin { yylval.i = FSIN; RET(BLTIN); }
184 <A>cos { yylval.i = FCOS; RET(BLTIN); }
185 <A>atan2 { yylval.i = FATAN; RET(BLTIN); }
186 <A>system { yylval.i = FSYSTEM; RET(BLTIN); }
187 <A>rand { yylval.i = FRAND; RET(BLTIN); }
188 <A>srand { yylval.i = FSRAND; RET(BLTIN); }
189 <A>toupper { yylval.i = FTOUPPER; RET(BLTIN); }
190 <A>tolower { yylval.i = FTOLOWER; RET(BLTIN); }
191
192 <A>{A}{B}* { int n, c;
193 c = input(); unput(c); /* look for '(' */
194 if (c != '(' && infunc && (n=isarg(yytext)) >= 0) { /* inside a function, a name isarg() recognizes and not followed by '(' */
195 yylval.i = n; /* ARG carries the index returned by isarg() */
196 RET(ARG);
197 } else {
198 yylval.cp = setsymtab((uchar *)yytext,
199 (uchar *)"",0.0,STR|NUM,symtab);
200 if (c == '(') { /* name immediately followed by '(' */
201 RET(CALL);
202 } else {
203 RET(VAR);
204 }
205 }
206 }
207 <A>\" { BEGIN str; clen = 0; } /* opening quote: start collecting a string in cbuf */
208
209 <A>"}" { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); } /* return ';' now; state sc makes the next yylex() call return '}' */
210 <A>"]" { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
211 <A>")" { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
212
213 <A>. { if (yytext[0] == '{') bracecnt++; /* opening brackets bump the matching depth counter */
214 else if (yytext[0] == '[') brackcnt++;
215 else if (yytext[0] == '(') parencnt++;
216 RET(yylval.i = yytext[0]); /* everything else: the character is its own token */ }
217
<reg>\\.	{ /*
		   * Keep both characters of the escape, but store them through
		   * CADD so the CBUFLEN limit is enforced.  CADD always stores
		   * yytext[0], which is the backslash on entry.
		   */
		  CADD;
		  if (clen < CBUFLEN-1) {	/* first CADD did not hit the limit */
			yytext[0] = yytext[1];
			CADD;
		  }
		}
<reg>\n	{ cbuf[clen] = 0;	/* terminate so the message shows only this expression */
		  ERROR "newline in regular expression %.10s...", cbuf SYNTAX;
		  lineno++;
		  BEGIN A; }
<reg>"/"	{ /* closing slash: hand the collected text to the parser */
		  BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');	/* the slash is pushed back and rescanned in state A */
		  RET(REGEXPR); }
<reg>.	{ CADD; }
226
<str>\"	{ /*
		   * Closing quote.  Every store into cbuf goes through CADD,
		   * which leaves this state once clen reaches CBUFLEN-1, so
		   * clen is at most CBUFLEN-2 here and both writes below fit.
		   */
		  BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  /* the symbol is entered under the text followed by one blank */
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n	{ cbuf[clen] = 0;	/* terminate so the message shows only this string */
		  ERROR "newline in string %.10s...", cbuf SYNTAX;
		  lineno++;
		  BEGIN A; }
	/*
	 * Escape sequences.  Each rule places the translated character in
	 * yytext[0] and appends it with CADD, so escapes obey the same
	 * length limit as ordinary characters.
	 */
<str>"\\\""	{ yytext[0] = '"'; CADD; }
<str>"\\"n	{ yytext[0] = '\n'; CADD; }
<str>"\\"t	{ yytext[0] = '\t'; CADD; }
<str>"\\"f	{ yytext[0] = '\f'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"v	{ yytext[0] = '\v'; CADD; }	/* these ANSIisms may not be known by */
<str>"\\"a	{ yytext[0] = '\007'; CADD; }	/* your compiler.  hence 007 for bell */
<str>"\\\\"	{ yytext[0] = '\\'; CADD; }
<str>"\\"({O}{O}{O}|{O}{O}|{O})	{ unsigned int n = 0;	/* %o stores through an unsigned int pointer */
		  sscanf(yytext+1, "%o", &n);
		  yytext[0] = (char)(n & 0xff);	/* only the low byte is kept */
		  CADD; }
<str>"\\"x({H}+)	{ unsigned int n = 0;	/* ANSI permits any number! */
		  sscanf(yytext+2, "%x", &n);
		  yytext[0] = (char)(n & 0xff);	/* only the low byte is kept */
		  CADD; }
<str>"\\".	{ yytext[0] = yytext[1]; CADD; }	/* unknown escape: keep the character itself */
<str>.	{ CADD; }
248
249 %%
250
251 void
252 startreg()
253 {
254 BEGIN reg;
255 clen = 0;
256 }
257
258 /* input() and unput() are transcriptions of the standard lex
259 macros for input and unput, with additions for error message
260 printing. God help us all if someone changes how lex works.
261 */
262
/*
 * Circular record of the characters most recently handed out by input().
 * ep is the next slot to fill: input() wraps it to the start when it
 * reaches the end of ebuf, and unput() steps it back one slot.
 */
263 uchar ebuf[300];
264 uchar *ep = ebuf;
265
266 int
267 input(void)
268 {
269 register int c;
270 extern uchar *lexprog;
271
272 if (yysptr > yysbuf)
273 c = U(*--yysptr);
274 else if (lexprog != NULL) /* awk '...' */
275 c = *lexprog++;
276 else /* awk -f ... */
277 c = pgetc();
278 if (c == '\n')
279 yylineno++;
280 else if (c == EOF)
281 c = 0;
282 if (ep >= ebuf + sizeof ebuf)
283 ep = ebuf;
284 return *ep++ = c;
285 }
286
287 static void
288 unput(int c)
289 {
290 yytchar = c;
291 if (yytchar == '\n')
292 yylineno--;
293 *yysptr++ = yytchar;
294 if (--ep < ebuf)
295 ep = ebuf + sizeof(ebuf) - 1;
296 }
297
298
/*
 * unputstr - push a whole string back onto the input.
 *
 * The characters are pushed last-to-first, so later input() calls
 * return them in their original order.  An empty string pushes nothing.
 */
static void
unputstr(char *s)
{
	char *tail = s + strlen(s);

	while (tail > s)
		unput(*--tail);
}