%{
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
%}

%{
#pragma ident	"%Z%%M% %I% %E% SMI"
%}

%Start	A str sc reg comment

%{

/*
 * Lexical analyzer for awk.
 *
 * Start conditions used by the rules below:
 *	A	ordinary program text
 *	str	inside a "..." string constant
 *	reg	inside a /.../ regular expression (entered via startreg())
 *	sc	a closing brace was just seen; a ';' has been returned and
 *		the brace itself is returned on the next call
 */

#include	<sys/types.h>
#include	"awk.h"
#include	"y.tab.h"

#undef	input	/* defeat lex */
#undef	unput

static void unput(int);
static void unputstr(char *);

extern YYSTYPE	yylval;
extern int	infunc;

off_t	lineno	= 1;
int	bracecnt = 0;
int	brackcnt = 0;
int	parencnt = 0;
#define	DEBUG
#ifdef	DEBUG
#	define	RET(x)	{if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
#else
#	define	RET(x)	return(x)
#endif

/*
 * The standard (SUSv2) requires that the record size be at least LINE_MAX.
 * LINE_MAX is a standard constant defined in limits.h.
 * Though nawk is not standards compliant, we let the buffer size
 * grow with LINE_MAX instead of using the magic number 1024.
 */
#define	CBUFLEN	(3 * LINE_MAX)

/*
 * cbuf accumulates the text of the string constant or regular expression
 * currently being scanned; clen is the number of bytes stored so far.
 * At most CBUFLEN-2 bytes are accepted, which leaves room for the trailing
 * blank and NUL that the closing-quote rule appends.
 *
 * Every rule that stores into cbuf goes through these macros so that no
 * literal, however it is spelled, can run past the end of the buffer:
 *
 *	CFULL(n)	true when n more bytes would exceed the limit
 *	CTOOLONG	report the overflow and abandon the literal
 *	CPUT(ch)	append one byte, or report overflow if it does not fit
 *	CADD		append the current input character
 */
#define	CFULL(n)	(clen + (n) > CBUFLEN - 2)

#define	CTOOLONG	do { \
		ERROR "string/reg expr %.10s... too long", cbuf SYNTAX; \
		BEGIN A; \
	} while (0)

#define	CPUT(ch)	do { \
		if (CFULL(1)) \
			CTOOLONG; \
		else \
			cbuf[clen++] = (ch); \
	} while (0)

#define	CADD	CPUT(yytext[0])

static uchar	cbuf[CBUFLEN];
static uchar	*s;
static int	clen, cflag;
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]

%%
	/*
	 * Runs on every entry to yylex().  The expression below is compared
	 * against start-condition numbers: 0 (no condition selected yet)
	 * switches to A, and sc (set by the closing-brace rule after it has
	 * returned ';') switches back to A and delivers the brace itself.
	 */
	switch (yybgin-yysvec-1) {	/* witchcraft */
	case 0:
		BEGIN A;
		break;
	case sc:
		BEGIN A;
		RET('}');
	}

<A>\n		{ lineno++; RET(NL); }
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }
<A>"^"		{ RET(POWER); }

<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
<A>"$NF"	{ unputstr("(NF)"); return(INDIRECT); }
<A>"$"{A}{B}*	{ int c;
		  c = input(); unput(c);	/* peek at the next character */
		  if (c == '(' || c == '[' ||
		      (infunc && isarg(yytext+1) >= 0)) {
			unputstr(yytext+1);
			return(INDIRECT);
		  } else {
			yylval.cp = setsymtab((uchar *)yytext+1,
				(uchar *)"",0.0,STR|NUM,symtab);
			RET(IVAR);
		  }
		}
<A>"$"		{ RET(INDIRECT); }
<A>NF		{ yylval.cp = setsymtab((uchar *)yytext, (uchar *)"", 0.0, NUM, symtab); RET(VARNF); }

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  yylval.cp = setsymtab((uchar *)yytext, tostring((uchar *)yytext), atof(yytext), CON|NUM, symtab);
		  RET(NUMBER); }

<A>while	{ RET(WHILE); }
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval.i = PRINT; RET(PRINT); }
<A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB; RET(SUB); }
<A>gsub		{ yylval.i = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }
<A>log		{ yylval.i = FLOG; RET(BLTIN); }
<A>int		{ yylval.i = FINT; RET(BLTIN); }
<A>exp		{ yylval.i = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
<A>sin		{ yylval.i = FSIN; RET(BLTIN); }
<A>cos		{ yylval.i = FCOS; RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND; RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }

<A>{A}{B}*	{ int n, c;
		  c = input(); unput(c);	/* look for '(' */
		  if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {
			yylval.i = n;
			RET(ARG);
		  } else {
			yylval.cp = setsymtab((uchar *)yytext,
				(uchar *)"",0.0,STR|NUM,symtab);
			if (c == '(') {
				RET(CALL);
			} else {
				RET(VAR);
			}
		  }
		}
<A>\"		{ BEGIN str; clen = 0; }

<A>"}"		{ if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
<A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
<A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }

<A>.		{ if (yytext[0] == '{') bracecnt++;
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  RET(yylval.i = yytext[0]); /* everything else */ }

<reg>\\.	{ /* keep the backslash; both bytes must fit or neither is stored */
		  if (CFULL(2))
			CTOOLONG;
		  else {
			cbuf[clen++] = '\\';
			cbuf[clen++] = yytext[1];
		  }
		}
<reg>\n		{ cbuf[clen] = 0;	/* show only the current text in the message */
		  ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<str>\"		{ BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ cbuf[clen] = 0;	/* show only the current text in the message */
		  ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\\""	{ CPUT('"'); }
<str>"\\"n	{ CPUT('\n'); }
<str>"\\"t	{ CPUT('\t'); }
<str>"\\"f	{ CPUT('\f'); }
<str>"\\"r	{ CPUT('\r'); }
<str>"\\"b	{ CPUT('\b'); }
<str>"\\"v	{ CPUT('\v'); }	/* these ANSIisms may not be known by */
<str>"\\"a	{ CPUT('\007'); }	/* your compiler. hence 007 for bell */
<str>"\\\\"	{ CPUT('\\'); }
<str>"\\"({O}{O}{O}|{O}{O}|{O})	{ unsigned int n;
		  sscanf(yytext+1, "%o", &n); CPUT(n); }
<str>"\\"x({H}+)	{ unsigned int n;	/* ANSI permits any number! */
		  sscanf(yytext+2, "%x", &n); CPUT(n); }
<str>"\\".	{ CPUT(yytext[1]); }
<str>.		{ CADD; }

%%

/*
 * Switch the lexer into regular-expression mode and discard any text
 * left in cbuf, so that the next token is collected by the <reg> rules.
 */
void
startreg(void)
{
	BEGIN reg;
	clen = 0;
}

/*
 * input() and unput() are transcriptions of the standard lex
 * macros for input and output with additions for error message
 * printing.  God help us all if someone changes how lex works.
 */

/*
 * Ring buffer holding the most recently read characters; ep points at the
 * slot the next character will be stored in.
 */
uchar	ebuf[300];
uchar	*ep = ebuf;

/*
 * Return the next input character.
 *
 * Characters pushed back with unput() are returned first; otherwise the
 * character comes from the in-memory program text (lexprog) when there is
 * one, or from pgetc().  A newline bumps yylineno, EOF is reported as 0,
 * and every character returned is also recorded in ebuf.
 */
int
input(void)
{
	register int c;
	extern uchar *lexprog;

	if (yysptr > yysbuf)
		c = U(*--yysptr);
	else if (lexprog != NULL)	/* awk '...' */
		c = *lexprog++;
	else				/* awk -f ... */
		c = pgetc();
	if (c == '\n')
		yylineno++;
	else if (c == EOF)
		c = 0;
	if (ep >= ebuf + sizeof (ebuf))
		ep = ebuf;		/* wrap around */
	return (*ep++ = c);
}

/*
 * Push character c back so that the next input() returns it, undoing the
 * line count and the ebuf position that input() advanced.
 */
static void
unput(int c)
{
	yytchar = c;
	if (yytchar == '\n')
		yylineno--;
	*yysptr++ = yytchar;
	/* step ep back one slot, wrapping without ever pointing before ebuf */
	if (ep == ebuf)
		ep = ebuf + sizeof (ebuf) - 1;
	else
		ep--;
}

/*
 * Push back the whole string s, last character first, so that subsequent
 * input() calls read it in its original order.
 */
static void
unputstr(char *s)
{
	int i;

	for (i = (int)strlen(s) - 1; i >= 0; i--)
		unput(s[i]);
}