1 %{
   2 /*
   3  * CDDL HEADER START
   4  *
   5  * The contents of this file are subject to the terms of the
   6  * Common Development and Distribution License, Version 1.0 only
   7  * (the "License").  You may not use this file except in compliance
   8  * with the License.
   9  *
  10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  11  * or http://www.opensolaris.org/os/licensing.
  12  * See the License for the specific language governing permissions
  13  * and limitations under the License.
  14  *
  15  * When distributing Covered Code, include this CDDL HEADER in each
  16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  17  * If applicable, add the following below this CDDL HEADER, with the
  18  * fields enclosed by brackets "[]" replaced with your own identifying
  19  * information: Portions Copyright [yyyy] [name of copyright owner]
  20  *
  21  * CDDL HEADER END
  22  */
  23 
  24 /*
  25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  30 /*        All Rights Reserved   */
  31 %}
  32 
  33 %{
  34 #pragma ident   "%Z%%M% %I%     %E% SMI"
  35 %}
  36 
  37 %Start A str sc reg comment
  38 
  39 %{
  40 
  41 #include        <sys/types.h>
  42 #include        "awk.h"
  43 #include        "y.tab.h"
  44 
  45 #undef  input   /* defeat lex */
  46 #undef  unput
  47 
  48 static void unput(int);
  49 static void unputstr(char *);
  50 
  51 extern YYSTYPE  yylval;
  52 extern int      infunc;
  53 
  54 off_t   lineno  = 1;
  55 int     bracecnt = 0;
  56 int     brackcnt  = 0;
  57 int     parencnt = 0;
  58 #define DEBUG
  59 #ifdef  DEBUG
  60 #       define  RET(x)  {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
  61 #else
  62 #       define  RET(x)  return(x)
  63 #endif
  64 
  65 /*
  66  * The standards (SUSV2) requires that Record size be atleast LINE_MAX.
  67  * LINE_MAX is a standard variable defined in limits.h.
  68  * Though nawk is not standards compliant, we let RECSIZE
  69  * grow with LINE_MAX instead of the magic number 1024.
  70  */
  71 #define CBUFLEN (3 * LINE_MAX)
  72 
  73 #define CADD    cbuf[clen++] = yytext[0]; \
  74                 if (clen >= CBUFLEN-1) { \
  75                         ERROR "string/reg expr %.10s... too long", cbuf SYNTAX; \
  76                         BEGIN A; \
  77                 }
  78 
  79 static uchar    cbuf[CBUFLEN];
  80 static uchar    *s;
  81 static int      clen, cflag;
  82 %}
  83 
  /* Character classes: A = first char of a name, B = later chars of a name, D = decimal digit, O = octal digit, H = hex digit, WS = blank or tab. */
  84 A       [a-zA-Z_]
  85 B       [a-zA-Z0-9_]
  86 D       [0-9]
  87 O       [0-7]
  88 H       [0-9a-fA-F]
  89 WS      [ \t]
  90 
  91 %%
  92         switch (yybgin-yysvec-1) {      /* witchcraft */
             /*
              * This code sits ahead of the first rule.  The switch value is
              * derived from lex-internal variables (yybgin, yysvec);
              * presumably it is the number of the active start condition --
              * confirm against the lex skeleton in use.
              *   0:  no start condition chosen yet, so select A.
              *   sc: selected by the closing-brace rule right after it
              *       returned ';'.  Go back to A and return the brace itself.
              */
  93         case 0:
  94                 BEGIN A;
  95                 break;
  96         case sc:
  97                 BEGIN A;
  98                 RET('}');
  99         }
 100 
 101 <A>\n             { /* count the line, then return NL as a token */ lineno++; RET(NL); }
 102 <A>#.*            { ; }   /* strip comments: text from # to end of line yields no token */
 103 <A>{WS}+  { ; /* blanks and tabs yield no token */ }
 104 <A>;              { RET(';'); }
 105 
 106 <A>"\\"\n { /* backslash-newline: count the line but return nothing */ lineno++; }
 107 <A>BEGIN  { RET(XBEGIN); }
 108 <A>END            { RET(XEND); }
 109 <A>func(tion)?    { /* error is reported if infunc is already set; FUNC is returned either way */ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
 110 <A>return { /* error is reported if infunc is not set; RETURN is returned either way */ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
 111 <A>"&&"           { RET(AND); }
 112 <A>"||"           { RET(BOR); }
 113 <A>"!"            { RET(NOT); }
 114 <A>"!="           { /* from here on yylval.i is set to an operator code before the token is returned */ yylval.i = NE; RET(NE); }
 115 <A>"~"            { /* ~ and !~ share the MATCHOP token; yylval.i tells them apart */ yylval.i = MATCH; RET(MATCHOP); }
 116 <A>"!~"           { yylval.i = NOTMATCH; RET(MATCHOP); }
 117 <A>"<"         { yylval.i = LT; RET(LT); }
 118 <A>"<="                { yylval.i = LE; RET(LE); }
 119 <A>"=="           { yylval.i = EQ; RET(EQ); }
 120 <A>">="                { yylval.i = GE; RET(GE); }
 121 <A>">"         { yylval.i = GT; RET(GT); }
 122 <A>">>"             { yylval.i = APPEND; RET(APPEND); }
 123 <A>"++"           { yylval.i = INCR; RET(INCR); }
 124 <A>"--"           { yylval.i = DECR; RET(DECR); }
 125 <A>"+="           { /* every assignment operator returns ASGNOP; yylval.i selects which one */ yylval.i = ADDEQ; RET(ASGNOP); }
 126 <A>"-="           { yylval.i = SUBEQ; RET(ASGNOP); }
 127 <A>"*="           { yylval.i = MULTEQ; RET(ASGNOP); }
 128 <A>"/="           { yylval.i = DIVEQ; RET(ASGNOP); }
 129 <A>"%="           { yylval.i = MODEQ; RET(ASGNOP); }
 130 <A>"^="           { yylval.i = POWEQ; RET(ASGNOP); }
 131 <A>"**="  { /* same code as ^= */ yylval.i = POWEQ; RET(ASGNOP); }
 132 <A>"="            { yylval.i = ASSIGN; RET(ASGNOP); }
 133 <A>"**"           { /* same token as ^ */ RET(POWER); }
 134 <A>"^"            { RET(POWER); }
 135 
 136 <A>"$"{D}+        { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
 137 <A>"$NF"  { unputstr("(NF)"); return(INDIRECT); }
 138 <A>"$"{A}{B}*     { int c, n;
 139                   c = input(); unput(c);
 140                   if (c == '(' || c == '[' || infunc && (n=isarg(yytext+1)) >= 0) {
 141                         unputstr(yytext+1);
 142                         return(INDIRECT);
 143                   } else {
 144                         yylval.cp = setsymtab((uchar *)yytext+1,
 145                                 (uchar *)"",0.0,STR|NUM,symtab);
 146                         RET(IVAR);
 147                   }
 148                 }
 149 <A>"$"            { RET(INDIRECT); }
 150 <A>NF             { yylval.cp = setsymtab((uchar *)yytext, (uchar *)"", 0.0, NUM, symtab); RET(VARNF); }
 151 
 152 <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?   {
                   /*
                    * Numeric constant: digits with an optional fraction, or a
                    * fraction alone, then an optional exponent.  The symtab
                    * entry is keyed by the spelling, holds a copy of the text
                    * and its atof() value, and is flagged CON|NUM.
                    */
 153                   yylval.cp = setsymtab((uchar *)yytext, tostring((uchar *)yytext), atof(yytext), CON|NUM, symtab);
 154                   RET(NUMBER); }
 155 
 156 <A>while  { /* reserved words; these rules precede the general name rule */ RET(WHILE); }
 157 <A>for            { RET(FOR); }
 158 <A>do             { RET(DO); }
 159 <A>if             { RET(IF); }
 160 <A>else           { RET(ELSE); }
 161 <A>next           { RET(NEXT); }
 162 <A>exit           { RET(EXIT); }
 163 <A>break  { RET(BREAK); }
 164 <A>continue       { RET(CONTINUE); }
 165 <A>print  { /* for these words yylval.i repeats the token code */ yylval.i = PRINT; RET(PRINT); }
 166 <A>printf { yylval.i = PRINTF; RET(PRINTF); }
 167 <A>sprintf        { yylval.i = SPRINTF; RET(SPRINTF); }
 168 <A>split  { yylval.i = SPLIT; RET(SPLIT); }
 169 <A>substr { RET(SUBSTR); }
 170 <A>sub            { yylval.i = SUB; RET(SUB); }
 171 <A>gsub           { yylval.i = GSUB; RET(GSUB); }
 172 <A>index  { RET(INDEX); }
 173 <A>match  { RET(MATCHFCN); }
 174 <A>in             { RET(IN); }
 175 <A>getline        { RET(GETLINE); }
 176 <A>close  { RET(CLOSE); }
 177 <A>delete { RET(DELETE); }
 178 <A>length { /* the remaining words all return BLTIN; yylval.i names the function */ yylval.i = FLENGTH; RET(BLTIN); }
 179 <A>log            { yylval.i = FLOG; RET(BLTIN); }
 180 <A>int            { yylval.i = FINT; RET(BLTIN); }
 181 <A>exp            { yylval.i = FEXP; RET(BLTIN); }
 182 <A>sqrt           { yylval.i = FSQRT; RET(BLTIN); }
 183 <A>sin            { yylval.i = FSIN; RET(BLTIN); }
 184 <A>cos            { yylval.i = FCOS; RET(BLTIN); }
 185 <A>atan2  { yylval.i = FATAN; RET(BLTIN); }
 186 <A>system { yylval.i = FSYSTEM; RET(BLTIN); }
 187 <A>rand           { yylval.i = FRAND; RET(BLTIN); }
 188 <A>srand  { yylval.i = FSRAND; RET(BLTIN); }
 189 <A>toupper        { yylval.i = FTOUPPER; RET(BLTIN); }
 190 <A>tolower        { yylval.i = FTOLOWER; RET(BLTIN); }
 191 
 192 <A>{A}{B}*        { int n, c;
 193                   c = input(); unput(c);        /* look for '(' */
 194                   if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {
 195                         yylval.i = n;
 196                         RET(ARG);
 197                   } else {
 198                         yylval.cp = setsymtab((uchar *)yytext,
 199                                 (uchar *)"",0.0,STR|NUM,symtab);
 200                         if (c == '(') {
 201                                 RET(CALL);
 202                         } else {
 203                                 RET(VAR);
 204                         }
 205                   }
 206                 }
 207 <A>\"             { BEGIN str; clen = 0; }
 208 
 209 <A>"}"            { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
 210 <A>"]"            { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
 211 <A>")"            { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
 212 
 213 <A>.              { if (yytext[0] == '{') bracecnt++;
 214                   else if (yytext[0] == '[') brackcnt++;
 215                   else if (yytext[0] == '(') parencnt++;
 216                   RET(yylval.i = yytext[0]); /* everything else */ }
 217 
 218 <reg>\\.  { cbuf[clen++] = '\\'; cbuf[clen++] = yytext[1]; }
 219 <reg>\n           { ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
 220 <reg>"/"  { BEGIN A;
 221                   cbuf[clen] = 0;
 222                   yylval.s = tostring(cbuf);
 223                   unput('/');
 224                   RET(REGEXPR); }
 225 <reg>.            { CADD; }
 226 
 227 <str>\"           { BEGIN A;
 228                   cbuf[clen] = 0; s = tostring(cbuf);
 229                   cbuf[clen] = ' '; cbuf[++clen] = 0;
 230                   yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
 231                   RET(STRING); }
 232 <str>\n           { ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
 233 <str>"\\\""       { cbuf[clen++] = '"'; }
 234 <str>"\\"n        { cbuf[clen++] = '\n'; }
 235 <str>"\\"t        { cbuf[clen++] = '\t'; }
 236 <str>"\\"f        { cbuf[clen++] = '\f'; }
 237 <str>"\\"r        { cbuf[clen++] = '\r'; }
 238 <str>"\\"b        { cbuf[clen++] = '\b'; }
 239 <str>"\\"v        { cbuf[clen++] = '\v'; }        /* these ANSIisms may not be known by */
 240 <str>"\\"a        { cbuf[clen++] = '\007'; }      /* your compiler. hence 007 for bell */
 241 <str>"\\\\"       { cbuf[clen++] = '\\'; }
 242 <str>"\\"({O}{O}{O}|{O}{O}|{O}) { int n;
 243                   sscanf(yytext+1, "%o", &n); cbuf[clen++] = n; }
 244 <str>"\\"x({H}+) { int n; /* ANSI permits any number! */
 245                   sscanf(yytext+2, "%x", &n); cbuf[clen++] = n; }
 246 <str>"\\".        { cbuf[clen++] = yytext[1]; }
 247 <str>.            { CADD; }
 248 
 249 %%
 250 
 251 void
 252 startreg()
 253 {
 254         BEGIN reg;
 255         clen = 0;
 256 }
 257 
 258 /* input() and unput() are transcriptions of the standard lex
 259    macros for input and output with additions for error message
 260    printing.  God help us all if someone changes how lex works.

        The addition is ebuf: input() stores every character it returns
        at *ep and advances ep, wrapping to the start when the end of
        the array is reached; unput() moves ep back one slot.  The code
        that prints ebuf is not in this part of the file.
 261 */
 262 
 263 uchar   ebuf[300];	/* circular record of the characters handed out by input() */
 264 uchar   *ep = ebuf;	/* slot of ebuf that the next input() call will fill */
 265 
 266 int
 267 input(void)
 268 {
 269         register int c;
 270         extern uchar *lexprog;
 271 
 272         if (yysptr > yysbuf)
 273                 c = U(*--yysptr);
 274         else if (lexprog != NULL)       /* awk '...' */
 275                 c = *lexprog++;
 276         else                            /* awk -f ... */
 277                 c = pgetc();
 278         if (c == '\n')
 279                 yylineno++;
 280         else if (c == EOF)
 281                 c = 0;
 282         if (ep >= ebuf + sizeof ebuf)
 283                 ep = ebuf;
 284         return *ep++ = c;
 285 }
 286 
 287 static void
 288 unput(int c)
 289 {
 290         yytchar = c;
 291         if (yytchar == '\n')
 292                 yylineno--;
 293         *yysptr++ = yytchar;
 294         if (--ep < ebuf)
 295                 ep = ebuf + sizeof(ebuf) - 1;
 296 }
 297 
 298 
 299 static void
 300 unputstr(char *s)
 301 {
 302         int i;
 303 
 304         for (i = strlen(s)-1; i >= 0; i--)
 305                 unput(s[i]);
 306 }