1 %{
   2 /*
   3  * CDDL HEADER START
   4  *
   5  * The contents of this file are subject to the terms of the
   6  * Common Development and Distribution License, Version 1.0 only
   7  * (the "License").  You may not use this file except in compliance
   8  * with the License.
   9  *
  10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  11  * or http://www.opensolaris.org/os/licensing.
  12  * See the License for the specific language governing permissions
  13  * and limitations under the License.
  14  *
  15  * When distributing Covered Code, include this CDDL HEADER in each
  16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  17  * If applicable, add the following below this CDDL HEADER, with the
  18  * fields enclosed by brackets "[]" replaced with your own identifying
  19  * information: Portions Copyright [yyyy] [name of copyright owner]
  20  *
  21  * CDDL HEADER END
  22  */
  23 %}
  24 /*
  25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  30 /*        All Rights Reserved   */
  31 
  32 %{
  33 #ident  "%Z%%M% %I%     %E% SMI"        /* SVr4.0 2.10  */
  34 %}
  35 
  36 %{
  37 #include "awk.h"
  38 int yywrap(void) { return(1); }
  39 #ifndef DEBUG
  40 #       define  PUTS(x)
  41 #endif
  42 Node    *beginloc = 0, *endloc = 0;
  43 int     infunc  = 0;    /* = 1 if in arglist or body of func */
  44 uchar   *curfname = 0;
  45 Node    *arglist = 0;   /* list of args for current function */
  46 static void     setfname(Cell *);
  47 static int      constnode(Node *);
  48 static uchar    *strnode(Node *);
  49 static Node     *notnull();
  50 %}
  51 
  52 %union {
  53         Node    *p;
  54         Cell    *cp;
  55         int     i;
  56         uchar   *s;
  57 }
  58 
  59 %token  <i>       FIRSTTOKEN      /* must be first */
  60 %token  <p>       PROGRAM PASTAT PASTAT2 XBEGIN XEND
  61 %token  <i>       NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
  62 %token  <i>       ARRAY
  63 %token  <i>       MATCH NOTMATCH MATCHOP
  64 %token  <i>       FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
  65 %token  <i>       AND BOR APPEND EQ GE GT LE LT NE IN
  66 %token  <i>       ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC 
  67 %token  <i>       SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT
  68 %token  <i>       ADD MINUS MULT DIVIDE MOD
  69 %token  <i>       ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
  70 %token  <i>       PRINT PRINTF SPRINTF
  71 %token  <p>       ELSE INTEST CONDEXPR
  72 %token  <i>       POSTINCR PREINCR POSTDECR PREDECR
  73 %token  <cp>      VAR IVAR VARNF CALL NUMBER STRING FIELD
  74 %token  <s>       REGEXPR
  75 
  76 %type   <p>       pas pattern ppattern plist pplist patlist prarg term
  77 %type   <p>       pa_pat pa_stat pa_stats
  78 %type   <s>       reg_expr
  79 %type   <p>       simple_stmt opt_simple_stmt stmt stmtlist
  80 %type   <p>       var varname funcname varlist
  81 %type   <p>       for if while
  82 %type   <i>       pst opt_pst lbrace rparen comma nl opt_nl and bor
  83 %type   <i>       subop print
  84 
  85 %right  ASGNOP
  86 %right  '?'
  87 %right  ':'
  88 %left   BOR
  89 %left   AND
  90 %left   GETLINE
  91 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
  92 %left   ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FIELD FUNC 
  93 %left   GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
  94 %left   PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
  95 %left   REGEXPR VAR VARNF IVAR WHILE '('
  96 %left   CAT
  97 %left   '+' '-'
  98 %left   '*' '/' '%'
  99 %left   NOT UMINUS
 100 %right  POWER
 101 %right  DECR INCR
 102 %left   INDIRECT
 103 %token  LASTTOKEN       /* must be last */
 104 
 105 %%
 106 
 107 program:
 108           pas   { if (errorflag==0)
 109                         winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
 110         | error { yyclearin; bracecheck(); ERROR "bailing out" SYNTAX; }
 111         ;
 112 
 113 and:
 114           AND | and NL
 115         ;
 116 
 117 bor:
 118           BOR | bor NL
 119         ;
 120 
 121 comma:
 122           ',' | comma NL
 123         ;
 124 
 125 do:
 126           DO | do NL
 127         ;
 128 
 129 else:
 130           ELSE | else NL
 131         ;
 132 
 133 for:
 134           FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt
 135                 { $$ = stat4(FOR, $3, notnull($5), $7, $9); }
 136         | FOR '(' opt_simple_stmt ';'  ';' opt_simple_stmt rparen stmt
 137                 { $$ = stat4(FOR, $3, NIL, $6, $8); }
 138         | FOR '(' varname IN varname rparen stmt
 139                 { $$ = stat3(IN, $3, makearr($5), $7); }
 140         ;
 141 
 142 funcname:
 143           VAR   { setfname($1); }
 144         | CALL  { setfname($1); }
 145         ;
 146 
 147 if:
 148           IF '(' pattern rparen         { $$ = notnull($3); }
 149         ;
 150 
 151 lbrace:
 152           '{' | lbrace NL
 153         ;
 154 
 155 nl:
 156           NL | nl NL
 157         ;
 158 
 159 opt_nl:
 160           /* empty */   { $$ = 0; }
 161         | nl
 162         ;
 163 
 164 opt_pst:
 165           /* empty */   { $$ = 0; }
 166         | pst
 167         ;
 168 
 169 
 170 opt_simple_stmt:
 171           /* empty */                   { $$ = 0; }
 172         | simple_stmt
 173         ;
 174 
 175 pas:
 176           opt_pst                       { $$ = 0; }
 177         | opt_pst pa_stats opt_pst      { $$ = $2; }
 178         ;
 179 
 180 pa_pat:
 181           pattern       { $$ = notnull($1); }
 182         ;
 183 
 184 pa_stat:
 185           pa_pat                        { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
 186         | pa_pat lbrace stmtlist '}'    { $$ = stat2(PASTAT, $1, $3); }
 187         | pa_pat ',' pa_pat             { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
 188         | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); }
 189         | lbrace stmtlist '}'           { $$ = stat2(PASTAT, NIL, $2); }
 190         | XBEGIN lbrace stmtlist '}'
 191                 { beginloc = linkum(beginloc, $3); $$ = 0; }
 192         | XEND lbrace stmtlist '}'
 193                 { endloc = linkum(endloc, $3); $$ = 0; }
 194         | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
 195                 { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
 196         ;
 197 
 198 pa_stats:
 199           pa_stat
 200         | pa_stats opt_pst pa_stat      { $$ = linkum($1, $3); }
 201         ;
 202 
 203 patlist:
 204           pattern
 205         | patlist comma pattern { $$ = linkum($1, $3); }
 206         ;
 207 
 208 ppattern:
 209           var ASGNOP ppattern           { $$ = op2($2, $1, $3); }
 210         | ppattern '?' ppattern ':' ppattern %prec '?'
 211                 { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
 212         | ppattern bor ppattern %prec BOR
 213                 { $$ = op2(BOR, notnull($1), notnull($3)); }
 214         | ppattern and ppattern %prec AND
 215                 { $$ = op2(AND, notnull($1), notnull($3)); }
 216         | NOT ppattern
 217                 { $$ = op1(NOT, notnull($2)); }
 218         | ppattern MATCHOP reg_expr     { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
 219         | ppattern MATCHOP ppattern
 220                 { if (constnode($3))
 221                         $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
 222                   else
 223                         $$ = op3($2, (Node *)1, $1, $3); }
 224         | ppattern IN varname           { $$ = op2(INTEST, $1, makearr($3)); }
 225         | '(' plist ')' IN varname      { $$ = op2(INTEST, $2, makearr($5)); }
 226         | ppattern term %prec CAT       { $$ = op2(CAT, $1, $2); }
 227         | reg_expr
 228                 { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
 229         | term
 230         ;
 231 
 232 pattern:
 233           var ASGNOP pattern            { $$ = op2($2, $1, $3); }
 234         | pattern '?' pattern ':' pattern %prec '?'
 235                 { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
 236         | pattern bor pattern %prec BOR
 237                 { $$ = op2(BOR, notnull($1), notnull($3)); }
 238         | pattern and pattern %prec AND
 239                 { $$ = op2(AND, notnull($1), notnull($3)); }
 240         | NOT pattern
 241                 { $$ = op1(NOT, op2(NE,$2,valtonode(lookup((uchar *)"$zero&null",symtab),CCON))); }
 242         | pattern EQ pattern            { $$ = op2($2, $1, $3); }
 243         | pattern GE pattern            { $$ = op2($2, $1, $3); }
 244         | pattern GT pattern            { $$ = op2($2, $1, $3); }
 245         | pattern LE pattern            { $$ = op2($2, $1, $3); }
 246         | pattern LT pattern            { $$ = op2($2, $1, $3); }
 247         | pattern NE pattern            { $$ = op2($2, $1, $3); }
 248         | pattern MATCHOP reg_expr      { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
 249         | pattern MATCHOP pattern
 250                 { if (constnode($3))
 251                         $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
 252                   else
 253                         $$ = op3($2, (Node *)1, $1, $3); }
 254         | pattern IN varname            { $$ = op2(INTEST, $1, makearr($3)); }
 255         | '(' plist ')' IN varname      { $$ = op2(INTEST, $2, makearr($5)); }
 256         | pattern '|' GETLINE var       { $$ = op3(GETLINE, $4, (Node*)$2, $1); }
 257         | pattern '|' GETLINE           { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); }
 258         | pattern term %prec CAT        { $$ = op2(CAT, $1, $2); }
 259         | reg_expr
 260                 { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
 261         | term
 262         ;
 263 
 264 plist:
 265           pattern comma pattern         { $$ = linkum($1, $3); }
 266         | plist comma pattern           { $$ = linkum($1, $3); }
 267         ;
 268 
 269 pplist:
 270           ppattern
 271         | pplist comma ppattern         { $$ = linkum($1, $3); }
 272 
 273 prarg:
 274           /* empty */                   { $$ = rectonode(); }
 275         | pplist
 276         | '(' plist ')'                 { $$ = $2; }
 277         ;
 278 
 279 print:
 280           PRINT | PRINTF
 281         ;
 282 
 283 pst:
 284           NL | ';' | pst NL | pst ';'
 285         ;
 286 
 287 rbrace:
 288           '}' | rbrace NL
 289         ;
 290 
 291 reg_expr:
 292           '/' {startreg();} REGEXPR '/'         { $$ = $3; }
 293         ;
 294 
 295 rparen:
 296           ')' | rparen NL
 297         ;
 298 
 299 simple_stmt:
 300           print prarg '|' term          { $$ = stat3($1, $2, (Node *) $3, $4); }
 301         | print prarg APPEND term       { $$ = stat3($1, $2, (Node *) $3, $4); }
 302         | print prarg GT term           { $$ = stat3($1, $2, (Node *) $3, $4); }
 303         | print prarg                   { $$ = stat3($1, $2, NIL, NIL); }
 304         | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
 305         | DELETE varname                { yyclearin; ERROR "you can only delete array[element]" SYNTAX; $$ = stat1(DELETE, $2); }
 306         | pattern                       { $$ = exptostat($1); }
 307         | error                         { yyclearin; ERROR "illegal statement" SYNTAX; }
 308         ;
 309 
 310 st:
 311           nl | ';' opt_nl
 312         ;
 313 
 314 stmt:
 315           BREAK st              { $$ = stat1(BREAK, NIL); }
 316         | CLOSE pattern st      { $$ = stat1(CLOSE, $2); }
 317         | CONTINUE st           { $$ = stat1(CONTINUE, NIL); }
 318         | do stmt WHILE '(' pattern ')' st
 319                 { $$ = stat2(DO, $2, notnull($5)); }
 320         | EXIT pattern st       { $$ = stat1(EXIT, $2); }
 321         | EXIT st               { $$ = stat1(EXIT, NIL); }
 322         | for
 323         | if stmt else stmt     { $$ = stat3(IF, $1, $2, $4); }
 324         | if stmt               { $$ = stat3(IF, $1, $2, NIL); }
 325         | lbrace stmtlist rbrace { $$ = $2; }
 326         | NEXT st       { if (infunc)
 327                                 ERROR "next is illegal inside a function" SYNTAX;
 328                           $$ = stat1(NEXT, NIL); }
 329         | RETURN pattern st     { $$ = stat1(RETURN, $2); }
 330         | RETURN st             { $$ = stat1(RETURN, NIL); }
 331         | simple_stmt st
 332         | while stmt            { $$ = stat2(WHILE, $1, $2); }
 333         | ';' opt_nl            { $$ = 0; }
 334         ;
 335 
 336 stmtlist:
 337           stmt
 338         | stmtlist stmt         { $$ = linkum($1, $2); }
 339         ;
 340 
 341 subop:
 342           SUB | GSUB
 343         ;
 344 
 345 term:
 346           term '+' term                 { $$ = op2(ADD, $1, $3); }
 347         | term '-' term                 { $$ = op2(MINUS, $1, $3); }
 348         | term '*' term                 { $$ = op2(MULT, $1, $3); }
 349         | term '/' term                 { $$ = op2(DIVIDE, $1, $3); }
 350         | term '%' term                 { $$ = op2(MOD, $1, $3); }
 351         | term POWER term               { $$ = op2(POWER, $1, $3); }
 352         | '-' term %prec UMINUS         { $$ = op1(UMINUS, $2); }
 353         | '+' term %prec UMINUS         { $$ = $2; }
 354         | BLTIN '(' ')'                 { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
 355         | BLTIN '(' patlist ')'         { $$ = op2(BLTIN, (Node *) $1, $3); }
 356         | BLTIN                         { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
 357         | CALL '(' ')'                  { $$ = op2(CALL, valtonode($1,CVAR), NIL); }
 358         | CALL '(' patlist ')'          { $$ = op2(CALL, valtonode($1,CVAR), $3); }
 359         | DECR var                      { $$ = op1(PREDECR, $2); }
 360         | INCR var                      { $$ = op1(PREINCR, $2); }
 361         | var DECR                      { $$ = op1(POSTDECR, $1); }
 362         | var INCR                      { $$ = op1(POSTINCR, $1); }
 363         | GETLINE var LT term           { $$ = op3(GETLINE, $2, (Node *)$3, $4); }
 364         | GETLINE LT term               { $$ = op3(GETLINE, NIL, (Node *)$2, $3); }
 365         | GETLINE var                   { $$ = op3(GETLINE, $2, NIL, NIL); }
 366         | GETLINE                       { $$ = op3(GETLINE, NIL, NIL, NIL); }
 367         | INDEX '(' pattern comma pattern ')'
 368                 { $$ = op2(INDEX, $3, $5); }
 369         | INDEX '(' pattern comma reg_expr ')'
 370                 { ERROR "index() doesn't permit regular expressions" SYNTAX;
 371                   $$ = op2(INDEX, $3, (Node*)$5); }
 372         | '(' pattern ')'               { $$ = $2; }
 373         | MATCHFCN '(' pattern comma reg_expr ')'
 374                 { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
 375         | MATCHFCN '(' pattern comma pattern ')'
 376                 { if (constnode($5))
 377                         $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
 378                   else
 379                         $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
 380         | NUMBER                        { $$ = valtonode($1, CCON); }
 381         | SPLIT '(' pattern comma varname comma pattern ')'     /* string */
 382                 { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
 383         | SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
 384                 { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
 385         | SPLIT '(' pattern comma varname ')'
 386                 { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
 387         | SPRINTF '(' patlist ')'       { $$ = op1($1, $3); }
 388         | STRING                        { $$ = valtonode($1, CCON); }
 389         | subop '(' reg_expr comma pattern ')'
 390                 { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
 391         | subop '(' pattern comma pattern ')'
 392                 { if (constnode($3))
 393                         $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
 394                   else
 395                         $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
 396         | subop '(' reg_expr comma pattern comma var ')'
 397                 { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
 398         | subop '(' pattern comma pattern comma var ')'
 399                 { if (constnode($3))
 400                         $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
 401                   else
 402                         $$ = op4($1, (Node *)1, $3, $5, $7); }
 403         | SUBSTR '(' pattern comma pattern comma pattern ')'
 404                 { $$ = op3(SUBSTR, $3, $5, $7); }
 405         | SUBSTR '(' pattern comma pattern ')'
 406                 { $$ = op3(SUBSTR, $3, $5, NIL); }
 407         | var
 408         ;
 409 
 410 var:
 411           varname
 412         | varname '[' patlist ']'       { $$ = op2(ARRAY, makearr($1), $3); }
 413         | FIELD                         { $$ = valtonode($1, CFLD); }
 414         | IVAR                          { $$ = op1(INDIRECT, valtonode($1, CVAR)); }
 415         | INDIRECT term                 { $$ = op1(INDIRECT, $2); }
 416         ;       
 417 
 418 varlist:
 419           /* nothing */         { arglist = $$ = 0; }
 420         | VAR                   { arglist = $$ = valtonode($1,CVAR); }
 421         | varlist comma VAR     { arglist = $$ = linkum($1,valtonode($3,CVAR)); }
 422         ;
 423 
 424 varname:
 425           VAR                   { $$ = valtonode($1, CVAR); }
 426         | ARG                   { $$ = op1(ARG, (Node *) $1); }
 427         | VARNF                 { $$ = op1(VARNF, (Node *) $1); }
 428         ;
 429 
 430 
 431 while:
 432           WHILE '(' pattern rparen      { $$ = notnull($3); }
 433         ;
 434 
 435 %%
 436 
 437 static void
 438 setfname(Cell *p)
 439 {
 440         if (isarr(p))
 441                 ERROR "%s is an array, not a function", p->nval SYNTAX;
 442         else if (isfunc(p))
 443                 ERROR "you can't define function %s more than once", p->nval SYNTAX;
 444         curfname = p->nval;
 445 }
 446 
 447 
 448 static int
 449 constnode(Node *p)
 450 {
 451         return p->ntype == NVALUE && ((Cell *) (p->narg[0]))->csub == CCON;
 452 }
 453 
 454 static uchar *
 455 strnode(Node *p)
 456 {
 457         return ((Cell *)(p->narg[0]))->sval;
 458 }
 459 
 460 static Node *
 461 notnull(Node *n)
 462 {
 463         switch (n->nobj) {
 464         case LE: case LT: case EQ: case NE: case GT: case GE:
 465         case BOR: case AND: case NOT:
 466                 return n;
 467         default:
 468                 return op2(NE, n, nullnode);
 469         }
 470 }