1 %{
   2 /*
   3  * CDDL HEADER START
   4  *
   5  * The contents of this file are subject to the terms of the
   6  * Common Development and Distribution License, Version 1.0 only
   7  * (the "License").  You may not use this file except in compliance
   8  * with the License.
   9  *
  10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  11  * or http://www.opensolaris.org/os/licensing.
  12  * See the License for the specific language governing permissions
  13  * and limitations under the License.
  14  *
  15  * When distributing Covered Code, include this CDDL HEADER in each
  16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  17  * If applicable, add the following below this CDDL HEADER, with the
  18  * fields enclosed by brackets "[]" replaced with your own identifying
  19  * information: Portions Copyright [yyyy] [name of copyright owner]
  20  *
  21  * CDDL HEADER END
  22  */
  23 %}
  24 /*
  25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29 /*
  30  * Copyright (C) Lucent Technologies 1997
  31  * All Rights Reserved
  32  *
  33  * Permission to use, copy, modify, and distribute this software and
  34  * its documentation for any purpose and without fee is hereby
  35  * granted, provided that the above copyright notice appear in all
  36  * copies and that both that the copyright notice and this
  37  * permission notice and warranty disclaimer appear in supporting
  38  * documentation, and that the name Lucent Technologies or any of
  39  * its entities not be used in advertising or publicity pertaining
  40  * to distribution of the software without specific, written prior
  41  * permission.
  42  *
  43  * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
  44  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
  45  * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
  46  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  47  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
  48  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
  49  * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
  50  * THIS SOFTWARE.
  51  */
  52 
  53 %{
  54 %}
  55 
  56 %{
  57 #include "awk.h"
  58 int yywrap(void) { return(1); }
  59 
  60 Node    *beginloc = 0;
  61 Node    *endloc = 0;
  62 int     infunc  = 0;    /* = 1 if in arglist or body of func */
  63 int     inloop = 0;     /* = 1 if in while, for, do */
  64 uchar   *curfname = 0;  /* current function name */
  65 Node    *arglist = 0;   /* list of args for current function */
  66 static void     setfname(Cell *);
  67 static int      constnode(Node *);
  68 static uchar    *strnode(Node *);
  69 static Node     *notnull();
  70 %}
  71 
  72 %union {          /* semantic value attached to each grammar symbol */
  73         Node    *p;       /* parse-tree node (most nonterminals) */
  74         Cell    *cp;      /* Cell pointer (VAR, IVAR, VARNF, CALL, NUMBER, STRING) */
  75         int     i;        /* integer value (operators, keywords, punctuation) */
  76         uchar   *s;       /* string value (REGEXPR and reg_expr) */
  77 }
  78 
  79 %token  <i>       FIRSTTOKEN      /* must be first */
  80 %token  <p>       PROGRAM PASTAT PASTAT2 XBEGIN XEND      /* PROGRAM and PASTAT are used as node codes in the actions */
  81 %token  <i>       NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
  82 %token  <i>       ARRAY
  83 %token  <i>       MATCH NOTMATCH MATCHOP
  84 %token  <i>       FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE  /* none of these appear in the rules of this grammar */
  85 %token  <i>       AND BOR APPEND EQ GE GT LE LT NE IN
  86 %token  <i>       ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
  87 %token  <i>       SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
  88 %token  <i>       ADD MINUS MULT DIVIDE MOD       /* node codes built by the arithmetic term rules */
  89 %token  <i>       ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
  90 %token  <i>       PRINT PRINTF SPRINTF
  91 %token  <p>       ELSE INTEST CONDEXPR
  92 %token  <i>       POSTINCR PREINCR POSTDECR PREDECR       /* node codes built by the INCR/DECR term rules */
  93 %token  <cp>      VAR IVAR VARNF CALL NUMBER STRING       /* tokens whose value is a Cell pointer */
  94 %token  <s>       REGEXPR /* token whose value is a string */
  95 
  96 %type   <p>       pas pattern ppattern plist pplist patlist prarg term re
  97 %type   <p>       pa_pat pa_stat pa_stats
  98 %type   <s>       reg_expr
  99 %type   <p>       simple_stmt opt_simple_stmt stmt stmtlist
 100 %type   <p>       var varname funcname varlist
 101 %type   <p>       for if else while
 102 %type   <i>       do st
 103 %type   <i>       pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
 104 %type   <i>       subop print
 105 
 106 %right  ASGNOP    /* precedence table: each later line binds tighter than the ones before it */
 107 %right  '?'
 108 %right  ':'
 109 %left   BOR
 110 %left   AND
 111 %left   GETLINE
 112 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
 113 %left   ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
 114 %left   GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
 115 %left   PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
 116 %left   REGEXPR VAR VARNF IVAR WHILE '('
 117 %left   CAT       /* named by %prec on the concatenation rules and used as their node code */
 118 %left   '+' '-'
 119 %left   '*' '/' '%'
 120 %left   NOT UMINUS        /* UMINUS is named by %prec on the unary '-', '+' and NOT term rules */
 121 %right  POWER
 122 %right  DECR INCR
 123 %left   INDIRECT
 124 %token  LASTTOKEN       /* must be last */
 125 
 126 %%
 127 
 128 program:    /* first rule of the grammar, hence the start symbol */
 129           pas   { if (errorflag==0)       /* the tree is stored in winner only when no error was flagged */
 130                         winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
 131         | error { yyclearin; bracecheck(); SYNTAX("bailing out"); }
 132         ;
 133 
 134 and:        /* AND followed by any number of NL tokens */
 135           AND | and NL
 136         ;
 137 
 138 bor:        /* BOR followed by any number of NL tokens */
 139           BOR | bor NL
 140         ;
 141 
 142 comma:      /* ',' followed by any number of NL tokens */
 143           ',' | comma NL
 144         ;
 145 
 146 do:         /* DO followed by any number of NL tokens */
 147           DO | do NL
 148         ;
 149 
 150 else:       /* ELSE followed by any number of NL tokens */
 151           ELSE | else NL
 152         ;
 153 
 154 for:
         /*
          * Three loop forms: with a test, without a test (the test slot is
          * NIL), and "for (varname IN varname)".  The mid-rule inloop++
          * action takes up a position of its own among the right-hand-side
          * symbols, which is why the loop body is numbered 12, 10 and 8
          * respectively.  inloop is lowered again once the body is parsed.
          */
 155           FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
 156                 { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
 157         | FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
 158                 { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
 159         | FOR '(' varname IN varname rparen {inloop++;} stmt
 160                 { --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
 161         ;
 162 
 163 funcname:
         /*
          * Name in a function definition.  setfname() reports a clash and
          * records the name in curfname.  Neither action assigns a value, so
          * yacc's default (the value of the first symbol) applies; pa_stat
          * casts that value back to a Cell pointer.
          */
 164           VAR   { setfname($1); }
 165         | CALL  { setfname($1); }
 166         ;
 167 
 168 if:         /* if-header; its value is the condition passed through notnull() */
 169           IF '(' pattern rparen         { $$ = notnull($3); }
 170         ;
 171 
 172 lbrace:     /* '{' followed by any number of NL tokens */
 173           '{' | lbrace NL
 174         ;
 175 
 176 nl:         /* one or more NL tokens */
 177           NL | nl NL
 178         ;
 179 
 180 opt_nl:     /* zero or more NL tokens */
 181           /* empty */   { $$ = 0; }
 182         | nl
 183         ;
 184 
 185 opt_pst:    /* optional run of NL and ';' separators (see pst) */
 186           /* empty */   { $$ = 0; }
 187         | pst
 188         ;
 189 
 190 
 191 opt_simple_stmt:    /* simple_stmt or nothing; nothing yields 0 */
 192           /* empty */                   { $$ = 0; }
 193         | simple_stmt
 194         ;
 195 
 196 pas:        /* body of the program: pattern-action items with optional separators around them */
 197           opt_pst                       { $$ = 0; }
 198         | opt_pst pa_stats opt_pst      { $$ = $2; }
 199         ;
 200 
 201 pa_pat:     /* pattern used as the guard of a pattern-action item, passed through notnull() */
 202           pattern       { $$ = notnull($1); }
 203         ;
 204 
 205 pa_stat:
         /*
          * One pattern-action item.  A pattern with no action, and a
          * two-pattern range with no action, get PRINT of rectonode() as
          * their body.  A brace block with no pattern becomes a PASTAT with
          * a NIL pattern.  XBEGIN, XEND and FUNC items are recorded through
          * beginloc, endloc and defn() and themselves yield 0.  For FUNC,
          * infunc is raised by the mid-rule action, and lowered, with
          * curfname cleared, once the body has been parsed.
          */
 206           pa_pat                        { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
 207         | pa_pat lbrace stmtlist '}'    { $$ = stat2(PASTAT, $1, $3); }
 208         | pa_pat ',' opt_nl pa_pat              { $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
 209         | pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'  { $$ = pa2stat($1, $4, $6); }
 210         | lbrace stmtlist '}'           { $$ = stat2(PASTAT, NIL, $2); }
 211         | XBEGIN lbrace stmtlist '}'
 212                 { beginloc = linkum(beginloc, $3); $$ = 0; }
 213         | XEND lbrace stmtlist '}'
 214                 { endloc = linkum(endloc, $3); $$ = 0; }
 215         | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
 216                 { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
 217         ;
 218 
 219 pa_stats:   /* one or more pa_stat items, joined with linkum() */
 220           pa_stat
 221         | pa_stats opt_pst pa_stat      { $$ = linkum($1, $3); }
 222         ;
 223 
 224 patlist:    /* one or more comma-separated patterns, joined with linkum() */
 225           pattern
 226         | patlist comma pattern         { $$ = linkum($1, $3); }
 227         ;
 228 
 229 ppattern:
         /*
          * Same shape as the pattern rule, minus the EQ/GE/GT/LE/LT/NE
          * comparisons and the '|' GETLINE forms.  pplist and prarg build
          * print arguments from it; presumably this keeps GT and '|' after
          * a print list free for the redirection forms in simple_stmt.
          * For MATCHOP, a constant right operand is compiled by makedfa()
          * here in the parser; otherwise the operand node is kept and the
          * first slot is (Node *)1 instead of NIL.
          */
 230           var ASGNOP ppattern           { $$ = op2($2, $1, $3); }
 231         | ppattern '?' ppattern ':' ppattern %prec '?'
 232                 { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
 233         | ppattern bor ppattern %prec BOR
 234                 { $$ = op2(BOR, notnull($1), notnull($3)); }
 235         | ppattern and ppattern %prec AND
 236                 { $$ = op2(AND, notnull($1), notnull($3)); }
 237         | ppattern MATCHOP reg_expr     { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
 238         | ppattern MATCHOP ppattern
 239                 { if (constnode($3))
 240                         $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
 241                   else
 242                         $$ = op3($2, (Node *)1, $1, $3); }
 243         | ppattern IN varname           { $$ = op2(INTEST, $1, makearr($3)); }
 244         | '(' plist ')' IN varname      { $$ = op2(INTEST, $2, makearr($5)); }
 245         | ppattern term %prec CAT       { $$ = op2(CAT, $1, $2); }
 246         | re
 247         | term
 248         ;
 249 
 250 pattern:
         /*
          * General expression.  The condition of '?' and both operands of
          * BOR and AND go through notnull().  For MATCHOP, a constant right
          * operand (see constnode) is compiled by makedfa() here in the
          * parser; otherwise the operand node is kept and the first slot is
          * (Node *)1 instead of NIL.  When safe is set, the '|' GETLINE
          * forms report a syntax error instead of building a node.
          * Two adjacent operands with no operator between them form a CAT
          * node.
          */
 251           var ASGNOP pattern            { $$ = op2($2, $1, $3); }
 252         | pattern '?' pattern ':' pattern %prec '?'
 253                 { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
 254         | pattern bor pattern %prec BOR
 255                 { $$ = op2(BOR, notnull($1), notnull($3)); }
 256         | pattern and pattern %prec AND
 257                 { $$ = op2(AND, notnull($1), notnull($3)); }
 258         | pattern EQ pattern            { $$ = op2($2, $1, $3); }
 259         | pattern GE pattern            { $$ = op2($2, $1, $3); }
 260         | pattern GT pattern            { $$ = op2($2, $1, $3); }
 261         | pattern LE pattern            { $$ = op2($2, $1, $3); }
 262         | pattern LT pattern            { $$ = op2($2, $1, $3); }
 263         | pattern NE pattern            { $$ = op2($2, $1, $3); }
 264         | pattern MATCHOP reg_expr      { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
 265         | pattern MATCHOP pattern
 266                 { if (constnode($3))
 267                         $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
 268                   else
 269                         $$ = op3($2, (Node *)1, $1, $3); }
 270         | pattern IN varname            { $$ = op2(INTEST, $1, makearr($3)); }
 271         | '(' plist ')' IN varname      { $$ = op2(INTEST, $2, makearr($5)); }
 272         | pattern '|' GETLINE var       {
 273                         if (safe) SYNTAX("cmd | getline is unsafe");
 274                         else $$ = op3(GETLINE, $4, itonp($2), $1); }
 275         | pattern '|' GETLINE           {
 276                         if (safe) SYNTAX("cmd | getline is unsafe");
 277                         else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
 278         | pattern term %prec CAT        { $$ = op2(CAT, $1, $2); }
 279         | re
 280         | term
 281         ;
 282 
 283 plist:      /* two or more comma-separated patterns, joined with linkum() */
 284           pattern comma pattern         { $$ = linkum($1, $3); }
 285         | plist comma pattern           { $$ = linkum($1, $3); }
 286         ;
 287 
 288 pplist:
 289           ppattern
 290         | pplist comma ppattern         { $$ = linkum($1, $3); }
 291 
 292 prarg:      /* arguments of print/printf: none (rectonode() is used), a pplist, or a parenthesized plist */
 293           /* empty */                   { $$ = rectonode(); }
 294         | pplist
 295         | '(' plist ')'                 { $$ = $2; }
 296         ;
 297 
 298 print:      /* either output keyword; its token value becomes the node code in simple_stmt */
 299           PRINT | PRINTF
 300         ;
 301 
 302 pst:        /* one or more NL or ';' separators */
 303           NL | ';' | pst NL | pst ';'
 304         ;
 305 
 306 rbrace:     /* '}' followed by any number of NL tokens */
 307           '}' | rbrace NL
 308         ;
 309 
 310 re:         /* a bare regular expression: a MATCH of rectonode() against the compiled expression, optionally negated */
 311            reg_expr
 312                 { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
 313         | NOT re        { $$ = op1(NOT, notnull($2)); }
 314         ;
 315 
 316 reg_expr:   /* startreg() runs right after the opening '/', before REGEXPR is read; the value is the REGEXPR string */
 317           '/' {startreg();} REGEXPR '/'         { $$ = $3; }
 318         ;
 319 
 320 rparen:     /* ')' followed by any number of NL tokens */
 321           ')' | rparen NL
 322         ;
 323 
 324 simple_stmt:
         /*
          * print/printf with or without a destination, delete, or a bare
          * expression.  The three forms with a destination ('|', APPEND,
          * GT) report a syntax error instead of building a node when safe
          * is set.  DELETE without a subscript passes 0 as the subscript
          * list.
          */
 325           print prarg '|' term          {
 326                         if (safe) SYNTAX("print | is unsafe");
 327                         else $$ = stat3($1, $2, itonp($3), $4); }
 328         | print prarg APPEND term       {
 329                         if (safe) SYNTAX("print >> is unsafe");
 330                         else $$ = stat3($1, $2, itonp($3), $4); }
 331         | print prarg GT term           {
 332                         if (safe) SYNTAX("print > is unsafe");
 333                         else $$ = stat3($1, $2, itonp($3), $4); }
 334         | print prarg                   { $$ = stat3($1, $2, NIL, NIL); }
 335         | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
 336         | DELETE varname                { $$ = stat2(DELETE, makearr($2), 0); }
 337         | pattern                       { $$ = exptostat($1); }
 338         | error                         { yyclearin; SYNTAX("illegal statement"); }
 339         ;
 340 
 341 st:         /* statement terminator: newlines, or ';' followed by optional newlines */
 342           nl
 343         | ';' opt_nl
 344         ;
 345 
 346 stmt:
         /*
          * One statement.  BREAK/CONTINUE outside a loop and NEXT/NEXTFILE
          * inside a function are reported through SYNTAX(), but the node is
          * still built.  Mid-rule actions raise inloop while the body of a
          * do or while loop is parsed.  A lone ';' yields 0.
          */
 347           BREAK st              {
 348                         if (!inloop) SYNTAX("break illegal outside of loops");
 349                         $$ = stat1(BREAK, NIL); }
 350         | CONTINUE st           {
 351                         if (!inloop) SYNTAX("continue illegal outside of loops");
 352                          $$ = stat1(CONTINUE, NIL); }
 353         | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
 354                 { $$ = stat2(DO, $3, notnull($7)); }
 355         | EXIT pattern st       { $$ = stat1(EXIT, $2); }
 356         | EXIT st               { $$ = stat1(EXIT, NIL); }
 357         | for
 358         | if stmt else stmt     { $$ = stat3(IF, $1, $2, $4); }
 359         | if stmt               { $$ = stat3(IF, $1, $2, NIL); }
 360         | lbrace stmtlist rbrace { $$ = $2; }
 361         | NEXT st       { if (infunc)
 362                                 SYNTAX("next is illegal inside a function");
 363                           $$ = stat1(NEXT, NIL); }
 364         | NEXTFILE st   { if (infunc)
 365                                 SYNTAX("nextfile is illegal inside a function");
 366                           $$ = stat1(NEXTFILE, NIL); }
 367         | RETURN pattern st     { $$ = stat1(RETURN, $2); }
 368         | RETURN st             { $$ = stat1(RETURN, NIL); }
 369         | simple_stmt st
 370         | while {inloop++;} stmt                { --inloop; $$ = stat2(WHILE, $1, $3); }
 371         | ';' opt_nl            { $$ = 0; }
 372         ;
 373 
 374 stmtlist:   /* one or more statements, joined with linkum() */
 375           stmt
 376         | stmtlist stmt         { $$ = linkum($1, $2); }
 377         ;
 378 
 379 subop:      /* either substitution keyword; its token value becomes the node code in term */
 380           SUB | GSUB
 381         ;
 382 
 383 term:
         /*
          * Arithmetic, unary operators, built-in and user function calls,
          * getline forms, constants and variables.  Wherever a pattern
          * argument may stand for a regular expression (MATCHFCN, subop),
          * a constant argument is compiled by makedfa() in the parser and
          * the first slot is NIL; otherwise the argument node is kept and
          * the first slot is (Node *)1.  The last argument of each SPLIT
          * node is STRING or REGEXPR cast to a Node pointer, recording which
          * kind of separator was given.
          */
 384           term '/' ASGNOP term          { $$ = op2(DIVEQ, $1, $4); }
 385         | term '+' term                 { $$ = op2(ADD, $1, $3); }
 386         | term '-' term                 { $$ = op2(MINUS, $1, $3); }
 387         | term '*' term                 { $$ = op2(MULT, $1, $3); }
 388         | term '/' term                 { $$ = op2(DIVIDE, $1, $3); }
 389         | term '%' term                 { $$ = op2(MOD, $1, $3); }
 390         | term POWER term               { $$ = op2(POWER, $1, $3); }
 391         | '-' term %prec UMINUS         { $$ = op1(UMINUS, $2); }
 392         | '+' term %prec UMINUS         { $$ = $2; }
 393         | NOT term %prec UMINUS         { $$ = op1(NOT, notnull($2)); }
 394         | BLTIN '(' ')'                 { $$ = op2(BLTIN, itonp($1), rectonode()); }
 395         | BLTIN '(' patlist ')'         { $$ = op2(BLTIN, itonp($1), $3); }
 396         | BLTIN                         { $$ = op2(BLTIN, itonp($1), rectonode()); }
 397         | CALL '(' ')'                  { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
 398         | CALL '(' patlist ')'          { $$ = op2(CALL, celltonode($1,CVAR), $3); }
 399         | CLOSE term                    { $$ = op1(CLOSE, $2); }
 400         | DECR var                      { $$ = op1(PREDECR, $2); }
 401         | INCR var                      { $$ = op1(PREINCR, $2); }
 402         | var DECR                      { $$ = op1(POSTDECR, $1); }
 403         | var INCR                      { $$ = op1(POSTINCR, $1); }
 404         | GETLINE var LT term           { $$ = op3(GETLINE, $2, itonp($3), $4); }
 405         | GETLINE LT term               { $$ = op3(GETLINE, NIL, itonp($2), $3); }
 406         | GETLINE var                   { $$ = op3(GETLINE, $2, NIL, NIL); }
 407         | GETLINE                       { $$ = op3(GETLINE, NIL, NIL, NIL); }
 408         | INDEX '(' pattern comma pattern ')'
 409                 { $$ = op2(INDEX, $3, $5); }
 410         | INDEX '(' pattern comma reg_expr ')'
 411                 { SYNTAX("index() doesn't permit regular expressions");
 412                   /* LINTED align */
 413                   $$ = op2(INDEX, $3, (Node*)$5); }
 414         | '(' pattern ')'               { $$ = $2; }
 415         | MATCHFCN '(' pattern comma reg_expr ')'
 416                 { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
 417         | MATCHFCN '(' pattern comma pattern ')'
 418                 { if (constnode($5))
 419                         $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
 420                   else
 421                         $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
 422         | NUMBER                        { $$ = celltonode($1, CCON); }
 423         | SPLIT '(' pattern comma varname comma pattern ')'     /* string */
 424                 { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
 425         | SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
 426                 { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
 427         | SPLIT '(' pattern comma varname ')'
 428                 { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
 429         | SPRINTF '(' patlist ')'       { $$ = op1($1, $3); }
 430         | STRING                        { $$ = celltonode($1, CCON); }
 431         | subop '(' reg_expr comma pattern ')'
 432                 { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
 433         | subop '(' pattern comma pattern ')'
 434                 { if (constnode($3))
 435                         $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
 436                   else
 437                         $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
 438         | subop '(' reg_expr comma pattern comma var ')'
 439                 { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
 440         | subop '(' pattern comma pattern comma var ')'
 441                 { if (constnode($3))
 442                         $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
 443                   else
 444                         $$ = op4($1, (Node *)1, $3, $5, $7); }
 445         | SUBSTR '(' pattern comma pattern comma pattern ')'
 446                 { $$ = op3(SUBSTR, $3, $5, $7); }
 447         | SUBSTR '(' pattern comma pattern ')'
 448                 { $$ = op3(SUBSTR, $3, $5, NIL); }
 449         | var
 450         ;
 451 
 452 var:        /* operand form accepted by ASGNOP, INCR and DECR: a name, a subscripted array, or an INDIRECT reference */
 453           varname
 454         | varname '[' patlist ']'       { $$ = op2(ARRAY, makearr($1), $3); }
 455         | IVAR                          { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
 456         | INDIRECT term                 { $$ = op1(INDIRECT, $2); }
 457         ;       
 458 
 459 varlist:    /* parameter list of the FUNC item in pa_stat; arglist is updated to the list built so far */
 460           /* nothing */         { arglist = $$ = 0; }
 461         | VAR                   { arglist = $$ = celltonode($1,CVAR); }
 462         | varlist comma VAR     {
 463                         checkdup($1, $3);       /* reports a name already present in the list */
 464                         arglist = $$ = linkum($1,celltonode($3,CVAR)); }
 465         ;
 466 
 467 varname:    /* a VAR, ARG or VARNF token turned into a node */
 468           VAR                   { $$ = celltonode($1, CVAR); }
 469         | ARG                   { $$ = op1(ARG, itonp($1)); }
 470         | VARNF                 { $$ = op1(VARNF, (Node *) $1); }
 471         ;
 472 
 473 
 474 while:      /* while-header; its value is the condition passed through notnull() */
 475           WHILE '(' pattern rparen      { $$ = notnull($3); }
 476         ;
 477 
 478 %%
 479 
 480 static void
 481 setfname(Cell *p)
 482 {
 483         if (isarr(p))
 484                 SYNTAX("%s is an array, not a function", p->nval);
 485         else if (isfcn(p))
 486                 SYNTAX("you can't define function %s more than once", p->nval);
 487         curfname = p->nval;
 488 }
 489 
 490 
 491 static int
 492 constnode(Node *p)
 493 {
 494         return (isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON);
 495 }
 496 
 497 static uchar *
 498 strnode(Node *p)
 499 {
 500         return (((Cell *)(p->narg[0]))->sval);
 501 }
 502 
 503 static Node *
 504 notnull(Node *n)
 505 {
 506         switch (n->nobj) {
 507         case LE: case LT: case EQ: case NE: case GT: case GE:
 508         case BOR: case AND: case NOT:
 509                 return (n);
 510         default:
 511                 return (op2(NE, n, nullnode));
 512         }
 513 }
 514 
 515 static void
 516 checkdup(Node *vl, Cell *cp)    /* check if name already in list */
 517 {
 518         uchar *s = cp->nval;
 519 
 520         for (; vl; vl = vl->nnext) {
 521                 if (strcmp((char *)s, (char *)((Cell *)(vl->narg[0]))->nval) == 0) {
 522                         SYNTAX("duplicate argument %s", s);
 523                         break;
 524                 }
 525         }
 526 }