1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright (C) Lucent Technologies 1997
  29  * All Rights Reserved
  30  *
  31  * Permission to use, copy, modify, and distribute this software and
  32  * its documentation for any purpose and without fee is hereby
  33  * granted, provided that the above copyright notice appear in all
  34  * copies and that both that the copyright notice and this
  35  * permission notice and warranty disclaimer appear in supporting
  36  * documentation, and that the name Lucent Technologies or any of
  37  * its entities not be used in advertising or publicity pertaining
  38  * to distribution of the software without specific, written prior
  39  * permission.
  40  *
  41  * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
  42  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
  43  * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
  44  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  45  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
  46  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
  47  * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
  48  * THIS SOFTWARE.
  49  */
  50 
  51 #define DEBUG
  52 #include <math.h>
  53 #include "awk.h"
  54 #include "y.tab.h"
  55 
  56 #define FULLTAB 2       /* rehash when table gets this x full */
  57 #define GROWTAB 4       /* grow table by this factor */
  58 
  59 Array   *symtab;        /* main symbol table */
  60 
  61 uchar   **FS;           /* initial field sep */
  62 uchar   **RS;           /* initial record sep */
  63 uchar   **OFS;          /* output field sep */
  64 uchar   **ORS;          /* output record sep */
  65 uchar   **OFMT;         /* output format for numbers */
  66 uchar   **CONVFMT;      /* format for conversions in getsval */
  67 Awkfloat *NF;           /* number of fields in current record */
  68 Awkfloat *NR;           /* number of current record */
  69 Awkfloat *FNR;          /* number of current record in current file */
  70 uchar   **FILENAME;     /* current filename argument */
  71 Awkfloat *ARGC;         /* number of arguments from command line */
  72 uchar   **SUBSEP;       /* subscript separator for a[i,j,k]; default \034 */
  73 Awkfloat *RSTART;       /* start of re matched with ~; origin 1 (!) */
  74 Awkfloat *RLENGTH;      /* length of same */
  75 
  76 Cell    *fsloc;         /* FS */
  77 Cell    *nrloc;         /* NR */
  78 Cell    *nfloc;         /* NF */
  79 Cell    *fnrloc;        /* FNR */
  80 Array   *ARGVtab;       /* symbol table containing ARGV[...] */
  81 Array   *ENVtab;        /* symbol table containing ENVIRON[...] */
  82 Cell    *rstartloc;     /* RSTART */
  83 Cell    *rlengthloc;    /* RLENGTH */
  84 Cell    *symtabloc;     /* SYMTAB */
  85 
  86 Cell    *nullloc;       /* a guaranteed empty cell */
  87 Node    *nullnode;      /* zero&null, converted into a node for comparisons */
  88 Cell    *literal0;
  89 
  90 static  void    rehash(Array *);
  91 
  92 void
  93 syminit(void)           /* initialize symbol table with builtin vars */
  94 {
  95         literal0 = setsymtab((uchar *)"0", (uchar *)"0", 0.0,
  96             NUM|STR|CON|DONTFREE, symtab);
  97         /* this is used for if(x)... tests: */
  98         nullloc = setsymtab((uchar *)"$zero&null", (uchar *)"", 0.0,
  99             NUM|STR|CON|DONTFREE, symtab);
 100         nullnode = celltonode(nullloc, CCON);
 101 
 102         fsloc = setsymtab((uchar *)"FS", (uchar *)" ", 0.0,
 103             STR|DONTFREE, symtab);
 104         FS = &fsloc->sval;
 105         RS = &setsymtab((uchar *)"RS", (uchar *)"\n", 0.0,
 106             STR|DONTFREE, symtab)->sval;
 107         OFS = &setsymtab((uchar *)"OFS", (uchar *)" ", 0.0,
 108             STR|DONTFREE, symtab)->sval;
 109         ORS = &setsymtab((uchar *)"ORS", (uchar *)"\n", 0.0,
 110             STR|DONTFREE, symtab)->sval;
 111         OFMT = &setsymtab((uchar *)"OFMT", (uchar *)"%.6g", 0.0,
 112             STR|DONTFREE, symtab)->sval;
 113         CONVFMT = &setsymtab((uchar *)"CONVFMT", (uchar *)"%.6g", 0.0,
 114             STR|DONTFREE, symtab)->sval;
 115         FILENAME = &setsymtab((uchar *)"FILENAME", (uchar *)"-", 0.0,
 116             STR|DONTFREE, symtab)->sval;
 117         nfloc = setsymtab((uchar *)"NF", (uchar *)"", 0.0, NUM, symtab);
 118         NF = &nfloc->fval;
 119         nrloc = setsymtab((uchar *)"NR", (uchar *)"", 0.0, NUM, symtab);
 120         NR = &nrloc->fval;
 121         fnrloc = setsymtab((uchar *)"FNR", (uchar *)"", 0.0, NUM, symtab);
 122         FNR = &fnrloc->fval;
 123         SUBSEP = &setsymtab((uchar *)"SUBSEP", (uchar *)"\034", 0.0,
 124             STR|DONTFREE, symtab)->sval;
 125         rstartloc = setsymtab((uchar *)"RSTART", (uchar *)"", 0.0,
 126             NUM, symtab);
 127         RSTART = &rstartloc->fval;
 128         rlengthloc = setsymtab((uchar *)"RLENGTH", (uchar *)"", 0.0,
 129             NUM, symtab);
 130         RLENGTH = &rlengthloc->fval;
 131         symtabloc = setsymtab((uchar *)"SYMTAB", (uchar *)"", 0.0, ARR, symtab);
 132         symtabloc->sval = (uchar *)symtab;
 133 }
 134 
 135 void
 136 arginit(int ac, uchar *av[]) /* set up ARGV and ARGC */
 137 {
 138         Cell *cp;
 139         int i;
 140         uchar temp[50];
 141 
 142         ARGC = &setsymtab((uchar *)"ARGC", (uchar *)"", (Awkfloat)ac,
 143             NUM, symtab)->fval;
 144         cp = setsymtab((uchar *)"ARGV", (uchar *)"", 0.0, ARR, symtab);
 145         ARGVtab = makesymtab(NSYMTAB);  /* could be (int) ARGC as well */
 146         cp->sval = (uchar *)ARGVtab;
 147         for (i = 0; i < ac; i++) {
 148                 (void) sprintf((char *)temp, "%d", i);
 149                 if (is_number(*av)) {
 150                         (void) setsymtab(temp, *av, atof((const char *)*av),
 151                             STR|NUM, ARGVtab);
 152                 } else {
 153                         (void) setsymtab(temp, *av, 0.0, STR, ARGVtab);
 154                 }
 155                 av++;
 156         }
 157 }
 158 
 159 void
 160 envinit(uchar **envp)           /* set up ENVIRON variable */
 161 {
 162         Cell *cp;
 163         uchar *p;
 164 
 165         cp = setsymtab((uchar *)"ENVIRON", (uchar *)"", 0.0, ARR, symtab);
 166         ENVtab = makesymtab(NSYMTAB);
 167         cp->sval = (uchar *)ENVtab;
 168         for (; *envp; envp++) {
 169                 if ((p = (uchar *)strchr((char *)*envp, '=')) == NULL)
 170                         continue;
 171                 if (p == *envp) /* no left hand side name in env string */
 172                         continue;
 173                 *p++ = 0;       /* split into two strings at = */
 174                 if (is_number(p)) {
 175                         (void) setsymtab(*envp, p, atof((const char *)p),
 176                             STR|NUM, ENVtab);
 177                 } else {
 178                         (void) setsymtab(*envp, p, 0.0, STR, ENVtab);
 179                 }
 180                 /* restore in case env is passed down to a shell */
 181                 p[-1] = '=';
 182         }
 183 }
 184 
 185 Array *
 186 makesymtab(int n)       /* make a new symbol table */
 187 {
 188         Array *ap;
 189         Cell **tp;
 190 
 191         ap = (Array *)malloc(sizeof (Array));
 192         tp = (Cell **)calloc(n, sizeof (Cell *));
 193         if (ap == NULL || tp == NULL)
 194                 FATAL("out of space in makesymtab");
 195         ap->nelem = 0;
 196         ap->size = n;
 197         ap->tab = tp;
 198         return (ap);
 199 }
 200 
 201 void
 202 freesymtab(Cell *ap)    /* free symbol table */
 203 {
 204         Cell *cp, *temp;
 205         Array *tp;
 206         int i;
 207 
 208         if (!isarr(ap))
 209                 return;
 210         /*LINTED align*/
 211         tp = (Array *)ap->sval;
 212         if (tp == NULL)
 213                 return;
 214         for (i = 0; i < tp->size; i++) {
 215                 for (cp = tp->tab[i]; cp != NULL; cp = temp) {
 216                         xfree(cp->nval);
 217                         if (freeable(cp))
 218                                 xfree(cp->sval);
 219                         /* avoids freeing then using */
 220                         temp = cp->cnext;
 221                         free(cp);
 222                         tp->nelem--;
 223                 }
 224                 tp->tab[i] = 0;
 225         }
 226         if (tp->nelem != 0)
 227                 WARNING("can't happen: inconsistent element count freeing %s",
 228                     ap->nval);
 229         free(tp->tab);
 230         free(tp);
 231 }
 232 
 233 void
 234 freeelem(Cell *ap, const uchar *s) /* free elem s from ap (i.e., ap["s"] */
 235 {
 236         Array *tp;
 237         Cell *p, *prev = NULL;
 238         int h;
 239 
 240         /*LINTED align*/
 241         tp = (Array *)ap->sval;
 242         h = hash(s, tp->size);
 243         for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
 244                 if (strcmp((char *)s, (char *)p->nval) == 0) {
 245                         if (prev == NULL)       /* 1st one */
 246                                 tp->tab[h] = p->cnext;
 247                         else                    /* middle somewhere */
 248                                 prev->cnext = p->cnext;
 249                         if (freeable(p))
 250                                 xfree(p->sval);
 251                         free(p->nval);
 252                         free(p);
 253                         tp->nelem--;
 254                         return;
 255                 }
 256 }
 257 
 258 Cell *
 259 setsymtab(const uchar *n, const uchar *s, Awkfloat f, unsigned int t,
 260     Array *tp)
 261 {
 262         int h;
 263         Cell *p;
 264 
 265         if (n != NULL && (p = lookup(n, tp)) != NULL) {
 266                 dprintf(("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
 267                     (void *)p, p->nval, p->sval, p->fval, p->tval));
 268                 return (p);
 269         }
 270         p = (Cell *)malloc(sizeof (Cell));
 271         if (p == NULL)
 272                 FATAL("out of space for symbol table at %s", n);
 273         p->nval = tostring(n);
 274         p->sval = s ? tostring(s) : tostring((uchar *)"");
 275         p->fval = f;
 276         p->tval = t;
 277         p->csub = CUNK;
 278         p->ctype = OCELL;
 279         tp->nelem++;
 280         if (tp->nelem > FULLTAB * tp->size)
 281                 rehash(tp);
 282         h = hash(n, tp->size);
 283         p->cnext = tp->tab[h];
 284         tp->tab[h] = p;
 285         dprintf(("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
 286             (void *)p, p->nval, p->sval, p->fval, p->tval));
 287         return (p);
 288 }
 289 
 290 int
 291 hash(const uchar *s, int n)     /* form hash value for string s */
 292 {
 293         unsigned hashval;
 294 
 295         for (hashval = 0; *s != '\0'; s++)
 296                 hashval = (*s + 31 * hashval);
 297         return (hashval % n);
 298 }
 299 
 300 static void
 301 rehash(Array *tp)       /* rehash items in small table into big one */
 302 {
 303         int i, nh, nsz;
 304         Cell *cp, *op, **np;
 305 
 306         nsz = GROWTAB * tp->size;
 307         np = (Cell **)calloc(nsz, sizeof (Cell *));
 308         if (np == NULL)         /* can't do it, but can keep running. */
 309                 return;         /* someone else will run out later. */
 310         for (i = 0; i < tp->size; i++) {
 311                 for (cp = tp->tab[i]; cp; cp = op) {
 312                         op = cp->cnext;
 313                         nh = hash(cp->nval, nsz);
 314                         cp->cnext = np[nh];
 315                         np[nh] = cp;
 316                 }
 317         }
 318         free(tp->tab);
 319         tp->tab = np;
 320         tp->size = nsz;
 321 }
 322 
 323 Cell *
 324 lookup(const uchar *s, Array *tp)       /* look for s in tp */
 325 {
 326         Cell *p;
 327         int h;
 328 
 329         h = hash(s, tp->size);
 330         for (p = tp->tab[h]; p != NULL; p = p->cnext) {
 331                 if (strcmp((char *)s, (char *)p->nval) == 0)
 332                         return (p);     /* found it */
 333         }
 334         return (NULL);                  /* not found */
 335 }
 336 
 337 Awkfloat
 338 setfval(Cell *vp, Awkfloat f)   /* set float val of a Cell */
 339 {
 340         int fldno;
 341 
 342         if ((vp->tval & (NUM | STR)) == 0)
 343                 funnyvar(vp, "assign to");
 344         if (isfld(vp)) {
 345                 donerec = 0;    /* mark $0 invalid */
 346                 fldno = atoi((char *)vp->nval);
 347                 if (fldno > *NF)
 348                         newfld(fldno);
 349                 dprintf(("setting field %d to %g\n", fldno, f));
 350         } else if (isrec(vp)) {
 351                 donefld = 0;    /* mark $1... invalid */
 352                 donerec = 1;
 353         }
 354         if (freeable(vp))
 355                 xfree(vp->sval); /* free any previous string */
 356         vp->tval &= ~STR;        /* mark string invalid */
 357         vp->tval |= NUM;     /* mark number ok */
 358         if (f == -0)  /* who would have thought this possible? */
 359                 f = 0;
 360         dprintf(("setfval %p: %s = %g, t=%o\n", (void *)vp,
 361             vp->nval, f, vp->tval));
 362         return (vp->fval = f);
 363 }
 364 
 365 void
 366 funnyvar(Cell *vp, char *rw)
 367 {
 368         if (isarr(vp))
 369                 FATAL("can't %s %s; it's an array name.", rw, vp->nval);
 370         if (vp->tval & FCN)
 371                 FATAL("can't %s %s; it's a function.", rw, vp->nval);
 372         WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
 373             vp, vp->nval, vp->sval, vp->fval, vp->tval);
 374 }
 375 
 376 uchar *
 377 setsval(Cell *vp, const uchar *s)       /* set string val of a Cell */
 378 {
 379         uchar *t;
 380         int fldno;
 381 
 382         dprintf(("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
 383             (void*)vp, vp->nval, s, vp->tval, donerec, donefld));
 384         if ((vp->tval & (NUM | STR)) == 0)
 385                 funnyvar(vp, "assign to");
 386         if (isfld(vp)) {
 387                 donerec = 0;    /* mark $0 invalid */
 388                 fldno = atoi((const char *)vp->nval);
 389                 if (fldno > *NF)
 390                         newfld(fldno);
 391                 dprintf(("setting field %d to %s (%p)\n", fldno, s, (void *)s));
 392         } else if (isrec(vp)) {
 393                 donefld = 0;    /* mark $1... invalid */
 394                 donerec = 1;
 395         }
 396         t = tostring(s);        /* in case it's self-assign */
 397         if (freeable(vp))
 398                 xfree(vp->sval);
 399         vp->tval &= ~NUM;
 400         vp->tval |= STR;
 401         vp->tval &= ~DONTFREE;
 402         dprintf(("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
 403             (void *)vp, vp->nval, t, (void *)t, vp->tval, donerec, donefld));
 404         return (vp->sval = t);
 405 }
 406 
 407 Awkfloat
 408 getfval(Cell *vp)       /* get float val of a Cell */
 409 {
 410         if ((vp->tval & (NUM | STR)) == 0)
 411                 funnyvar(vp, "read value of");
 412         if (isfld(vp) && donefld == 0)
 413                 fldbld();
 414         else if (isrec(vp) && donerec == 0)
 415                 recbld();
 416         if (!isnum(vp)) {       /* not a number */
 417                 vp->fval = atof((const char *)vp->sval);  /* best guess */
 418                 if (is_number(vp->sval) && !(vp->tval & CON))
 419                         vp->tval |= NUM;     /* make NUM only sparingly */
 420         }
 421         dprintf(("getfval %p: %s = %g, t=%o\n",
 422             (void *)vp, vp->nval, vp->fval, vp->tval));
 423         return (vp->fval);
 424 }
 425 
 426 static uchar *
 427 get_str_val(Cell *vp, uchar **fmt)      /* get string val of a Cell */
 428 {
 429         uchar s[100];   /* BUG: unchecked */
 430         double dtemp;
 431 
 432         if ((vp->tval & (NUM | STR)) == 0)
 433                 funnyvar(vp, "read value of");
 434         if (isfld(vp) && donefld == 0)
 435                 fldbld();
 436         else if (isrec(vp) && donerec == 0)
 437                 recbld();
 438         if (isstr(vp) == 0) {
 439                 if (freeable(vp))
 440                         xfree(vp->sval);
 441                 /* it's integral */
 442                 if (modf((long long)vp->fval, &dtemp) == 0) {
 443                         (void) snprintf((char *)s, sizeof (s),
 444                             "%.30g", vp->fval);
 445                 } else {
 446                         /*LINTED*/
 447                         (void) snprintf((char *)s, sizeof (s),
 448                             (char *)*fmt, vp->fval);
 449                 }
 450                 vp->sval = tostring(s);
 451                 vp->tval &= ~DONTFREE;
 452                 vp->tval |= STR;
 453         }
 454         dprintf(("getsval %p: %s = \"%s (%p)\", t=%o\n",
 455             (void *)vp, vp->nval, vp->sval, (void *)vp->sval, vp->tval));
 456         return (vp->sval);
 457 }
 458 
 459 uchar *
 460 getsval(Cell *vp)       /* get string val of a Cell */
 461 {
 462         return (get_str_val(vp, CONVFMT));
 463 }
 464 
 465 uchar *
 466 getpssval(Cell *vp)     /* get string val of a Cell for print */
 467 {
 468         return (get_str_val(vp, OFMT));
 469 }
 470 
 471 uchar *
 472 tostring(const uchar *s)        /* make a copy of string s */
 473 {
 474         uchar *p;
 475 
 476         p = (uchar *)malloc(strlen((char *)s)+1);
 477         if (p == NULL)
 478                 FATAL("out of space in tostring on %s", s);
 479         (void) strcpy((char *)p, (char *)s);
 480         return (p);
 481 }
 482 
 483 uchar *
 484 qstring(const uchar *is, int delim)     /* collect string up to delim */
 485 {
 486         const uchar *os = is;
 487         int c, n;
 488         uchar *s = (uchar *)is;
 489         uchar *buf, *bp;
 490 
 491         if ((buf = (uchar *)malloc(strlen((char *)is)+3)) == NULL)
 492                 FATAL("out of space in qstring(%s)", s);
 493         for (bp = buf; (c = *s) != delim; s++) {
 494                 if (c == '\n') {
 495                         SYNTAX("newline in string %.20s...", os);
 496                 } else if (c != '\\') {
 497                         *bp++ = c;
 498                 } else {        /* \something */
 499                         c = *++s;
 500                         if (c == 0) {   /* \ at end */
 501                                 *bp++ = '\\';
 502                                 break;  /* for loop */
 503                         }
 504                         switch (c) {
 505                         case '\\':      *bp++ = '\\'; break;
 506                         case 'n':       *bp++ = '\n'; break;
 507                         case 't':       *bp++ = '\t'; break;
 508                         case 'b':       *bp++ = '\b'; break;
 509                         case 'f':       *bp++ = '\f'; break;
 510                         case 'r':       *bp++ = '\r'; break;
 511                         default:
 512                                 if (!isdigit(c)) {
 513                                         *bp++ = c;
 514                                         break;
 515                                 }
 516                                 n = c - '0';
 517                                 if (isdigit(s[1])) {
 518                                         n = 8 * n + *++s - '0';
 519                                         if (isdigit(s[1]))
 520                                                 n = 8 * n + *++s - '0';
 521                                 }
 522                                 *bp++ = n;
 523                                 break;
 524                         }
 525                 }
 526         }
 527         *bp++ = 0;
 528         return (buf);
 529 }