1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 #pragma ident   "%Z%%M% %I%     %E% SMI"
  31 
  32 #define DEBUG
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <ctype.h>
  36 #include <string.h>
  37 #include "awk.h"
  38 #include "y.tab.h"
  39 
  40 #define FULLTAB 2       /* rehash when table gets this x full */
  41 #define GROWTAB 4       /* grow table by this factor */
  42 
  43 Array   *symtab;        /* main symbol table */
  44 
  45 uchar   **FS;           /* initial field sep */
  46 uchar   **RS;           /* initial record sep */
  47 uchar   **OFS;          /* output field sep */
  48 uchar   **ORS;          /* output record sep */
  49 uchar   **OFMT;         /* output format for numbers */
  50 Awkfloat *NF;           /* number of fields in current record */
  51 Awkfloat *NR;           /* number of current record */
  52 Awkfloat *FNR;          /* number of current record in current file */
  53 uchar   **FILENAME;     /* current filename argument */
  54 Awkfloat *ARGC;         /* number of arguments from command line */
  55 uchar   **SUBSEP;       /* subscript separator for a[i,j,k]; default \034 */
  56 Awkfloat *RSTART;       /* start of re matched with ~; origin 1 (!) */
  57 Awkfloat *RLENGTH;      /* length of same */
  58 
  59 Cell    *recloc;        /* location of record */
  60 Cell    *nrloc;         /* NR */
  61 Cell    *nfloc;         /* NF */
  62 Cell    *fnrloc;        /* FNR */
  63 Array   *ARGVtab;       /* symbol table containing ARGV[...] */
  64 Array   *ENVtab;        /* symbol table containing ENVIRON[...] */
  65 Cell    *rstartloc;     /* RSTART */
  66 Cell    *rlengthloc;    /* RLENGTH */
  67 Cell    *symtabloc;     /* SYMTAB */
  68 
  69 Cell    *nullloc;
  70 Node    *nullnode;      /* zero&null, converted into a node for comparisons */
  71 
  72 static  void    rehash(Array *);
  73 
  74 void
  75 syminit(void)
  76 {
  77         init_buf(&record, &record_size, LINE_INCR);
  78 
  79         /* initialize $0 */
  80         recloc = getfld(0);
  81         recloc->nval = (uchar *)"$0";
  82         recloc->sval = record;
  83         recloc->tval = REC|STR|DONTFREE;
  84 
  85         symtab = makesymtab(NSYMTAB);
  86         (void) setsymtab((uchar *)"0", (uchar *)"0", 0.0,
  87             NUM|STR|CON|DONTFREE, symtab);
  88         /* this is used for if(x)... tests: */
  89         nullloc = setsymtab((uchar *)"$zero&null", (uchar *)"", 0.0,
  90             NUM|STR|CON|DONTFREE, symtab);
  91         nullnode = valtonode(nullloc, CCON);
  92         FS = &setsymtab((uchar *)"FS", (uchar *)" ", 0.0,
  93             STR|DONTFREE, symtab)->sval;
  94         RS = &setsymtab((uchar *)"RS", (uchar *)"\n", 0.0,
  95             STR|DONTFREE, symtab)->sval;
  96         OFS = &setsymtab((uchar *)"OFS", (uchar *)" ", 0.0,
  97             STR|DONTFREE, symtab)->sval;
  98         ORS = &setsymtab((uchar *)"ORS", (uchar *)"\n", 0.0,
  99             STR|DONTFREE, symtab)->sval;
 100         OFMT = &setsymtab((uchar *)"OFMT", (uchar *)"%.6g", 0.0,
 101             STR|DONTFREE, symtab)->sval;
 102         FILENAME = &setsymtab((uchar *)"FILENAME", (uchar *)"-", 0.0,
 103             STR|DONTFREE, symtab)->sval;
 104         nfloc = setsymtab((uchar *)"NF", (uchar *)"", 0.0, NUM, symtab);
 105         NF = &nfloc->fval;
 106         nrloc = setsymtab((uchar *)"NR", (uchar *)"", 0.0, NUM, symtab);
 107         NR = &nrloc->fval;
 108         fnrloc = setsymtab((uchar *)"FNR", (uchar *)"", 0.0, NUM, symtab);
 109         FNR = &fnrloc->fval;
 110         SUBSEP = &setsymtab((uchar *)"SUBSEP", (uchar *)"\034", 0.0,
 111             STR|DONTFREE, symtab)->sval;
 112         rstartloc = setsymtab((uchar *)"RSTART", (uchar *)"", 0.0,
 113             NUM, symtab);
 114         RSTART = &rstartloc->fval;
 115         rlengthloc = setsymtab((uchar *)"RLENGTH", (uchar *)"", 0.0,
 116             NUM, symtab);
 117         RLENGTH = &rlengthloc->fval;
 118         symtabloc = setsymtab((uchar *)"SYMTAB", (uchar *)"", 0.0, ARR, symtab);
 119         symtabloc->sval = (uchar *)symtab;
 120 }
 121 
 122 void
 123 arginit(int ac, uchar *av[])
 124 {
 125         Cell *cp;
 126         int i;
 127         uchar temp[11];
 128 
 129         /* first make FILENAME first real argument */
 130         for (i = 1; i < ac; i++) {
 131                 if (!isclvar(av[i])) {
 132                         (void) setsval(lookup((uchar *)"FILENAME", symtab),
 133                             av[i]);
 134                         break;
 135                 }
 136         }
 137         ARGC = &setsymtab((uchar *)"ARGC", (uchar *)"", (Awkfloat)ac,
 138             NUM, symtab)->fval;
 139         cp = setsymtab((uchar *)"ARGV", (uchar *)"", 0.0, ARR, symtab);
 140         ARGVtab = makesymtab(NSYMTAB);  /* could be (int) ARGC as well */
 141         cp->sval = (uchar *) ARGVtab;
 142         for (i = 0; i < ac; i++) {
 143                 (void) sprintf((char *)temp, "%d", i);
 144                 if (is_number(*av)) {
 145                         (void) setsymtab(temp, *av, atof((const char *)*av),
 146                             STR|NUM, ARGVtab);
 147                 } else {
 148                         (void) setsymtab(temp, *av, 0.0, STR, ARGVtab);
 149                 }
 150                 av++;
 151         }
 152 }
 153 
 154 void
 155 envinit(uchar *envp[])
 156 {
 157         Cell *cp;
 158         uchar *p;
 159 
 160         cp = setsymtab((uchar *)"ENVIRON", (uchar *)"", 0.0, ARR, symtab);
 161         ENVtab = makesymtab(NSYMTAB);
 162         cp->sval = (uchar *) ENVtab;
 163         for (; *envp; envp++) {
 164                 if ((p = (uchar *)strchr((char *)*envp, '=')) == NULL)
 165                         continue;
 166                 *p++ = 0;       /* split into two strings at = */
 167                 if (is_number(p)) {
 168                         (void) setsymtab(*envp, p, atof((const char *)p),
 169                             STR|NUM, ENVtab);
 170                 } else {
 171                         (void) setsymtab(*envp, p, 0.0, STR, ENVtab);
 172                 }
 173                 /* restore in case env is passed down to a shell */
 174                 p[-1] = '=';
 175         }
 176 }
 177 
 178 Array *
 179 makesymtab(int n)
 180 {
 181         Array *ap;
 182         Cell **tp;
 183 
 184         ap = (Array *)malloc(sizeof (Array));
 185         tp = (Cell **)calloc(n, sizeof (Cell *));
 186         if (ap == NULL || tp == NULL)
 187                 ERROR "out of space in makesymtab" FATAL;
 188         ap->nelem = 0;
 189         ap->size = n;
 190         ap->tab = tp;
 191         return (ap);
 192 }
 193 
 194 void
 195 freesymtab(Cell *ap)    /* free symbol table */
 196 {
 197         Cell *cp, *next;
 198         Array *tp;
 199         int i;
 200 
 201         if (!isarr(ap))
 202                 return;
 203         /*LINTED align*/
 204         tp = (Array *)ap->sval;
 205         if (tp == NULL)
 206                 return;
 207         for (i = 0; i < tp->size; i++) {
 208                 for (cp = tp->tab[i]; cp != NULL; cp = next) {
 209                         next = cp->cnext;
 210                         xfree(cp->nval);
 211                         if (freeable(cp))
 212                                 xfree(cp->sval);
 213                         free(cp);
 214                 }
 215         }
 216         free(tp->tab);
 217         free(tp);
 218 }
 219 
 220 void
 221 freeelem(Cell *ap, uchar *s)            /* free elem s from ap (i.e., ap["s"] */
 222 {
 223         Array *tp;
 224         Cell *p, *prev = NULL;
 225         int h;
 226 
 227         /*LINTED align*/
 228         tp = (Array *)ap->sval;
 229         h = hash(s, tp->size);
 230         for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
 231                 if (strcmp((char *)s, (char *)p->nval) == 0) {
 232                         if (prev == NULL)       /* 1st one */
 233                                 tp->tab[h] = p->cnext;
 234                         else                    /* middle somewhere */
 235                                 prev->cnext = p->cnext;
 236                         if (freeable(p))
 237                                 xfree(p->sval);
 238                         free(p->nval);
 239                         free(p);
 240                         tp->nelem--;
 241                         return;
 242                 }
 243 }
 244 
 245 Cell *
 246 setsymtab(uchar *n, uchar *s, Awkfloat f, unsigned int t, Array *tp)
 247 {
 248         register int h;
 249         register Cell *p;
 250 
 251         if (n != NULL && (p = lookup(n, tp)) != NULL) {
 252                 dprintf(("setsymtab found %p: n=%s", (void *)p, p->nval));
 253                 dprintf((" s=\"%s\" f=%g t=%p\n",
 254                     p->sval, p->fval, (void *)p->tval));
 255                 return (p);
 256         }
 257         p = (Cell *)malloc(sizeof (Cell));
 258         if (p == NULL)
 259                 ERROR "symbol table overflow at %s", n FATAL;
 260         p->nval = tostring(n);
 261         p->sval = s ? tostring(s) : tostring((uchar *)"");
 262         p->fval = f;
 263         p->tval = t;
 264         p->csub = 0;
 265 
 266         tp->nelem++;
 267         if (tp->nelem > FULLTAB * tp->size)
 268                 rehash(tp);
 269         h = hash(n, tp->size);
 270         p->cnext = tp->tab[h];
 271         tp->tab[h] = p;
 272         dprintf(("setsymtab set %p: n=%s", (void *)p, p->nval));
 273         dprintf((" s=\"%s\" f=%g t=%p\n", p->sval, p->fval, (void *)p->tval));
 274         return (p);
 275 }
 276 
 277 int
 278 hash(uchar *s, int n)   /* form hash value for string s */
 279 {
 280         register unsigned hashval;
 281 
 282         for (hashval = 0; *s != '\0'; s++)
 283                 hashval = (*s + 31 * hashval);
 284         return (hashval % n);
 285 }
 286 
 287 static void
 288 rehash(Array *tp)       /* rehash items in small table into big one */
 289 {
 290         int i, nh, nsz;
 291         Cell *cp, *op, **np;
 292 
 293         nsz = GROWTAB * tp->size;
 294         np = (Cell **)calloc(nsz, sizeof (Cell *));
 295         if (np == NULL)
 296                 ERROR "out of space in rehash" FATAL;
 297         for (i = 0; i < tp->size; i++) {
 298                 for (cp = tp->tab[i]; cp; cp = op) {
 299                         op = cp->cnext;
 300                         nh = hash(cp->nval, nsz);
 301                         cp->cnext = np[nh];
 302                         np[nh] = cp;
 303                 }
 304         }
 305         free(tp->tab);
 306         tp->tab = np;
 307         tp->size = nsz;
 308 }
 309 
 310 Cell *
 311 lookup(uchar *s, Array *tp)     /* look for s in tp */
 312 {
 313         register Cell *p;
 314         int h;
 315 
 316         h = hash(s, tp->size);
 317         for (p = tp->tab[h]; p != NULL; p = p->cnext) {
 318                 if (strcmp((char *)s, (char *)p->nval) == 0)
 319                         return (p);     /* found it */
 320         }
 321         return (NULL);                  /* not found */
 322 }
 323 
 324 Awkfloat
 325 setfval(Cell *vp, Awkfloat f)
 326 {
 327         int     i;
 328 
 329         if ((vp->tval & (NUM | STR)) == 0)
 330                 funnyvar(vp, "assign to");
 331         if (vp->tval & FLD) {
 332                 donerec = 0;    /* mark $0 invalid */
 333                 i = fldidx(vp);
 334                 if (i > *NF)
 335                         newfld(i);
 336                 dprintf(("setting field %d to %g\n", i, f));
 337         } else if (vp->tval & REC) {
 338                 donefld = 0;    /* mark $1... invalid */
 339                 donerec = 1;
 340         }
 341         vp->tval &= ~STR;        /* mark string invalid */
 342         vp->tval |= NUM;     /* mark number ok */
 343         dprintf(("setfval %p: %s = %g, t=%p\n", (void *)vp,
 344             vp->nval ? vp->nval : (unsigned char *)"NULL",
 345             f, (void *)vp->tval));
 346         return (vp->fval = f);
 347 }
 348 
 349 void
 350 funnyvar(Cell *vp, char *rw)
 351 {
 352         if (vp->tval & ARR)
 353                 ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
 354         if (vp->tval & FCN)
 355                 ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
 356         ERROR "funny variable %o: n=%s s=\"%s\" f=%g t=%o",
 357             vp, vp->nval, vp->sval, vp->fval, vp->tval CONT;
 358 }
 359 
 360 uchar *
 361 setsval(Cell *vp, uchar *s)
 362 {
 363         int     i;
 364 
 365         if ((vp->tval & (NUM | STR)) == 0)
 366                 funnyvar(vp, "assign to");
 367         if (vp->tval & FLD) {
 368                 donerec = 0;    /* mark $0 invalid */
 369                 i = fldidx(vp);
 370                 if (i > *NF)
 371                         newfld(i);
 372                 dprintf(("setting field %d to %s\n", i, s));
 373         } else if (vp->tval & REC) {
 374                 donefld = 0;    /* mark $1... invalid */
 375                 donerec = 1;
 376         }
 377         vp->tval &= ~NUM;
 378         vp->tval |= STR;
 379         if (freeable(vp))
 380                 xfree(vp->sval);
 381         vp->tval &= ~DONTFREE;
 382         dprintf(("setsval %p: %s = \"%s\", t=%p\n",
 383             (void *)vp,
 384             vp->nval ? (char *)vp->nval : "",
 385             s,
 386             (void *)(vp->tval ? (char *)vp->tval : "")));
 387         return (vp->sval = tostring(s));
 388 }
 389 
 390 Awkfloat
 391 r_getfval(Cell *vp)
 392 {
 393         if ((vp->tval & (NUM | STR)) == 0)
 394                 funnyvar(vp, "read value of");
 395         if ((vp->tval & FLD) && donefld == 0)
 396                 fldbld();
 397         else if ((vp->tval & REC) && donerec == 0)
 398                 recbld();
 399         if (!isnum(vp)) {       /* not a number */
 400                 vp->fval = atof((const char *)vp->sval);  /* best guess */
 401                 if (is_number(vp->sval) && !(vp->tval&CON))
 402                         vp->tval |= NUM;     /* make NUM only sparingly */
 403         }
 404         dprintf(("getfval %p: %s = %g, t=%p\n",
 405             (void *)vp, vp->nval, vp->fval, (void *)vp->tval));
 406         return (vp->fval);
 407 }
 408 
 409 uchar *
 410 r_getsval(Cell *vp)
 411 {
 412         uchar s[256];
 413 
 414         if ((vp->tval & (NUM | STR)) == 0)
 415                 funnyvar(vp, "read value of");
 416         if ((vp->tval & FLD) && donefld == 0)
 417                 fldbld();
 418         else if ((vp->tval & REC) && donerec == 0)
 419                 recbld();
 420         if ((vp->tval & STR) == 0) {
 421                 if (!(vp->tval&DONTFREE))
 422                         xfree(vp->sval);
 423                 if ((long long)vp->fval == vp->fval) {
 424                         (void) snprintf((char *)s, sizeof (s),
 425                             "%.20g", vp->fval);
 426                 } else {
 427                         /*LINTED*/
 428                         (void) snprintf((char *)s, sizeof (s),
 429                             (char *)*OFMT, vp->fval);
 430                 }
 431                 vp->sval = tostring(s);
 432                 vp->tval &= ~DONTFREE;
 433                 vp->tval |= STR;
 434         }
 435         dprintf(("getsval %p: %s = \"%s\", t=%p\n",
 436             (void *)vp,
 437             vp->nval ? (char *)vp->nval : "",
 438             vp->sval ? (char *)vp->sval : "",
 439             (void *)vp->tval));
 440         return (vp->sval);
 441 }
 442 
 443 uchar *
 444 tostring(uchar *s)
 445 {
 446         register uchar *p;
 447 
 448         p = (uchar *)malloc(strlen((char *)s)+1);
 449         if (p == NULL)
 450                 ERROR "out of space in tostring on %s", s FATAL;
 451         (void) strcpy((char *)p, (char *)s);
 452         return (p);
 453 }
 454 
 455 uchar *
 456 qstring(uchar *s, int delim)    /* collect string up to delim */
 457 {
 458         uchar *cbuf, *ret;
 459         int c, n;
 460         size_t  cbufsz, cnt;
 461 
 462         init_buf(&cbuf, &cbufsz, LINE_INCR);
 463 
 464         for (cnt = 0; (c = *s) != delim; s++) {
 465                 if (c == '\n') {
 466                         ERROR "newline in string %.10s...", cbuf SYNTAX;
 467                 } else if (c != '\\') {
 468                         expand_buf(&cbuf, &cbufsz, cnt);
 469                         cbuf[cnt++] = c;
 470                 } else {        /* \something */
 471                         expand_buf(&cbuf, &cbufsz, cnt);
 472                         switch (c = *++s) {
 473                         case '\\':      cbuf[cnt++] = '\\'; break;
 474                         case 'n':       cbuf[cnt++] = '\n'; break;
 475                         case 't':       cbuf[cnt++] = '\t'; break;
 476                         case 'b':       cbuf[cnt++] = '\b'; break;
 477                         case 'f':       cbuf[cnt++] = '\f'; break;
 478                         case 'r':       cbuf[cnt++] = '\r'; break;
 479                         default:
 480                                 if (!isdigit(c)) {
 481                                         cbuf[cnt++] = c;
 482                                         break;
 483                                 }
 484                                 n = c - '0';
 485                                 if (isdigit(s[1])) {
 486                                         n = 8 * n + *++s - '0';
 487                                         if (isdigit(s[1]))
 488                                                 n = 8 * n + *++s - '0';
 489                                 }
 490                                 cbuf[cnt++] = n;
 491                                 break;
 492                         }
 493                 }
 494         }
 495         cbuf[cnt] = '\0';
 496         ret = tostring(cbuf);
 497         free(cbuf);
 498         return (ret);
 499 }