1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright (C) Lucent Technologies 1997
29 * All Rights Reserved
30 *
31 * Permission to use, copy, modify, and distribute this software and
32 * its documentation for any purpose and without fee is hereby
33 * granted, provided that the above copyright notice appear in all
34 * copies and that both that the copyright notice and this
35 * permission notice and warranty disclaimer appear in supporting
36 * documentation, and that the name Lucent Technologies or any of
37 * its entities not be used in advertising or publicity pertaining
38 * to distribution of the software without specific, written prior
39 * permission.
40 *
41 * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
42 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
43 * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
44 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
45 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
46 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
47 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
48 * THIS SOFTWARE.
49 */
50
51 #define DEBUG
52 #include <math.h>
53 #include "awk.h"
54 #include "y.tab.h"
55
56 #define FULLTAB 2 /* rehash when table gets this x full */
57 #define GROWTAB 4 /* grow table by this factor */
58
59 Array *symtab; /* main symbol table */
60
61 uchar **FS; /* initial field sep */
62 uchar **RS; /* initial record sep */
63 uchar **OFS; /* output field sep */
64 uchar **ORS; /* output record sep */
65 uchar **OFMT; /* output format for numbers */
66 uchar **CONVFMT; /* format for conversions in getsval */
67 Awkfloat *NF; /* number of fields in current record */
68 Awkfloat *NR; /* number of current record */
69 Awkfloat *FNR; /* number of current record in current file */
70 uchar **FILENAME; /* current filename argument */
71 Awkfloat *ARGC; /* number of arguments from command line */
72 uchar **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
73 Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
74 Awkfloat *RLENGTH; /* length of same */
75
76 Cell *fsloc; /* FS */
77 Cell *nrloc; /* NR */
78 Cell *nfloc; /* NF */
79 Cell *fnrloc; /* FNR */
80 Array *ARGVtab; /* symbol table containing ARGV[...] */
81 Array *ENVtab; /* symbol table containing ENVIRON[...] */
82 Cell *rstartloc; /* RSTART */
83 Cell *rlengthloc; /* RLENGTH */
84 Cell *symtabloc; /* SYMTAB */
85
86 Cell *nullloc; /* a guaranteed empty cell */
87 Node *nullnode; /* zero&null, converted into a node for comparisons */
88 Cell *literal0;
89
90 static void rehash(Array *);
91
92 void
93 syminit(void) /* initialize symbol table with builtin vars */
94 {
95 literal0 = setsymtab((uchar *)"0", (uchar *)"0", 0.0,
96 NUM|STR|CON|DONTFREE, symtab);
97 /* this is used for if(x)... tests: */
98 nullloc = setsymtab((uchar *)"$zero&null", (uchar *)"", 0.0,
99 NUM|STR|CON|DONTFREE, symtab);
100 nullnode = celltonode(nullloc, CCON);
101
102 fsloc = setsymtab((uchar *)"FS", (uchar *)" ", 0.0,
103 STR|DONTFREE, symtab);
104 FS = &fsloc->sval;
105 RS = &setsymtab((uchar *)"RS", (uchar *)"\n", 0.0,
106 STR|DONTFREE, symtab)->sval;
107 OFS = &setsymtab((uchar *)"OFS", (uchar *)" ", 0.0,
108 STR|DONTFREE, symtab)->sval;
109 ORS = &setsymtab((uchar *)"ORS", (uchar *)"\n", 0.0,
110 STR|DONTFREE, symtab)->sval;
111 OFMT = &setsymtab((uchar *)"OFMT", (uchar *)"%.6g", 0.0,
112 STR|DONTFREE, symtab)->sval;
113 CONVFMT = &setsymtab((uchar *)"CONVFMT", (uchar *)"%.6g", 0.0,
114 STR|DONTFREE, symtab)->sval;
115 FILENAME = &setsymtab((uchar *)"FILENAME", (uchar *)"-", 0.0,
116 STR|DONTFREE, symtab)->sval;
117 nfloc = setsymtab((uchar *)"NF", (uchar *)"", 0.0, NUM, symtab);
118 NF = &nfloc->fval;
119 nrloc = setsymtab((uchar *)"NR", (uchar *)"", 0.0, NUM, symtab);
120 NR = &nrloc->fval;
121 fnrloc = setsymtab((uchar *)"FNR", (uchar *)"", 0.0, NUM, symtab);
122 FNR = &fnrloc->fval;
123 SUBSEP = &setsymtab((uchar *)"SUBSEP", (uchar *)"\034", 0.0,
124 STR|DONTFREE, symtab)->sval;
125 rstartloc = setsymtab((uchar *)"RSTART", (uchar *)"", 0.0,
126 NUM, symtab);
127 RSTART = &rstartloc->fval;
128 rlengthloc = setsymtab((uchar *)"RLENGTH", (uchar *)"", 0.0,
129 NUM, symtab);
130 RLENGTH = &rlengthloc->fval;
131 symtabloc = setsymtab((uchar *)"SYMTAB", (uchar *)"", 0.0, ARR, symtab);
132 symtabloc->sval = (uchar *)symtab;
133 }
134
135 void
136 arginit(int ac, uchar *av[]) /* set up ARGV and ARGC */
137 {
138 Cell *cp;
139 int i;
140 uchar temp[50];
141
142 ARGC = &setsymtab((uchar *)"ARGC", (uchar *)"", (Awkfloat)ac,
143 NUM, symtab)->fval;
144 cp = setsymtab((uchar *)"ARGV", (uchar *)"", 0.0, ARR, symtab);
145 ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
146 cp->sval = (uchar *)ARGVtab;
147 for (i = 0; i < ac; i++) {
148 (void) sprintf((char *)temp, "%d", i);
149 if (is_number(*av)) {
150 (void) setsymtab(temp, *av, atof((const char *)*av),
151 STR|NUM, ARGVtab);
152 } else {
153 (void) setsymtab(temp, *av, 0.0, STR, ARGVtab);
154 }
155 av++;
156 }
157 }
158
159 void
160 envinit(uchar **envp) /* set up ENVIRON variable */
161 {
162 Cell *cp;
163 uchar *p;
164
165 cp = setsymtab((uchar *)"ENVIRON", (uchar *)"", 0.0, ARR, symtab);
166 ENVtab = makesymtab(NSYMTAB);
167 cp->sval = (uchar *)ENVtab;
168 for (; *envp; envp++) {
169 if ((p = (uchar *)strchr((char *)*envp, '=')) == NULL)
170 continue;
171 if (p == *envp) /* no left hand side name in env string */
172 continue;
173 *p++ = 0; /* split into two strings at = */
174 if (is_number(p)) {
175 (void) setsymtab(*envp, p, atof((const char *)p),
176 STR|NUM, ENVtab);
177 } else {
178 (void) setsymtab(*envp, p, 0.0, STR, ENVtab);
179 }
180 /* restore in case env is passed down to a shell */
181 p[-1] = '=';
182 }
183 }
184
185 Array *
186 makesymtab(int n) /* make a new symbol table */
187 {
188 Array *ap;
189 Cell **tp;
190
191 ap = (Array *)malloc(sizeof (Array));
192 tp = (Cell **)calloc(n, sizeof (Cell *));
193 if (ap == NULL || tp == NULL)
194 FATAL("out of space in makesymtab");
195 ap->nelem = 0;
196 ap->size = n;
197 ap->tab = tp;
198 return (ap);
199 }
200
201 void
202 freesymtab(Cell *ap) /* free symbol table */
203 {
204 Cell *cp, *temp;
205 Array *tp;
206 int i;
207
208 if (!isarr(ap))
209 return;
210 /*LINTED align*/
211 tp = (Array *)ap->sval;
212 if (tp == NULL)
213 return;
214 for (i = 0; i < tp->size; i++) {
215 for (cp = tp->tab[i]; cp != NULL; cp = temp) {
216 xfree(cp->nval);
217 if (freeable(cp))
218 xfree(cp->sval);
219 /* avoids freeing then using */
220 temp = cp->cnext;
221 free(cp);
222 tp->nelem--;
223 }
224 tp->tab[i] = 0;
225 }
226 if (tp->nelem != 0)
227 WARNING("can't happen: inconsistent element count freeing %s",
228 ap->nval);
229 free(tp->tab);
230 free(tp);
231 }
232
233 void
234 freeelem(Cell *ap, const uchar *s) /* free elem s from ap (i.e., ap["s"] */
235 {
236 Array *tp;
237 Cell *p, *prev = NULL;
238 int h;
239
240 /*LINTED align*/
241 tp = (Array *)ap->sval;
242 h = hash(s, tp->size);
243 for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
244 if (strcmp((char *)s, (char *)p->nval) == 0) {
245 if (prev == NULL) /* 1st one */
246 tp->tab[h] = p->cnext;
247 else /* middle somewhere */
248 prev->cnext = p->cnext;
249 if (freeable(p))
250 xfree(p->sval);
251 free(p->nval);
252 free(p);
253 tp->nelem--;
254 return;
255 }
256 }
257
258 Cell *
259 setsymtab(const uchar *n, const uchar *s, Awkfloat f, unsigned int t,
260 Array *tp)
261 {
262 int h;
263 Cell *p;
264
265 if (n != NULL && (p = lookup(n, tp)) != NULL) {
266 dprintf(("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
267 (void *)p, p->nval, p->sval, p->fval, p->tval));
268 return (p);
269 }
270 p = (Cell *)malloc(sizeof (Cell));
271 if (p == NULL)
272 FATAL("out of space for symbol table at %s", n);
273 p->nval = tostring(n);
274 p->sval = s ? tostring(s) : tostring((uchar *)"");
275 p->fval = f;
276 p->tval = t;
277 p->csub = CUNK;
278 p->ctype = OCELL;
279 tp->nelem++;
280 if (tp->nelem > FULLTAB * tp->size)
281 rehash(tp);
282 h = hash(n, tp->size);
283 p->cnext = tp->tab[h];
284 tp->tab[h] = p;
285 dprintf(("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
286 (void *)p, p->nval, p->sval, p->fval, p->tval));
287 return (p);
288 }
289
290 int
291 hash(const uchar *s, int n) /* form hash value for string s */
292 {
293 unsigned hashval;
294
295 for (hashval = 0; *s != '\0'; s++)
296 hashval = (*s + 31 * hashval);
297 return (hashval % n);
298 }
299
300 static void
301 rehash(Array *tp) /* rehash items in small table into big one */
302 {
303 int i, nh, nsz;
304 Cell *cp, *op, **np;
305
306 nsz = GROWTAB * tp->size;
307 np = (Cell **)calloc(nsz, sizeof (Cell *));
308 if (np == NULL) /* can't do it, but can keep running. */
309 return; /* someone else will run out later. */
310 for (i = 0; i < tp->size; i++) {
311 for (cp = tp->tab[i]; cp; cp = op) {
312 op = cp->cnext;
313 nh = hash(cp->nval, nsz);
314 cp->cnext = np[nh];
315 np[nh] = cp;
316 }
317 }
318 free(tp->tab);
319 tp->tab = np;
320 tp->size = nsz;
321 }
322
323 Cell *
324 lookup(const uchar *s, Array *tp) /* look for s in tp */
325 {
326 Cell *p;
327 int h;
328
329 h = hash(s, tp->size);
330 for (p = tp->tab[h]; p != NULL; p = p->cnext) {
331 if (strcmp((char *)s, (char *)p->nval) == 0)
332 return (p); /* found it */
333 }
334 return (NULL); /* not found */
335 }
336
337 Awkfloat
338 setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
339 {
340 int fldno;
341
342 if ((vp->tval & (NUM | STR)) == 0)
343 funnyvar(vp, "assign to");
344 if (isfld(vp)) {
345 donerec = 0; /* mark $0 invalid */
346 fldno = atoi((char *)vp->nval);
347 if (fldno > *NF)
348 newfld(fldno);
349 dprintf(("setting field %d to %g\n", fldno, f));
350 } else if (isrec(vp)) {
351 donefld = 0; /* mark $1... invalid */
352 donerec = 1;
353 }
354 if (freeable(vp))
355 xfree(vp->sval); /* free any previous string */
356 vp->tval &= ~STR; /* mark string invalid */
357 vp->tval |= NUM; /* mark number ok */
358 if (f == -0) /* who would have thought this possible? */
359 f = 0;
360 dprintf(("setfval %p: %s = %g, t=%o\n", (void *)vp,
361 vp->nval, f, vp->tval));
362 return (vp->fval = f);
363 }
364
365 void
366 funnyvar(Cell *vp, char *rw)
367 {
368 if (isarr(vp))
369 FATAL("can't %s %s; it's an array name.", rw, vp->nval);
370 if (vp->tval & FCN)
371 FATAL("can't %s %s; it's a function.", rw, vp->nval);
372 WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
373 vp, vp->nval, vp->sval, vp->fval, vp->tval);
374 }
375
376 uchar *
377 setsval(Cell *vp, const uchar *s) /* set string val of a Cell */
378 {
379 uchar *t;
380 int fldno;
381
382 dprintf(("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
383 (void*)vp, vp->nval, s, vp->tval, donerec, donefld));
384 if ((vp->tval & (NUM | STR)) == 0)
385 funnyvar(vp, "assign to");
386 if (isfld(vp)) {
387 donerec = 0; /* mark $0 invalid */
388 fldno = atoi((const char *)vp->nval);
389 if (fldno > *NF)
390 newfld(fldno);
391 dprintf(("setting field %d to %s (%p)\n", fldno, s, (void *)s));
392 } else if (isrec(vp)) {
393 donefld = 0; /* mark $1... invalid */
394 donerec = 1;
395 }
396 t = tostring(s); /* in case it's self-assign */
397 if (freeable(vp))
398 xfree(vp->sval);
399 vp->tval &= ~NUM;
400 vp->tval |= STR;
401 vp->tval &= ~DONTFREE;
402 dprintf(("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
403 (void *)vp, vp->nval, t, (void *)t, vp->tval, donerec, donefld));
404 return (vp->sval = t);
405 }
406
407 Awkfloat
408 getfval(Cell *vp) /* get float val of a Cell */
409 {
410 if ((vp->tval & (NUM | STR)) == 0)
411 funnyvar(vp, "read value of");
412 if (isfld(vp) && donefld == 0)
413 fldbld();
414 else if (isrec(vp) && donerec == 0)
415 recbld();
416 if (!isnum(vp)) { /* not a number */
417 vp->fval = atof((const char *)vp->sval); /* best guess */
418 if (is_number(vp->sval) && !(vp->tval & CON))
419 vp->tval |= NUM; /* make NUM only sparingly */
420 }
421 dprintf(("getfval %p: %s = %g, t=%o\n",
422 (void *)vp, vp->nval, vp->fval, vp->tval));
423 return (vp->fval);
424 }
425
426 static uchar *
427 get_str_val(Cell *vp, uchar **fmt) /* get string val of a Cell */
428 {
429 uchar s[100]; /* BUG: unchecked */
430 double dtemp;
431
432 if ((vp->tval & (NUM | STR)) == 0)
433 funnyvar(vp, "read value of");
434 if (isfld(vp) && donefld == 0)
435 fldbld();
436 else if (isrec(vp) && donerec == 0)
437 recbld();
438 if (isstr(vp) == 0) {
439 if (freeable(vp))
440 xfree(vp->sval);
441 /* it's integral */
442 if (modf((long long)vp->fval, &dtemp) == 0) {
443 (void) snprintf((char *)s, sizeof (s),
444 "%.30g", vp->fval);
445 } else {
446 /*LINTED*/
447 (void) snprintf((char *)s, sizeof (s),
448 (char *)*fmt, vp->fval);
449 }
450 vp->sval = tostring(s);
451 vp->tval &= ~DONTFREE;
452 vp->tval |= STR;
453 }
454 dprintf(("getsval %p: %s = \"%s (%p)\", t=%o\n",
455 (void *)vp, vp->nval, vp->sval, (void *)vp->sval, vp->tval));
456 return (vp->sval);
457 }
458
459 uchar *
460 getsval(Cell *vp) /* get string val of a Cell */
461 {
462 return (get_str_val(vp, CONVFMT));
463 }
464
465 uchar *
466 getpssval(Cell *vp) /* get string val of a Cell for print */
467 {
468 return (get_str_val(vp, OFMT));
469 }
470
471 uchar *
472 tostring(const uchar *s) /* make a copy of string s */
473 {
474 uchar *p;
475
476 p = (uchar *)malloc(strlen((char *)s)+1);
477 if (p == NULL)
478 FATAL("out of space in tostring on %s", s);
479 (void) strcpy((char *)p, (char *)s);
480 return (p);
481 }
482
483 uchar *
484 qstring(const uchar *is, int delim) /* collect string up to delim */
485 {
486 const uchar *os = is;
487 int c, n;
488 uchar *s = (uchar *)is;
489 uchar *buf, *bp;
490
491 if ((buf = (uchar *)malloc(strlen((char *)is)+3)) == NULL)
492 FATAL("out of space in qstring(%s)", s);
493 for (bp = buf; (c = *s) != delim; s++) {
494 if (c == '\n') {
495 SYNTAX("newline in string %.20s...", os);
496 } else if (c != '\\') {
497 *bp++ = c;
498 } else { /* \something */
499 c = *++s;
500 if (c == 0) { /* \ at end */
501 *bp++ = '\\';
502 break; /* for loop */
503 }
504 switch (c) {
505 case '\\': *bp++ = '\\'; break;
506 case 'n': *bp++ = '\n'; break;
507 case 't': *bp++ = '\t'; break;
508 case 'b': *bp++ = '\b'; break;
509 case 'f': *bp++ = '\f'; break;
510 case 'r': *bp++ = '\r'; break;
511 default:
512 if (!isdigit(c)) {
513 *bp++ = c;
514 break;
515 }
516 n = c - '0';
517 if (isdigit(s[1])) {
518 n = 8 * n + *++s - '0';
519 if (isdigit(s[1]))
520 n = 8 * n + *++s - '0';
521 }
522 *bp++ = n;
523 break;
524 }
525 }
526 }
527 *bp++ = 0;
528 return (buf);
529 }