1 %{
   2 /*
   3  * CDDL HEADER START
   4  *
   5  * The contents of this file are subject to the terms of the
   6  * Common Development and Distribution License (the "License").
   7  * You may not use this file except in compliance with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 
  23 /*
  24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25  */
  26 /*
  27  * Copyright (c) 2013 by Delphix. All rights reserved.
  28  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  29  */
  30 
  31 #include <string.h>
  32 #include <stdlib.h>
  33 #include <stdio.h>
  34 #include <assert.h>
  35 #include <ctype.h>
  36 #include <errno.h>
  37 
  38 #include <dt_impl.h>
  39 #include <dt_grammar.h>
  40 #include <dt_parser.h>
  41 #include <dt_string.h>
  42 
  43 /*
  44  * We need to undefine lex's input and unput macros so that references to these
  45  * call the functions provided at the end of this source file.
  46  */
  47 #undef input
  48 #undef unput
  49 
  50 static int id_or_type(const char *);
  51 static int input(void);
  52 static void unput(int);
  53 
  54 /*
  55  * We first define a set of labeled states for use in the D lexer and then a
  56  * set of regular expressions to simplify things below. The lexer states are:
  57  *
  58  * S0 - D program clause and expression lexing
  59  * S1 - D comments (i.e. skip everything until end of comment)
  60  * S2 - D program outer scope (probe specifiers and declarations)
  61  * S3 - D control line parsing (i.e. after ^# is seen but before \n)
  62  * S4 - D control line scan (locate control directives only and invoke S3)
  63  */
  64 %}
  65 
  66 %e 1500         /* maximum nodes */
  67 %p 4900         /* maximum positions */
  68 %n 600          /* maximum states */
  69 %a 3000         /* maximum transitions */
  70 
  71 %s S0 S1 S2 S3 S4
  72 
  73 RGX_AGG         "@"[a-zA-Z_][0-9a-zA-Z_]*
  74 RGX_PSPEC       [-$:a-zA-Z_.?*\\\[\]!][-$:0-9a-zA-Z_.`?*\\\[\]!]*
  75 RGX_ALTIDENT    [a-zA-Z_][0-9a-zA-Z_]*
  76 RGX_LMID        LM[0-9a-fA-F]+`
  77 RGX_MOD_IDENT   [a-zA-Z_`][0-9a-z.A-Z_`]*`
  78 RGX_IDENT       [a-zA-Z_`][0-9a-zA-Z_`]*
  79 RGX_INT         ([0-9]+|0[xX][0-9A-Fa-f]+)[uU]?[lL]?[lL]?
  80 RGX_FP          ([0-9]+("."?)[0-9]*|"."[0-9]+)((e|E)("+"|-)?[0-9]+)?[fFlL]?
  81 RGX_WS          [\f\n\r\t\v ]
  82 RGX_STR         ([^"\\\n]|\\[^"\n]|\\\")*
  83 RGX_CHR         ([^'\\\n]|\\[^'\n]|\\')*
  84 RGX_INTERP      ^[\f\t\v ]*#!.*
  85 RGX_CTL         ^[\f\t\v ]*#
  86 
  87 %%
  88 
  89 %{
  90 
  91 /*
  92  * We insert a special prologue into yylex() itself: if the pcb contains a
  93  * context token, we return that prior to running the normal lexer.  This
  94  * allows libdtrace to force yacc into one of our three parsing contexts: D
  95  * expression (DT_CTX_DEXPR), D program (DT_CTX_DPROG) or D type (DT_CTX_DTYPE).
  96  * Once the token is returned, we clear it so this only happens once.
  97  */
  98 if (yypcb->pcb_token != 0) {
  99         int tok = yypcb->pcb_token;
 100         yypcb->pcb_token = 0;
 101         return (tok);
 102 }
 103 
 104 %}
 105 
 106 <S0>auto  return (DT_KEY_AUTO);
 107 <S0>break return (DT_KEY_BREAK);
 108 <S0>case  return (DT_KEY_CASE);
 109 <S0>char  return (DT_KEY_CHAR);
 110 <S0>const return (DT_KEY_CONST);
 111 <S0>continue      return (DT_KEY_CONTINUE);
 112 <S0>counter       return (DT_KEY_COUNTER);
 113 <S0>default       return (DT_KEY_DEFAULT);
 114 <S0>do            return (DT_KEY_DO);
 115 <S0>double        return (DT_KEY_DOUBLE);
 116 <S0>else  return (DT_KEY_ELSE);
 117 <S0>enum  return (DT_KEY_ENUM);
 118 <S0>extern        return (DT_KEY_EXTERN);
 119 <S0>float return (DT_KEY_FLOAT);
 120 <S0>for           return (DT_KEY_FOR);
 121 <S0>goto  return (DT_KEY_GOTO);
 122 <S0>if            return (DT_KEY_IF);
 123 <S0>import        return (DT_KEY_IMPORT);
 124 <S0>inline        return (DT_KEY_INLINE);
 125 <S0>int           return (DT_KEY_INT);
 126 <S0>long  return (DT_KEY_LONG);
 127 <S0>offsetof      return (DT_TOK_OFFSETOF);
 128 <S0>probe return (DT_KEY_PROBE);
 129 <S0>provider      return (DT_KEY_PROVIDER);
 130 <S0>register      return (DT_KEY_REGISTER);
 131 <S0>restrict      return (DT_KEY_RESTRICT);
 132 <S0>return        return (DT_KEY_RETURN);
 133 <S0>self  return (DT_KEY_SELF);
 134 <S0>short return (DT_KEY_SHORT);
 135 <S0>signed        return (DT_KEY_SIGNED);
 136 <S0>sizeof        return (DT_TOK_SIZEOF);
 137 <S0>static        return (DT_KEY_STATIC);
 138 <S0>string        return (DT_KEY_STRING);
 139 <S0>stringof      return (DT_TOK_STRINGOF);
 140 <S0>struct        return (DT_KEY_STRUCT);
 141 <S0>switch        return (DT_KEY_SWITCH);
 142 <S0>this  return (DT_KEY_THIS);
 143 <S0>translator    return (DT_KEY_XLATOR);
 144 <S0>typedef       return (DT_KEY_TYPEDEF);
 145 <S0>union return (DT_KEY_UNION);
 146 <S0>unsigned      return (DT_KEY_UNSIGNED);
 147 <S0>userland      return (DT_KEY_USERLAND);
 148 <S0>void  return (DT_KEY_VOID);
 149 <S0>volatile      return (DT_KEY_VOLATILE);
 150 <S0>while return (DT_KEY_WHILE);
 151 <S0>xlate return (DT_TOK_XLATE);
 152 
 153 <S2>auto  { yybegin(YYS_EXPR);    return (DT_KEY_AUTO); }
 154 <S2>char  { yybegin(YYS_EXPR);    return (DT_KEY_CHAR); }
 155 <S2>const { yybegin(YYS_EXPR);    return (DT_KEY_CONST); }
 156 <S2>counter       { yybegin(YYS_DEFINE);  return (DT_KEY_COUNTER); }
 157 <S2>double        { yybegin(YYS_EXPR);    return (DT_KEY_DOUBLE); }
 158 <S2>enum  { yybegin(YYS_EXPR);    return (DT_KEY_ENUM); }
 159 <S2>extern        { yybegin(YYS_EXPR);    return (DT_KEY_EXTERN); }
 160 <S2>float { yybegin(YYS_EXPR);    return (DT_KEY_FLOAT); }
 161 <S2>import        { yybegin(YYS_EXPR);    return (DT_KEY_IMPORT); }
 162 <S2>inline        { yybegin(YYS_DEFINE);  return (DT_KEY_INLINE); }
 163 <S2>int           { yybegin(YYS_EXPR);    return (DT_KEY_INT); }
 164 <S2>long  { yybegin(YYS_EXPR);    return (DT_KEY_LONG); }
 165 <S2>provider      { yybegin(YYS_DEFINE);  return (DT_KEY_PROVIDER); }
 166 <S2>register      { yybegin(YYS_EXPR);    return (DT_KEY_REGISTER); }
 167 <S2>restrict      { yybegin(YYS_EXPR);    return (DT_KEY_RESTRICT); }
 168 <S2>self  { yybegin(YYS_EXPR);    return (DT_KEY_SELF); }
 169 <S2>short { yybegin(YYS_EXPR);    return (DT_KEY_SHORT); }
 170 <S2>signed        { yybegin(YYS_EXPR);    return (DT_KEY_SIGNED); }
 171 <S2>static        { yybegin(YYS_EXPR);    return (DT_KEY_STATIC); }
 172 <S2>string        { yybegin(YYS_EXPR);    return (DT_KEY_STRING); }
 173 <S2>struct        { yybegin(YYS_EXPR);    return (DT_KEY_STRUCT); }
 174 <S2>this  { yybegin(YYS_EXPR);    return (DT_KEY_THIS); }
 175 <S2>translator    { yybegin(YYS_DEFINE);  return (DT_KEY_XLATOR); }
 176 <S2>typedef       { yybegin(YYS_EXPR);    return (DT_KEY_TYPEDEF); }
 177 <S2>union { yybegin(YYS_EXPR);    return (DT_KEY_UNION); }
 178 <S2>unsigned      { yybegin(YYS_EXPR);    return (DT_KEY_UNSIGNED); }
 179 <S2>void  { yybegin(YYS_EXPR);    return (DT_KEY_VOID); }
 180 <S2>volatile      { yybegin(YYS_EXPR);    return (DT_KEY_VOLATILE); }
 181 
 182 <S0>"$$"[0-9]+    {
 183                         int i = atoi(yytext + 2);
 184                         char *v = "";
 185 
 186                         /*
 187                          * A macro argument reference substitutes the text of
 188                          * an argument in place of the current token.  When we
 189                          * see $$<d> we fetch the saved string from pcb_sargv
 190                          * (or use the default argument if the option has been
 191                          * set and the argument hasn't been specified) and
 192                          * return a token corresponding to this string.
 193                          */
 194                         if (i < 0 || (i >= yypcb->pcb_sargc &&
 195                             !(yypcb->pcb_cflags & DTRACE_C_DEFARG))) {
 196                                 xyerror(D_MACRO_UNDEF, "macro argument %s is "
 197                                     "not defined\n", yytext);
 198                         }
 199 
 200                         if (i < yypcb->pcb_sargc) {
 201                                 v = yypcb->pcb_sargv[i]; /* get val from pcb */
 202                                 yypcb->pcb_sflagv[i] |= DT_IDFLG_REF;
 203                         }
 204 
 205                         if ((yylval.l_str = strdup(v)) == NULL)
 206                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 207 
 208                         (void) stresc2chr(yylval.l_str);
 209                         return (DT_TOK_STRING);
 210                 }
 211 
 212 <S0>"$"[0-9]+     {
 213                         int i = atoi(yytext + 1);
 214                         char *p, *v = "0";
 215 
 216                         /*
 217                          * A macro argument reference substitutes the text of
 218                          * one identifier or integer pattern for another.  When
 219                          * we see $<d> we fetch the saved string from pcb_sargv
 220                          * (or use the default argument if the option has been
 221                          * set and the argument hasn't been specified) and
 222                          * return a token corresponding to this string.
 223                          */
 224                         if (i < 0 || (i >= yypcb->pcb_sargc &&
 225                             !(yypcb->pcb_cflags & DTRACE_C_DEFARG))) {
 226                                 xyerror(D_MACRO_UNDEF, "macro argument %s is "
 227                                     "not defined\n", yytext);
 228                         }
 229 
 230                         if (i < yypcb->pcb_sargc) {
 231                                 v = yypcb->pcb_sargv[i]; /* get val from pcb */
 232                                 yypcb->pcb_sflagv[i] |= DT_IDFLG_REF;
 233                         }
 234 
 235                         /*
 236                          * If the macro text is not a valid integer or ident,
 237                          * then we treat it as a string.  The string may be
 238                          * optionally enclosed in quotes, which we strip.
 239                          */
 240                         if (strbadidnum(v)) {
 241                                 size_t len = strlen(v);
 242 
 243                                 if (len != 1 && *v == '"' && v[len - 1] == '"')
 244                                         yylval.l_str = strndup(v + 1, len - 2);
 245                                 else
 246                                         yylval.l_str = strndup(v, len);
 247 
 248                                 if (yylval.l_str == NULL)
 249                                         longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 250 
 251                                 (void) stresc2chr(yylval.l_str);
 252                                 return (DT_TOK_STRING);
 253                         }
 254 
 255                         /*
 256                          * If the macro text is not a string an begins with a
 257                          * digit or a +/- sign, process it as an integer token.
 258                          */
 259                         if (isdigit(v[0]) || v[0] == '-' || v[0] == '+') {
 260                                 if (isdigit(v[0]))
 261                                         yyintprefix = 0;
 262                                 else
 263                                         yyintprefix = *v++;
 264 
 265                                 errno = 0;
 266                                 yylval.l_int = strtoull(v, &p, 0);
 267                                 (void) strncpy(yyintsuffix, p,
 268                                     sizeof (yyintsuffix));
 269                                 yyintdecimal = *v != '0';
 270 
 271                                 if (errno == ERANGE) {
 272                                         xyerror(D_MACRO_OFLOW, "macro argument"
 273                                             " %s constant %s results in integer"
 274                                             " overflow\n", yytext, v);
 275                                 }
 276 
 277                                 return (DT_TOK_INT);
 278                         }
 279 
 280                         return (id_or_type(v));
 281                 }
 282 
 283 <S0>"$$"{RGX_IDENT} {
 284                         dt_ident_t *idp = dt_idhash_lookup(
 285                             yypcb->pcb_hdl->dt_macros, yytext + 2);
 286 
 287                         char s[16]; /* enough for UINT_MAX + \0 */
 288 
 289                         if (idp == NULL) {
 290                                 xyerror(D_MACRO_UNDEF, "macro variable %s "
 291                                     "is not defined\n", yytext);
 292                         }
 293 
 294                         /*
 295                          * For the moment, all current macro variables are of
 296                          * type id_t (refer to dtrace_update() for details).
 297                          */
 298                         (void) snprintf(s, sizeof (s), "%u", idp->di_id);
 299                         if ((yylval.l_str = strdup(s)) == NULL)
 300                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 301 
 302                         return (DT_TOK_STRING);
 303                 }
 304 
 305 <S0>"$"{RGX_IDENT} {
 306                         dt_ident_t *idp = dt_idhash_lookup(
 307                             yypcb->pcb_hdl->dt_macros, yytext + 1);
 308 
 309                         if (idp == NULL) {
 310                                 xyerror(D_MACRO_UNDEF, "macro variable %s "
 311                                     "is not defined\n", yytext);
 312                         }
 313 
 314                         /*
 315                          * For the moment, all current macro variables are of
 316                          * type id_t (refer to dtrace_update() for details).
 317                          */
 318                         yylval.l_int = (intmax_t)(int)idp->di_id;
 319                         yyintprefix = 0;
 320                         yyintsuffix[0] = '\0';
 321                         yyintdecimal = 1;
 322 
 323                         return (DT_TOK_INT);
 324                 }
 325 
 326 <S0>{RGX_IDENT} |
 327 <S0>{RGX_MOD_IDENT}{RGX_IDENT} |
 328 <S0>{RGX_MOD_IDENT} {
 329                         return (id_or_type(yytext));
 330                 }
 331 
 332 <S0>{RGX_AGG}     {
 333                         if ((yylval.l_str = strdup(yytext)) == NULL)
 334                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 335                         return (DT_TOK_AGG);
 336                 }
 337 
 338 <S0>"@"           {
 339                         if ((yylval.l_str = strdup("@_")) == NULL)
 340                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 341                         return (DT_TOK_AGG);
 342                 }
 343 
 344 <S0>{RGX_INT}     |
 345 <S2>{RGX_INT}     |
 346 <S3>{RGX_INT}     {
 347                         char *p;
 348 
 349                         errno = 0;
 350                         yylval.l_int = strtoull(yytext, &p, 0);
 351                         yyintprefix = 0;
 352                         (void) strncpy(yyintsuffix, p, sizeof (yyintsuffix));
 353                         yyintdecimal = yytext[0] != '0';
 354 
 355                         if (errno == ERANGE) {
 356                                 xyerror(D_INT_OFLOW, "constant %s results in "
 357                                     "integer overflow\n", yytext);
 358                         }
 359 
 360                         if (*p != '\0' && strchr("uUlL", *p) == NULL) {
 361                                 xyerror(D_INT_DIGIT, "constant %s contains "
 362                                     "invalid digit %c\n", yytext, *p);
 363                         }
 364 
 365                         if ((YYSTATE) != S3)
 366                                 return (DT_TOK_INT);
 367 
 368                         yypragma = dt_node_link(yypragma,
 369                             dt_node_int(yylval.l_int));
 370                 }
 371 
 372 <S0>{RGX_FP}      yyerror("floating-point constants are not permitted\n");
 373 
 374 <S0>\"{RGX_STR}$ |
 375 <S3>\"{RGX_STR}$ xyerror(D_STR_NL, "newline encountered in string literal");
 376 
 377 <S0>\"{RGX_STR}\" |
 378 <S3>\"{RGX_STR}\" {
 379                         /*
 380                          * Quoted string -- convert C escape sequences and
 381                          * return the string as a token.
 382                          */
 383                         yylval.l_str = strndup(yytext + 1, yyleng - 2);
 384 
 385                         if (yylval.l_str == NULL)
 386                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 387 
 388                         (void) stresc2chr(yylval.l_str);
 389                         if ((YYSTATE) != S3)
 390                                 return (DT_TOK_STRING);
 391 
 392                         yypragma = dt_node_link(yypragma,
 393                             dt_node_string(yylval.l_str));
 394                 }
 395 
 396 <S0>'{RGX_CHR}$   xyerror(D_CHR_NL, "newline encountered in character constant");
 397 
 398 <S0>'{RGX_CHR}'   {
 399                         char *s, *p, *q;
 400                         size_t nbytes;
 401 
 402                         /*
 403                          * Character constant -- convert C escape sequences and
 404                          * return the character as an integer immediate value.
 405                          */
 406                         if (yyleng == 2)
 407                                 xyerror(D_CHR_NULL, "empty character constant");
 408 
 409                         s = yytext + 1;
 410                         yytext[yyleng - 1] = '\0';
 411                         nbytes = stresc2chr(s);
 412                         yylval.l_int = 0;
 413                         yyintprefix = 0;
 414                         yyintsuffix[0] = '\0';
 415                         yyintdecimal = 1;
 416 
 417                         if (nbytes > sizeof (yylval.l_int)) {
 418                                 xyerror(D_CHR_OFLOW, "character constant is "
 419                                     "too long");
 420                         }
 421 #ifdef _LITTLE_ENDIAN
 422                         p = ((char *)&yylval.l_int) + nbytes - 1;
 423                         for (q = s; nbytes != 0; nbytes--)
 424                                 *p-- = *q++;
 425 #else
 426                         bcopy(s, ((char *)&yylval.l_int) +
 427                             sizeof (yylval.l_int) - nbytes, nbytes);
 428 #endif
 429                         return (DT_TOK_INT);
 430                 }
 431 
 432 <S0>"/*"  |
 433 <S2>"/*"  {
 434                         yypcb->pcb_cstate = (YYSTATE);
 435                         BEGIN(S1);
 436                 }
 437 
 438 <S0>{RGX_INTERP} |
 439 <S2>{RGX_INTERP} ;        /* discard any #! lines */
 440 
 441 <S0>{RGX_CTL}     |
 442 <S2>{RGX_CTL}     |
 443 <S4>{RGX_CTL}     {
 444                         assert(yypragma == NULL);
 445                         yypcb->pcb_cstate = (YYSTATE);
 446                         BEGIN(S3);
 447                 }
 448 
 449 <S4>.             ;       /* discard */
 450 <S4>"\n"  ;       /* discard */
 451 
 452 <S0>"/"           {
 453                         int c, tok;
 454 
 455                         /*
 456                          * The use of "/" as the predicate delimiter and as the
 457                          * integer division symbol requires special lookahead
 458                          * to avoid a shift/reduce conflict in the D grammar.
 459                          * We look ahead to the next non-whitespace character.
 460                          * If we encounter EOF, ";", "{", or "/", then this "/"
 461                          * closes the predicate and we return DT_TOK_EPRED.
 462                          * If we encounter anything else, it's DT_TOK_DIV.
 463                          */
 464                         while ((c = input()) != 0) {
 465                                 if (strchr("\f\n\r\t\v ", c) == NULL)
 466                                         break;
 467                         }
 468 
 469                         if (c == 0 || c == ';' || c == '{' || c == '/') {
 470                                 if (yypcb->pcb_parens != 0) {
 471                                         yyerror("closing ) expected in "
 472                                             "predicate before /\n");
 473                                 }
 474                                 if (yypcb->pcb_brackets != 0) {
 475                                         yyerror("closing ] expected in "
 476                                             "predicate before /\n");
 477                                 }
 478                                 tok = DT_TOK_EPRED;
 479                         } else
 480                                 tok = DT_TOK_DIV;
 481 
 482                         unput(c);
 483                         return (tok);
 484                 }
 485 
 486 <S0>"("           {
 487                         yypcb->pcb_parens++;
 488                         return (DT_TOK_LPAR);
 489                 }
 490 
 491 <S0>")"           {
 492                         if (--yypcb->pcb_parens < 0)
 493                                 yyerror("extra ) in input stream\n");
 494                         return (DT_TOK_RPAR);
 495                 }
 496 
 497 <S0>"["           {
 498                         yypcb->pcb_brackets++;
 499                         return (DT_TOK_LBRAC);
 500                 }
 501 
 502 <S0>"]"           {
 503                         if (--yypcb->pcb_brackets < 0)
 504                                 yyerror("extra ] in input stream\n");
 505                         return (DT_TOK_RBRAC);
 506                 }
 507 
 508 <S0>"{"           |
 509 <S2>"{"           {
 510                         yypcb->pcb_braces++;
 511                         return ('{');
 512                 }
 513 
 514 <S0>"}"           {
 515                         if (--yypcb->pcb_braces < 0)
 516                                 yyerror("extra } in input stream\n");
 517                         return ('}');
 518                 }
 519 
 520 <S0>"|"           return (DT_TOK_BOR);
 521 <S0>"^"           return (DT_TOK_XOR);
 522 <S0>"&"               return (DT_TOK_BAND);
 523 <S0>"&&"  return (DT_TOK_LAND);
 524 <S0>"^^"  return (DT_TOK_LXOR);
 525 <S0>"||"  return (DT_TOK_LOR);
 526 <S0>"=="  return (DT_TOK_EQU);
 527 <S0>"!="  return (DT_TOK_NEQ);
 528 <S0>"<"                return (DT_TOK_LT);
 529 <S0>"<="       return (DT_TOK_LE);
 530 <S0>">"                return (DT_TOK_GT);
 531 <S0>">="       return (DT_TOK_GE);
 532 <S0>"<<"    return (DT_TOK_LSH);
 533 <S0>">>"    return (DT_TOK_RSH);
 534 <S0>"+"           return (DT_TOK_ADD);
 535 <S0>"-"           return (DT_TOK_SUB);
 536 <S0>"*"           return (DT_TOK_MUL);
 537 <S0>"%"           return (DT_TOK_MOD);
 538 <S0>"~"           return (DT_TOK_BNEG);
 539 <S0>"!"           return (DT_TOK_LNEG);
 540 <S0>"?"           return (DT_TOK_QUESTION);
 541 <S0>":"           return (DT_TOK_COLON);
 542 <S0>"."           return (DT_TOK_DOT);
 543 <S0>"->"       return (DT_TOK_PTR);
 544 <S0>"="           return (DT_TOK_ASGN);
 545 <S0>"+="  return (DT_TOK_ADD_EQ);
 546 <S0>"-="  return (DT_TOK_SUB_EQ);
 547 <S0>"*="  return (DT_TOK_MUL_EQ);
 548 <S0>"/="  return (DT_TOK_DIV_EQ);
 549 <S0>"%="  return (DT_TOK_MOD_EQ);
 550 <S0>"&="      return (DT_TOK_AND_EQ);
 551 <S0>"^="  return (DT_TOK_XOR_EQ);
 552 <S0>"|="  return (DT_TOK_OR_EQ);
 553 <S0>"<<="   return (DT_TOK_LSH_EQ);
 554 <S0>">>="   return (DT_TOK_RSH_EQ);
 555 <S0>"++"  return (DT_TOK_ADDADD);
 556 <S0>"--"  return (DT_TOK_SUBSUB);
 557 <S0>"..." return (DT_TOK_ELLIPSIS);
 558 <S0>","           return (DT_TOK_COMMA);
 559 <S0>";"           return (';');
 560 <S0>{RGX_WS}      ; /* discard */
 561 <S0>"\\"\n        ; /* discard */
 562 <S0>.             yyerror("syntax error near \"%c\"\n", yytext[0]);
 563 
 564 <S1>"/*"  yyerror("/* encountered inside a comment\n");
 565 <S1>"*/"  BEGIN(yypcb->pcb_cstate);
 566 <S1>.|\n  ; /* discard */
 567 
 568 <S2>{RGX_PSPEC}   {
 569                         /*
 570                          * S2 has an ambiguity because RGX_PSPEC includes '*'
 571                          * as a glob character and '*' also can be DT_TOK_STAR.
 572                          * Since lex always matches the longest token, this
 573                          * rule can be matched by an input string like "int*",
 574                          * which could begin a global variable declaration such
 575                          * as "int*x;" or could begin a RGX_PSPEC with globbing
 576                          * such as "int* { trace(timestamp); }".  If C_PSPEC is
 577                          * not set, we must resolve the ambiguity in favor of
 578                          * the type and perform lexer pushback if the fragment
 579                          * before '*' or entire fragment matches a type name.
 580                          * If C_PSPEC is set, we always return a PSPEC token.
 581                          * If C_PSPEC is off, the user can avoid ambiguity by
 582                          * including a ':' delimiter in the specifier, which
 583                          * they should be doing anyway to specify the provider.
 584                          */
 585                         if (!(yypcb->pcb_cflags & DTRACE_C_PSPEC) &&
 586                             strchr(yytext, ':') == NULL) {
 587 
 588                                 char *p = strchr(yytext, '*');
 589                                 char *q = yytext + yyleng - 1;
 590 
 591                                 if (p != NULL && p > yytext)
 592                                         *p = '\0'; /* prune yytext */
 593 
 594                                 if (dt_type_lookup(yytext, NULL) == 0) {
 595                                         yylval.l_str = strdup(yytext);
 596 
 597                                         if (yylval.l_str == NULL) {
 598                                                 longjmp(yypcb->pcb_jmpbuf,
 599                                                     EDT_NOMEM);
 600                                         }
 601 
 602                                         if (p != NULL && p > yytext) {
 603                                                 for (*p = '*'; q >= p; q--)
 604                                                         unput(*q);
 605                                         }
 606 
 607                                         yybegin(YYS_EXPR);
 608                                         return (DT_TOK_TNAME);
 609                                 }
 610 
 611                                 if (p != NULL && p > yytext)
 612                                         *p = '*'; /* restore yytext */
 613                         }
 614 
 615                         if ((yylval.l_str = strdup(yytext)) == NULL)
 616                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 617 
 618                         return (DT_TOK_PSPEC);
 619                 }
 620 
 621 <S2>"/"           return (DT_TOK_DIV);
 622 <S2>","           return (DT_TOK_COMMA);
 623 
 624 <S2>{RGX_WS}      ; /* discard */
 625 <S2>.             yyerror("syntax error near \"%c\"\n", yytext[0]);
 626 
 627 <S3>\n            {
 628                         dt_pragma(yypragma);
 629                         yypragma = NULL;
 630                         BEGIN(yypcb->pcb_cstate);
 631                 }
 632 
 633 <S3>[\f\t\v ]+    ; /* discard */
 634 
 635 <S3>[^\f\n\t\v "]+ {
 636                         dt_node_t *dnp;
 637 
 638                         if ((yylval.l_str = strdup(yytext)) == NULL)
 639                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 640 
 641                         /*
 642                          * We want to call dt_node_ident() here, but we can't
 643                          * because it will expand inlined identifiers, which we
 644                          * don't want to do from #pragma context in order to
 645                          * support pragmas that apply to the ident itself.  We
 646                          * call dt_node_string() and then reset dn_op instead.
 647                          */
 648                         dnp = dt_node_string(yylval.l_str);
 649                         dnp->dn_kind = DT_NODE_IDENT;
 650                         dnp->dn_op = DT_TOK_IDENT;
 651                         yypragma = dt_node_link(yypragma, dnp);
 652                 }
 653 
 654 <S3>.             yyerror("syntax error near \"%c\"\n", yytext[0]);
 655 
 656 %%
 657 
 658 /*
 659  * yybegin provides a wrapper for use from C code around the lex BEGIN() macro.
 660  * We use two main states for lexing because probe descriptions use a syntax
 661  * that is incompatible with the normal D tokens (e.g. names can contain "-").
 662  * yybegin also handles the job of switching between two lists of dt_nodes
 663  * as we allocate persistent definitions, like inlines, and transient nodes
 664  * that will be freed once we are done parsing the current program file.
 665  */
 666 void
 667 yybegin(yystate_t state)
 668 {
 669 #ifdef  YYDEBUG
 670         yydebug = _dtrace_debug;
 671 #endif
 672         if (yypcb->pcb_yystate == state)
 673                 return; /* nothing to do if we're in the state already */
 674 
 675         if (yypcb->pcb_yystate == YYS_DEFINE) {
 676                 yypcb->pcb_list = yypcb->pcb_hold;
 677                 yypcb->pcb_hold = NULL;
 678         }
 679 
 680         switch (state) {
 681         case YYS_CLAUSE:
 682                 BEGIN(S2);
 683                 break;
 684         case YYS_DEFINE:
 685                 assert(yypcb->pcb_hold == NULL);
 686                 yypcb->pcb_hold = yypcb->pcb_list;
 687                 yypcb->pcb_list = NULL;
 688                 /*FALLTHRU*/
 689         case YYS_EXPR:
 690                 BEGIN(S0);
 691                 break;
 692         case YYS_DONE:
 693                 break;
 694         case YYS_CONTROL:
 695                 BEGIN(S4);
 696                 break;
 697         default:
 698                 xyerror(D_UNKNOWN, "internal error -- bad yystate %d\n", state);
 699         }
 700 
 701         yypcb->pcb_yystate = state;
 702 }
 703 
 704 void
 705 yyinit(dt_pcb_t *pcb)
 706 {
 707         yypcb = pcb;
 708         yylineno = 1;
 709         yypragma = NULL;
 710         yysptr = yysbuf;
 711 }
 712 
 713 /*
 714  * Given a lexeme 's' (typically yytext), set yylval and return an appropriate
 715  * token to the parser indicating either an identifier or a typedef name.
 716  * User-defined global variables always take precedence over types, but we do
 717  * use some heuristics because D programs can look at an ever-changing set of
 718  * kernel types and also can implicitly instantiate variables by assignment,
 719  * unlike in C.  The code here is ordered carefully as lookups are not cheap.
 720  */
 721 static int
 722 id_or_type(const char *s)
 723 {
 724         dtrace_hdl_t *dtp = yypcb->pcb_hdl;
 725         dt_decl_t *ddp = yypcb->pcb_dstack.ds_decl;
 726         int c0, c1, ttok = DT_TOK_TNAME;
 727         dt_ident_t *idp;
 728 
 729         if ((s = yylval.l_str = strdup(s)) == NULL)
 730                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 731 
 732         /*
 733          * If the lexeme is a global variable or likely identifier or *not* a
 734          * type_name, then it is an identifier token.
 735          */
 736         if (dt_idstack_lookup(&yypcb->pcb_globals, s) != NULL ||
 737             dt_idhash_lookup(yypcb->pcb_idents, s) != NULL ||
 738             dt_type_lookup(s, NULL) != 0)
 739                 return (DT_TOK_IDENT);
 740 
 741         /*
 742          * If we're in the midst of parsing a declaration and a type_specifier
 743          * has already been shifted, then return DT_TOK_IDENT instead of TNAME.
 744          * This semantic is necessary to permit valid ISO C code such as:
 745          *
 746          * typedef int foo;
 747          * struct s { foo foo; };
 748          *
 749          * without causing shift/reduce conflicts in the direct_declarator part
 750          * of the grammar.  The result is that we must check for conflicting
 751          * redeclarations of the same identifier as part of dt_node_decl().
 752          */
 753         if (ddp != NULL && ddp->dd_name != NULL)
 754                 return (DT_TOK_IDENT);
 755 
 756         /*
 757          * If the lexeme is a type name and we are not in a program clause,
 758          * then always interpret it as a type and return DT_TOK_TNAME.
 759          */
 760         if ((YYSTATE) != S0)
 761                 return (DT_TOK_TNAME);
 762 
 763         /*
 764          * If the lexeme matches a type name but is in a program clause, then
 765          * it could be a type or it could be an undefined variable.  Peek at
 766          * the next token to decide.  If we see ++, --, [, or =, we know there
 767          * might be an assignment that is trying to create a global variable,
 768          * so we optimistically return DT_TOK_IDENT.  There is no harm in being
 769          * wrong: a type_name followed by ++, --, [, or = is a syntax error.
 770          */
 771         while ((c0 = input()) != 0) {
 772                 if (strchr("\f\n\r\t\v ", c0) == NULL)
 773                         break;
 774         }
 775 
 776         switch (c0) {
 777         case '+':
 778         case '-':
 779                 if ((c1 = input()) == c0)
 780                         ttok = DT_TOK_IDENT;
 781                 unput(c1);
 782                 break;
 783 
 784         case '=':
 785                 if ((c1 = input()) != c0)
 786                         ttok = DT_TOK_IDENT;
 787                 unput(c1);
 788                 break;
 789         case '[':
 790                 ttok = DT_TOK_IDENT;
 791                 break;
 792         }
 793 
 794         if (ttok == DT_TOK_IDENT) {
 795                 idp = dt_idhash_insert(yypcb->pcb_idents, s, DT_IDENT_SCALAR, 0,
 796                     0, _dtrace_defattr, 0, &dt_idops_thaw, NULL, dtp->dt_gen);
 797 
 798                 if (idp == NULL)
 799                         longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 800         }
 801 
 802         unput(c0);
 803         return (ttok);
 804 }
 805 
 806 static int
 807 input(void)
 808 {
 809         int c;
 810 
 811         if (yysptr > yysbuf)
 812                 c = *--yysptr;
 813         else if (yypcb->pcb_fileptr != NULL)
 814                 c = fgetc(yypcb->pcb_fileptr);
 815         else if (yypcb->pcb_strptr < yypcb->pcb_string + yypcb->pcb_strlen)
 816                 c = *(unsigned char *)(yypcb->pcb_strptr++);
 817         else
 818                 c = EOF;
 819 
 820         if (c == '\n')
 821                 yylineno++;
 822 
 823         if (c != EOF)
 824                 return (c);
 825 
 826         if ((YYSTATE) == S1)
 827                 yyerror("end-of-file encountered before matching */\n");
 828 
 829         if ((YYSTATE) == S3)
 830                 yyerror("end-of-file encountered before end of control line\n");
 831 
 832         if (yypcb->pcb_fileptr != NULL && ferror(yypcb->pcb_fileptr))
 833                 longjmp(yypcb->pcb_jmpbuf, EDT_FIO);
 834 
 835         return (0); /* EOF */
 836 }
 837 
 838 static void
 839 unput(int c)
 840 {
 841         if (c == '\n')
 842                 yylineno--;
 843 
 844         *yysptr++ = c;
 845         yytchar = c;
 846 }