1 %{
   2 /*
   3  * CDDL HEADER START
   4  *
   5  * The contents of this file are subject to the terms of the
   6  * Common Development and Distribution License (the "License").
   7  * You may not use this file except in compliance with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 
  23 /*
  24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25  */
  26 
  27 #include <string.h>
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <assert.h>
  31 #include <ctype.h>
  32 #include <errno.h>
  33 
  34 #include <dt_impl.h>
  35 #include <dt_grammar.h>
  36 #include <dt_parser.h>
  37 #include <dt_string.h>
  38 
  39 /*
  40  * We need to undefine lex's input and unput macros so that references to these
  41  * call the functions provided at the end of this source file.
  42  */
  43 #undef input
  44 #undef unput
  45 
  46 static int id_or_type(const char *);
  47 static int input(void);
  48 static void unput(int);
  49 
  50 /*
  51  * We first define a set of labeled states for use in the D lexer and then a
  52  * set of regular expressions to simplify things below. The lexer states are:
  53  *
  54  * S0 - D program clause and expression lexing
  55  * S1 - D comments (i.e. skip everything until end of comment)
  56  * S2 - D program outer scope (probe specifiers and declarations)
  57  * S3 - D control line parsing (i.e. after ^# is seen but before \n)
  58  * S4 - D control line scan (locate control directives only and invoke S3)
  59  */
  60 %}
  61 
  62 %e 1500         /* maximum nodes */
  63 %p 3700         /* maximum positions */
  64 %n 600          /* maximum states */
  65         /* S0-S4 below are the start conditions that prefix the rules */
  66 %s S0 S1 S2 S3 S4
  67         /* named patterns; a rule refers to one by writing {RGX_name} */
  68 RGX_AGG         "@"[a-zA-Z_][0-9a-zA-Z_]*
  69 RGX_PSPEC       [-$:a-zA-Z_.?*\\\[\]!][-$:0-9a-zA-Z_.`?*\\\[\]!]*
  70 RGX_IDENT       [a-zA-Z_`][0-9a-zA-Z_`]*
  71 RGX_INT         ([0-9]+|0[xX][0-9A-Fa-f]+)[uU]?[lL]?[lL]?
  72 RGX_FP          ([0-9]+("."?)[0-9]*|"."[0-9]+)((e|E)("+"|-)?[0-9]+)?[fFlL]?
  73 RGX_WS          [\f\n\r\t\v ]
  74 RGX_STR         ([^"\\\n]|\\[^"\n]|\\\")*
  75 RGX_CHR         ([^'\\\n]|\\[^'\n]|\\')*
  76 RGX_INTERP      ^[\f\t\v ]*#!.*
  77 RGX_CTL         ^[\f\t\v ]*#
  78 
  79 %%
  80 
  81 %{
  82 
  83 /*
  84  * yylex() prologue: libdtrace may stash a one-shot context token in the pcb
  85  * to steer yacc into one of the three parsing contexts -- D expression
  86  * (DT_CTX_DEXPR), D program (DT_CTX_DPROG) or D type (DT_CTX_DTYPE).  When
  87  * such a token is pending, hand it to the parser before any input is lexed
  88  * and clear it so that it is delivered exactly once.
  89  */
  90 if (yypcb->pcb_token != 0) {
  91         const int ctx_token = yypcb->pcb_token;
  92 
  93         yypcb->pcb_token = 0;
  94         return (ctx_token);
  95 }
  96 %}
  97 
  98 <S0>auto  return (DT_KEY_AUTO); /* S0: a reserved word yields its own token */
  99 <S0>break return (DT_KEY_BREAK); /* listed before {RGX_IDENT}: wins ties */
 100 <S0>case  return (DT_KEY_CASE);
 101 <S0>char  return (DT_KEY_CHAR);
 102 <S0>const return (DT_KEY_CONST);
 103 <S0>continue      return (DT_KEY_CONTINUE);
 104 <S0>counter       return (DT_KEY_COUNTER);
 105 <S0>default       return (DT_KEY_DEFAULT);
 106 <S0>do            return (DT_KEY_DO);
 107 <S0>double        return (DT_KEY_DOUBLE);
 108 <S0>else  return (DT_KEY_ELSE);
 109 <S0>enum  return (DT_KEY_ENUM);
 110 <S0>extern        return (DT_KEY_EXTERN);
 111 <S0>float return (DT_KEY_FLOAT);
 112 <S0>for           return (DT_KEY_FOR);
 113 <S0>goto  return (DT_KEY_GOTO);
 114 <S0>if            return (DT_KEY_IF);
 115 <S0>import        return (DT_KEY_IMPORT);
 116 <S0>inline        return (DT_KEY_INLINE);
 117 <S0>int           return (DT_KEY_INT);
 118 <S0>long  return (DT_KEY_LONG);
 119 <S0>offsetof      return (DT_TOK_OFFSETOF); /* operator-like: a DT_TOK_* token */
 120 <S0>probe return (DT_KEY_PROBE);
 121 <S0>provider      return (DT_KEY_PROVIDER);
 122 <S0>register      return (DT_KEY_REGISTER);
 123 <S0>restrict      return (DT_KEY_RESTRICT);
 124 <S0>return        return (DT_KEY_RETURN);
 125 <S0>self  return (DT_KEY_SELF);
 126 <S0>short return (DT_KEY_SHORT);
 127 <S0>signed        return (DT_KEY_SIGNED);
 128 <S0>sizeof        return (DT_TOK_SIZEOF); /* operator-like: a DT_TOK_* token */
 129 <S0>static        return (DT_KEY_STATIC);
 130 <S0>string        return (DT_KEY_STRING);
 131 <S0>stringof      return (DT_TOK_STRINGOF); /* operator-like: a DT_TOK_* token */
 132 <S0>struct        return (DT_KEY_STRUCT);
 133 <S0>switch        return (DT_KEY_SWITCH);
 134 <S0>this  return (DT_KEY_THIS);
 135 <S0>translator    return (DT_KEY_XLATOR);
 136 <S0>typedef       return (DT_KEY_TYPEDEF);
 137 <S0>union return (DT_KEY_UNION);
 138 <S0>unsigned      return (DT_KEY_UNSIGNED);
 139 <S0>void  return (DT_KEY_VOID);
 140 <S0>volatile      return (DT_KEY_VOLATILE);
 141 <S0>while return (DT_KEY_WHILE);
 142 <S0>xlate return (DT_TOK_XLATE); /* operator-like: a DT_TOK_* token */
 143 
 144 <S2>auto  { yybegin(YYS_EXPR);    return (DT_KEY_AUTO); /* YYS_EXPR: lex on in S0 */ }
 145 <S2>char  { yybegin(YYS_EXPR);    return (DT_KEY_CHAR); }
 146 <S2>const { yybegin(YYS_EXPR);    return (DT_KEY_CONST); }
 147 <S2>counter       { yybegin(YYS_DEFINE);  return (DT_KEY_COUNTER); /* YYS_DEFINE: S0, and yybegin() parks pcb_list in pcb_hold */ }
 148 <S2>double        { yybegin(YYS_EXPR);    return (DT_KEY_DOUBLE); }
 149 <S2>enum  { yybegin(YYS_EXPR);    return (DT_KEY_ENUM); }
 150 <S2>extern        { yybegin(YYS_EXPR);    return (DT_KEY_EXTERN); }
 151 <S2>float { yybegin(YYS_EXPR);    return (DT_KEY_FLOAT); }
 152 <S2>import        { yybegin(YYS_EXPR);    return (DT_KEY_IMPORT); }
 153 <S2>inline        { yybegin(YYS_DEFINE);  return (DT_KEY_INLINE); }
 154 <S2>int           { yybegin(YYS_EXPR);    return (DT_KEY_INT); }
 155 <S2>long  { yybegin(YYS_EXPR);    return (DT_KEY_LONG); }
 156 <S2>provider      { yybegin(YYS_DEFINE);  return (DT_KEY_PROVIDER); }
 157 <S2>register      { yybegin(YYS_EXPR);    return (DT_KEY_REGISTER); }
 158 <S2>restrict      { yybegin(YYS_EXPR);    return (DT_KEY_RESTRICT); }
 159 <S2>self  { yybegin(YYS_EXPR);    return (DT_KEY_SELF); }
 160 <S2>short { yybegin(YYS_EXPR);    return (DT_KEY_SHORT); }
 161 <S2>signed        { yybegin(YYS_EXPR);    return (DT_KEY_SIGNED); }
 162 <S2>static        { yybegin(YYS_EXPR);    return (DT_KEY_STATIC); }
 163 <S2>string        { yybegin(YYS_EXPR);    return (DT_KEY_STRING); }
 164 <S2>struct        { yybegin(YYS_EXPR);    return (DT_KEY_STRUCT); }
 165 <S2>this  { yybegin(YYS_EXPR);    return (DT_KEY_THIS); }
 166 <S2>translator    { yybegin(YYS_DEFINE);  return (DT_KEY_XLATOR); }
 167 <S2>typedef       { yybegin(YYS_EXPR);    return (DT_KEY_TYPEDEF); }
 168 <S2>union { yybegin(YYS_EXPR);    return (DT_KEY_UNION); }
 169 <S2>unsigned      { yybegin(YYS_EXPR);    return (DT_KEY_UNSIGNED); }
 170 <S2>void  { yybegin(YYS_EXPR);    return (DT_KEY_VOID); }
 171 <S2>volatile      { yybegin(YYS_EXPR);    return (DT_KEY_VOLATILE); }
 172 
 173 <S0>"$$"[0-9]+    {
 174                         char *v = "";
 175                         long i;
 176 
 177                         /*
 178                          * $$<d> is replaced by the text of macro argument <d>
 179                          * from pcb_sargv as a string token, or by "" if it was
 180                          * not given and DTRACE_C_DEFARG is set.  strtol() is
 181                          * used because atoi() is undefined on overflow; an
 182                          * index too large to represent is just out of range.
 183                          */
 184                         errno = 0;
 185                         i = strtol(yytext + 2, NULL, 10);
 186                         if (errno == ERANGE || i >= yypcb->pcb_sargc) {
 187                                 if (!(yypcb->pcb_cflags & DTRACE_C_DEFARG)) {
 188                                         xyerror(D_MACRO_UNDEF, "macro argument "
 189                                             "%s is not defined\n", yytext);
 190                                 }
 191                         } else {
 192                                 v = yypcb->pcb_sargv[i]; /* get val from pcb */
 193                                 yypcb->pcb_sflagv[i] |= DT_IDFLG_REF;
 194                         }
 195 
 196                         if ((yylval.l_str = strdup(v)) == NULL)
 197                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 198 
 199                         (void) stresc2chr(yylval.l_str);
 200                         return (DT_TOK_STRING);
 201                 }
 202 
 203 <S0>"$"[0-9]+     {
 204                         char *p, *v = "0";
 205                         long i;
 206 
 207                         /*
 208                          * $<d> is replaced by the text of macro argument <d>
 209                          * from pcb_sargv, or by the default "0" if it was not
 210                          * given and DTRACE_C_DEFARG is set.  strtol() is used
 211                          * because atoi() is undefined on overflow; an index
 212                          * too large to represent is just out of range.
 213                          */
 214                         errno = 0;
 215                         i = strtol(yytext + 1, NULL, 10);
 216                         if (errno == ERANGE || i >= yypcb->pcb_sargc) {
 217                                 if (!(yypcb->pcb_cflags & DTRACE_C_DEFARG)) {
 218                                         xyerror(D_MACRO_UNDEF, "macro argument "
 219                                             "%s is not defined\n", yytext);
 220                                 }
 221                         } else {
 222                                 v = yypcb->pcb_sargv[i]; /* get val from pcb */
 223                                 yypcb->pcb_sflagv[i] |= DT_IDFLG_REF;
 224                         }
 225 
 226                         /*
 227                          * If the macro text is not a valid integer or ident,
 228                          * then we treat it as a string.  The string may be
 229                          * optionally enclosed in quotes, which we strip.
 230                          */
 231                         if (strbadidnum(v)) {
 232                                 size_t len = strlen(v);
 233 
 234                                 if (len != 1 && *v == '"' && v[len - 1] == '"')
 235                                         yylval.l_str = strndup(v + 1, len - 2);
 236                                 else
 237                                         yylval.l_str = strndup(v, len);
 238 
 239                                 if (yylval.l_str == NULL)
 240                                         longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 241 
 242                                 (void) stresc2chr(yylval.l_str);
 243                                 return (DT_TOK_STRING);
 244                         }
 245 
 246                         /*
 247                          * Text that begins with a digit or a +/- sign becomes
 248                          * an integer token (suffix copy is always terminated).
 249                          */
 250                         if (isdigit((unsigned char)v[0]) ||
 251                             v[0] == '-' || v[0] == '+') {
 252                                 if (v[0] == '-' || v[0] == '+')
 253                                         yyintprefix = *v++;
 254                                 else
 255                                         yyintprefix = 0;
 256 
 257                                 errno = 0;
 258                                 yylval.l_int = strtoull(v, &p, 0);
 259                                 (void) strncpy(yyintsuffix, p,
 260                                     sizeof (yyintsuffix) - 1);
 261                                 yyintsuffix[sizeof (yyintsuffix) - 1] = '\0';
 262                                 yyintdecimal = *v != '0';
 263                                 if (errno == ERANGE) {
 264                                         xyerror(D_MACRO_OFLOW, "macro argument"
 265                                             " %s constant %s results in integer"
 266                                             " overflow\n", yytext, v);
 267                                 }
 268                                 return (DT_TOK_INT);
 269                         }
 270 
 271                         return (id_or_type(v));
 272                 }
 273 
 274 <S0>"$$"{RGX_IDENT} {
 275                         char digits[16]; /* enough for UINT_MAX + \0 */
 276                         dt_ident_t *macro;
 277 
 278                         /* look the name up without its leading "$$" */
 279                         macro = dt_idhash_lookup(yypcb->pcb_hdl->dt_macros,
 280                             yytext + 2);
 281                         if (macro == NULL) {
 282                                 xyerror(D_MACRO_UNDEF, "macro variable %s "
 283                                     "is not defined\n", yytext);
 284                         }
 285                         /*
 286                          * Macro variables are currently all of type id_t (see
 287                          * dtrace_update()): return di_id as decimal text.
 288                          */
 289                         (void) snprintf(digits, sizeof (digits), "%u",
 290                             macro->di_id);
 291                         if ((yylval.l_str = strdup(digits)) == NULL)
 292                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 293                         return (DT_TOK_STRING);
 294                 }
 295 
 296 <S0>"$"{RGX_IDENT} {
 297                         dt_ident_t *macro;
 298 
 299                         /* look the name up without its leading "$" */
 300                         macro = dt_idhash_lookup(yypcb->pcb_hdl->dt_macros,
 301                             yytext + 1);
 302                         if (macro == NULL) {
 303                                 xyerror(D_MACRO_UNDEF, "macro variable %s "
 304                                     "is not defined\n", yytext);
 305                         }
 306                         /*
 307                          * Macro variables are currently all of type id_t (see
 308                          * dtrace_update()), so hand back a decimal integer.
 309                          */
 310                         yyintdecimal = 1;
 311                         yyintsuffix[0] = '\0';
 312                         yyintprefix = 0;
 313                         yylval.l_int = (intmax_t)(int)macro->di_id;
 314                         return (DT_TOK_INT);
 315                 }
 316 
 317 <S0>{RGX_IDENT}   return (id_or_type(yytext)); /* identifier or type name */
 318 
 319 <S0>{RGX_AGG}     {
 320                         yylval.l_str = strdup(yytext); /* keeps the '@' */
 321                         if (yylval.l_str == NULL)
 322                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 323                         return (DT_TOK_AGG);
 324                 }
 325 
 326 <S0>"@"           {
 327                         yylval.l_str = strdup("@_"); /* bare '@' is "@_" */
 328                         if (yylval.l_str == NULL)
 329                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 330                         return (DT_TOK_AGG);
 331                 }
 332 
 333 <S0>{RGX_INT}     |
 334 <S2>{RGX_INT}     |
 335 <S3>{RGX_INT}     {
 336                         char *p;
 337 
 338                         errno = 0;
 339                         yylval.l_int = strtoull(yytext, &p, 0);
 340                         yyintprefix = 0;
 341                         yyintdecimal = yytext[0] != '0';
 342 
 343                         /* strncpy() does not terminate a full-length copy */
 344                         (void) strncpy(yyintsuffix, p,
 345                             sizeof (yyintsuffix) - 1);
 346                         yyintsuffix[sizeof (yyintsuffix) - 1] = '\0';
 347                         if (errno == ERANGE) {
 348                                 xyerror(D_INT_OFLOW, "constant %s results in "
 349                                     "integer overflow\n", yytext);
 350                         }
 351                         if (*p != '\0' && strchr("uUlL", *p) == NULL) {
 352                                 xyerror(D_INT_DIGIT, "constant %s contains "
 353                                     "invalid digit %c\n", yytext, *p);
 354                         }
 355                         if ((YYSTATE) != S3)
 356                                 return (DT_TOK_INT);
 357                         yypragma = dt_node_link(yypragma,
 358                             dt_node_int(yylval.l_int));
 359                 }
 360 
 361 <S0>{RGX_FP}      yyerror("floating-point constants are not permitted\n");
 362 
 363 <S0>\"{RGX_STR}$ |
 364 <S3>\"{RGX_STR}$ xyerror(D_STR_NL, "newline encountered in string literal");
 365 
 366 <S0>\"{RGX_STR}\" |
 367 <S3>\"{RGX_STR}\" {
 368                         char *str;
 369 
 370                         /*
 371                          * Copy the text between the quotes, expand its C
 372                          * escape sequences, and then either return it as a
 373                          * token or, in state S3, append it as a string node
 374                          * to the yypragma list.
 375                          */
 376                         str = strndup(yytext + 1, yyleng - 2);
 377                         if ((yylval.l_str = str) == NULL)
 378                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 379                         (void) stresc2chr(str);
 380                         if ((YYSTATE) != S3)
 381                                 return (DT_TOK_STRING);
 382                         yypragma = dt_node_link(yypragma, dt_node_string(str));
 383                 }
 384 
 385 <S0>'{RGX_CHR}$   xyerror(D_CHR_NL, "newline encountered in character constant");
 386 
 387 <S0>'{RGX_CHR}'   {
 388                         char *s, *p, *q;
 389                         size_t nbytes;
 390 
 391                         /*
 392                          * Character constant -- convert C escape sequences and
 393                          * return the character as an integer immediate value.
 394                          */
 395                         if (yyleng == 2)
 396                                 xyerror(D_CHR_NULL, "empty character constant");
 397                         /* overwrite the closing quote, then convert escapes */
 398                         s = yytext + 1;
 399                         yytext[yyleng - 1] = '\0';
 400                         nbytes = stresc2chr(s);
 401                         yylval.l_int = 0;
 402                         yyintprefix = 0;
 403                         yyintsuffix[0] = '\0';
 404                         yyintdecimal = 1;
 405                         /* the converted bytes must fit inside yylval.l_int */
 406                         if (nbytes > sizeof (yylval.l_int)) {
 407                                 xyerror(D_CHR_OFLOW, "character constant is "
 408                                     "too long");
 409                         }
 410 #ifdef _LITTLE_ENDIAN  /* reversed copy: the last character lands in byte 0 */
 411                         p = ((char *)&yylval.l_int) + nbytes - 1;
 412                         for (q = s; nbytes != 0; nbytes--)
 413                                 *p-- = *q++;
 414 #else                  /* in-order copy into the final nbytes bytes of l_int */
 415                         bcopy(s, ((char *)&yylval.l_int) +
 416                             sizeof (yylval.l_int) - nbytes, nbytes);
 417 #endif
 418                         return (DT_TOK_INT);
 419                 }
 420 
 421 <S0>"/*"  |
 422 <S2>"/*"  {
 423                         yypcb->pcb_cstate = (YYSTATE); /* state to resume later */
 424                         BEGIN(S1); /* S1 skips the comment body */
 425                 }
 426 
 427 <S0>{RGX_INTERP} |
 428 <S2>{RGX_INTERP} ;        /* discard any #! lines */
 429 
 430 <S0>{RGX_CTL}     |
 431 <S2>{RGX_CTL}     |
 432 <S4>{RGX_CTL}     {
 433                         assert(yypragma == NULL); /* no pragma list pending */
 434                         yypcb->pcb_cstate = (YYSTATE); /* resumed at newline */
 435                         BEGIN(S3); /* S3 lexes the rest of the control line */
 436                 }
 437 
 438 <S4>.             ;       /* S4 only acts on control lines: discard */
 439 <S4>"\n"  ;       /* discard */
 440 
 441 <S0>"/"           {
 442                         int next, closes;
 443 
 444                         /*
 445                          * "/" is both the predicate delimiter and the integer
 446                          * division operator, so the grammar alone would have
 447                          * a shift/reduce conflict.  Resolve it here by peeking
 448                          * at the next non-whitespace character: EOF, ";", "{"
 449                          * or "/" means this "/" closes a predicate and is
 450                          * DT_TOK_EPRED; anything else makes it DT_TOK_DIV.
 451                          * The peeked character is always pushed back.
 452                          */
 453                         do {
 454                                 next = input();
 455                         } while (next != 0 &&
 456                             strchr("\f\n\r\t\v ", next) != NULL);
 457 
 458                         closes = (next == 0 || next == ';' || next == '{' ||
 459                             next == '/');
 460 
 461                         if (closes && yypcb->pcb_parens != 0) {
 462                                 yyerror("closing ) expected in "
 463                                     "predicate before /\n");
 464                         }
 465                         if (closes && yypcb->pcb_brackets != 0) {
 466                                 yyerror("closing ] expected in "
 467                                     "predicate before /\n");
 468                         }
 469 
 470                         /* return the lookahead character to the input */
 471                         unput(next);
 472                         return (closes ? DT_TOK_EPRED : DT_TOK_DIV);
 473                 }
 474 
 475 <S0>"("           {
 476                         yypcb->pcb_parens += 1; /* track nesting depth */
 477                         return (DT_TOK_LPAR);
 478                 }
 479 
 480 <S0>")"           {
 481                         if ((yypcb->pcb_parens -= 1) < 0) /* none was open */
 482                                 yyerror("extra ) in input stream\n");
 483                         return (DT_TOK_RPAR);
 484                 }
 485 
 486 <S0>"["           {
 487                         yypcb->pcb_brackets += 1; /* track nesting depth */
 488                         return (DT_TOK_LBRAC);
 489                 }
 490 
 491 <S0>"]"           {
 492                         if ((yypcb->pcb_brackets -= 1) < 0) /* none was open */
 493                                 yyerror("extra ] in input stream\n");
 494                         return (DT_TOK_RBRAC);
 495                 }
 496 
 497 <S0>"{"           |
 498 <S2>"{"           {
 499                         yypcb->pcb_braces += 1; /* track nesting depth */
 500                         return ('{');
 501                 }
 502 
 503 <S0>"}"           {
 504                         if ((yypcb->pcb_braces -= 1) < 0) /* none was open */
 505                                 yyerror("extra } in input stream\n");
 506                         return ('}');
 507                 }
 508 
 509 <S0>"|"           return (DT_TOK_BOR); /* lex takes the longest match: "||" beats "|" */
 510 <S0>"^"           return (DT_TOK_XOR);
 511 <S0>"&"               return (DT_TOK_BAND);
 512 <S0>"&&"  return (DT_TOK_LAND);
 513 <S0>"^^"  return (DT_TOK_LXOR);
 514 <S0>"||"  return (DT_TOK_LOR);
 515 <S0>"=="  return (DT_TOK_EQU);
 516 <S0>"!="  return (DT_TOK_NEQ);
 517 <S0>"<"                return (DT_TOK_LT);
 518 <S0>"<="       return (DT_TOK_LE);
 519 <S0>">"                return (DT_TOK_GT);
 520 <S0>">="       return (DT_TOK_GE);
 521 <S0>"<<"    return (DT_TOK_LSH);
 522 <S0>">>"    return (DT_TOK_RSH);
 523 <S0>"+"           return (DT_TOK_ADD);
 524 <S0>"-"           return (DT_TOK_SUB);
 525 <S0>"*"           return (DT_TOK_MUL);
 526 <S0>"%"           return (DT_TOK_MOD);
 527 <S0>"~"           return (DT_TOK_BNEG);
 528 <S0>"!"           return (DT_TOK_LNEG);
 529 <S0>"?"           return (DT_TOK_QUESTION);
 530 <S0>":"           return (DT_TOK_COLON);
 531 <S0>"."           return (DT_TOK_DOT);
 532 <S0>"->"       return (DT_TOK_PTR);
 533 <S0>"="           return (DT_TOK_ASGN);
 534 <S0>"+="  return (DT_TOK_ADD_EQ);
 535 <S0>"-="  return (DT_TOK_SUB_EQ);
 536 <S0>"*="  return (DT_TOK_MUL_EQ);
 537 <S0>"/="  return (DT_TOK_DIV_EQ);
 538 <S0>"%="  return (DT_TOK_MOD_EQ);
 539 <S0>"&="      return (DT_TOK_AND_EQ);
 540 <S0>"^="  return (DT_TOK_XOR_EQ);
 541 <S0>"|="  return (DT_TOK_OR_EQ);
 542 <S0>"<<="   return (DT_TOK_LSH_EQ);
 543 <S0>">>="   return (DT_TOK_RSH_EQ);
 544 <S0>"++"  return (DT_TOK_ADDADD);
 545 <S0>"--"  return (DT_TOK_SUBSUB);
 546 <S0>"..." return (DT_TOK_ELLIPSIS);
 547 <S0>","           return (DT_TOK_COMMA);
 548 <S0>";"           return (';');
 549 <S0>{RGX_WS}      ; /* discard whitespace */
 550 <S0>"\\"\n        ; /* discard a backslash followed by newline */
 551 <S0>.             yyerror("syntax error near \"%c\"\n", yytext[0]); /* no S0 rule matched */
 552 
 553 <S1>"/*"  yyerror("/* encountered inside a comment\n");
 554 <S1>"*/"  BEGIN(yypcb->pcb_cstate); /* resume the state saved when the comment opened */
 555 <S1>.|\n  ; /* discard comment text */
 556 
 557 <S2>{RGX_PSPEC}   {
 558                         /*
 559                          * S2 has an ambiguity because RGX_PSPEC includes '*'
 560                          * as a glob character and '*' also can be DT_TOK_STAR.
 561                          * Since lex always matches the longest token, this
 562                          * rule can be matched by an input string like "int*",
 563                          * which could begin a global variable declaration such
 564                          * as "int*x;" or could begin a RGX_PSPEC with globbing
 565                          * such as "int* { trace(timestamp); }".  If C_PSPEC is
 566                          * not set, we must resolve the ambiguity in favor of
 567                          * the type and perform lexer pushback if the fragment
 568                          * before '*' or entire fragment matches a type name.
 569                          * If C_PSPEC is set, we always return a PSPEC token.
 570                          * If C_PSPEC is off, the user can avoid ambiguity by
 571                          * including a ':' delimiter in the specifier, which
 572                          * they should be doing anyway to specify the provider.
 573                          */
 574                         if (!(yypcb->pcb_cflags & DTRACE_C_PSPEC) &&
 575                             strchr(yytext, ':') == NULL) {
 576                                 /* p: first '*' in yytext, if any; q: last char */
 577                                 char *p = strchr(yytext, '*');
 578                                 char *q = yytext + yyleng - 1;
 579 
 580                                 if (p != NULL && p > yytext)
 581                                         *p = '\0'; /* prune yytext */
 582                                 /* a zero result is treated as "names a type" */
 583                                 if (dt_type_lookup(yytext, NULL) == 0) {
 584                                         yylval.l_str = strdup(yytext);
 585 
 586                                         if (yylval.l_str == NULL) {
 587                                                 longjmp(yypcb->pcb_jmpbuf,
 588                                                     EDT_NOMEM);
 589                                         }
 590                                         /* re-queue '*' and all that follows */
 591                                         if (p != NULL && p > yytext) {
 592                                                 for (*p = '*'; q >= p; q--)
 593                                                         unput(*q);
 594                                         }
 595                                         /* YYS_EXPR: lexing continues in S0 */
 596                                         yybegin(YYS_EXPR);
 597                                         return (DT_TOK_TNAME);
 598                                 }
 599 
 600                                 if (p != NULL && p > yytext)
 601                                         *p = '*'; /* restore yytext */
 602                         }
 603                         /* otherwise the whole lexeme is a probe specifier */
 604                         if ((yylval.l_str = strdup(yytext)) == NULL)
 605                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 606 
 607                         return (DT_TOK_PSPEC);
 608                 }
 609 
 610 <S2>"/"           return (DT_TOK_DIV);
 611 <S2>","           return (DT_TOK_COMMA);
 612 
 613 <S2>{RGX_WS}      ; /* discard whitespace */
 614 <S2>.             yyerror("syntax error near \"%c\"\n", yytext[0]);
 615 
 616 <S3>\n            {
 617                         dt_pragma(yypragma); /* control line is complete */
 618                         yypragma = NULL;
 619                         BEGIN(yypcb->pcb_cstate); /* back to the saved state */
 620                 }
 621 
 622 <S3>[\f\t\v ]+    ; /* discard blanks within the control line */
 623 
 624 <S3>[^\f\n\t\v "]+ {
 625                         dt_node_t *word;
 626                         char *text = strdup(yytext);
 627 
 628                         if ((yylval.l_str = text) == NULL)
 629                                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 630                         /*
 631                          * dt_node_ident() would expand inlined identifiers,
 632                          * which must not happen in #pragma context because a
 633                          * pragma may apply to the identifier itself.  Build
 634                          * a string node instead and relabel its dn_kind and
 635                          * dn_op as an identifier.
 636                          */
 637                         word = dt_node_string(text);
 638                         word->dn_kind = DT_NODE_IDENT;
 639                         word->dn_op = DT_TOK_IDENT;
 640                         yypragma = dt_node_link(yypragma, word);
 641                 }
 642 
 643 <S3>.             yyerror("syntax error near \"%c\"\n", yytext[0]);
 644 
 645 %%
 646 
 647 /*
 648  * yybegin() lets C code outside the rules switch lexer states; it wraps the
 649  * lex BEGIN() macro.  Probe descriptions need their own state (S2) because
 650  * their syntax clashes with ordinary D tokens (e.g. names may contain "-").
 651  * Entering or leaving YYS_DEFINE also swaps the pcb's node lists: pcb_list is
 652  * parked in pcb_hold while persistent definitions such as inlines are parsed
 653  * and restored afterwards, keeping them apart from the transient nodes that
 654  * are freed once the current program file has been parsed.
 655  */
 656 void
 657 yybegin(yystate_t state)
 658 {
 659         dt_pcb_t *const pcb = yypcb;
 660 
 661 #ifdef  YYDEBUG
 662         yydebug = _dtrace_debug;
 663 #endif
 664         if (state == pcb->pcb_yystate)
 665                 return; /* already in the requested state */
 666 
 667         /* leaving YYS_DEFINE: bring back the list parked on entry */
 668         if (pcb->pcb_yystate == YYS_DEFINE) {
 669                 pcb->pcb_list = pcb->pcb_hold;
 670                 pcb->pcb_hold = NULL;
 671         }
 672 
 673         /* entering YYS_DEFINE: park the current list in pcb_hold */
 674         if (state == YYS_DEFINE) {
 675                 assert(pcb->pcb_hold == NULL);
 676                 pcb->pcb_hold = pcb->pcb_list;
 677                 pcb->pcb_list = NULL;
 678         }
 679 
 680         if (state == YYS_EXPR || state == YYS_DEFINE) {
 681                 BEGIN(S0);
 682         } else if (state == YYS_CLAUSE) {
 683                 BEGIN(S2);
 684         } else if (state == YYS_CONTROL) {
 685                 BEGIN(S4);
 686         } else if (state != YYS_DONE) {
 687                 xyerror(D_UNKNOWN, "internal error -- bad yystate %d\n", state);
 688         }
 689 
 690         pcb->pcb_yystate = state;
 691 }
 692 
 693 void
 694 yyinit(dt_pcb_t *pcb)
 695 {
 696         yysptr = yysbuf;        /* rewind yysptr to the start of yysbuf */
 697         yypragma = NULL;        /* no pragma nodes collected yet */
 698         yylineno = 1;           /* line numbering restarts at one */
 699         yypcb = pcb;            /* the pcb the lexer works on from now on */
 700 }
 701 
 702 /*
 703  * Given a lexeme 's' (typically yytext), set yylval and return an appropriate
 704  * token to the parser indicating either an identifier or a typedef name.
 705  * User-defined global variables always take precedence over types, but we do
 706  * use some heuristics because D programs can look at an ever-changing set of
 707  * kernel types and also can implicitly instantiate variables by assignment,
 708  * unlike in C.  The code here is ordered carefully as lookups are not cheap.
 709  */
 710 static int
 711 id_or_type(const char *s)
 712 {
 713         dtrace_hdl_t *dtp = yypcb->pcb_hdl;
 714         dt_decl_t *ddp = yypcb->pcb_dstack.ds_decl;
 715         int c0, c1, ttok = DT_TOK_TNAME;
 716         dt_ident_t *idp;
 717 
 718         if ((s = yylval.l_str = strdup(s)) == NULL)
 719                 longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 720 
 721         /*
 722          * If the lexeme is a global variable or likely identifier or *not* a
 723          * type_name, then it is an identifier token.
 724          */
 725         if (dt_idstack_lookup(&yypcb->pcb_globals, s) != NULL ||
 726             dt_idhash_lookup(yypcb->pcb_idents, s) != NULL ||
 727             dt_type_lookup(s, NULL) != 0)
 728                 return (DT_TOK_IDENT);
 729 
 730         /*
 731          * If we're in the midst of parsing a declaration and a type_specifier
 732          * has already been shifted, then return DT_TOK_IDENT instead of TNAME.
 733          * This semantic is necessary to permit valid ISO C code such as:
 734          *
 735          * typedef int foo;
 736          * struct s { foo foo; };
 737          *
 738          * without causing shift/reduce conflicts in the direct_declarator part
 739          * of the grammar.  The result is that we must check for conflicting
 740          * redeclarations of the same identifier as part of dt_node_decl().
 741          */
 742         if (ddp != NULL && ddp->dd_name != NULL)
 743                 return (DT_TOK_IDENT);
 744 
 745         /*
 746          * If the lexeme is a type name and we are not in a program clause,
 747          * then always interpret it as a type and return DT_TOK_TNAME.
 748          */
 749         if ((YYSTATE) != S0)
 750                 return (DT_TOK_TNAME);
 751 
 752         /*
 753          * If the lexeme matches a type name but is in a program clause, then
 754          * it could be a type or it could be an undefined variable.  Peek at
 755          * the next token to decide.  If we see ++, --, [, or =, we know there
 756          * might be an assignment that is trying to create a global variable,
 757          * so we optimistically return DT_TOK_IDENT.  There is no harm in being
 758          * wrong: a type_name followed by ++, --, [, or = is a syntax error.
 759          */
 760         while ((c0 = input()) != 0) {
 761                 if (strchr("\f\n\r\t\v ", c0) == NULL)
 762                         break;
 763         }
 764 
 765         switch (c0) {
 766         case '+':
 767         case '-':
 768                 if ((c1 = input()) == c0)
 769                         ttok = DT_TOK_IDENT;
 770                 unput(c1);
 771                 break;
 772 
 773         case '=':
 774                 if ((c1 = input()) != c0)
 775                         ttok = DT_TOK_IDENT;
 776                 unput(c1);
 777                 break;
 778         case '[':
 779                 ttok = DT_TOK_IDENT;
 780                 break;
 781         }
 782 
 783         if (ttok == DT_TOK_IDENT) {
 784                 idp = dt_idhash_insert(yypcb->pcb_idents, s, DT_IDENT_SCALAR, 0,
 785                     0, _dtrace_defattr, 0, &dt_idops_thaw, NULL, dtp->dt_gen);
 786 
 787                 if (idp == NULL)
 788                         longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 789         }
 790 
 791         unput(c0);
 792         return (ttok);
 793 }
 794 
 795 static int
 796 input(void)
 797 {
 798         int c;
 799 
 800         if (yysptr > yysbuf)
 801                 c = *--yysptr;
 802         else if (yypcb->pcb_fileptr != NULL)
 803                 c = fgetc(yypcb->pcb_fileptr);
 804         else if (yypcb->pcb_strptr < yypcb->pcb_string + yypcb->pcb_strlen)
 805                 c = *(unsigned char *)(yypcb->pcb_strptr++);
 806         else
 807                 c = EOF;
 808 
 809         if (c == '\n')
 810                 yylineno++;
 811 
 812         if (c != EOF)
 813                 return (c);
 814 
 815         if ((YYSTATE) == S1)
 816                 yyerror("end-of-file encountered before matching */\n");
 817 
 818         if ((YYSTATE) == S3)
 819                 yyerror("end-of-file encountered before end of control line\n");
 820 
 821         if (yypcb->pcb_fileptr != NULL && ferror(yypcb->pcb_fileptr))
 822                 longjmp(yypcb->pcb_jmpbuf, EDT_FIO);
 823 
 824         return (0); /* EOF */
 825 }
 826 
 827 static void
 828 unput(int c)
 829 {
 830         if (c == '\n')
 831                 yylineno--;
 832 
 833         *yysptr++ = c;
 834         yytchar = c;
 835 }