Print this page
    
4474 DTrace Userland CTF Support
4475 DTrace userland Keyword
4476 DTrace tests should be better citizens
4479 pid provider types
4480 dof emulation missing checks
Reviewed by: Bryan Cantrill <bryan@joyent.com>
    
      
        | Split | Close | 
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/lib/libdtrace/common/dt_lex.l
          +++ new/usr/src/lib/libdtrace/common/dt_lex.l
   1    1  %{
   2    2  /*
   3    3   * CDDL HEADER START
   4    4   *
   5    5   * The contents of this file are subject to the terms of the
   6    6   * Common Development and Distribution License (the "License").
   7    7   * You may not use this file except in compliance with the License.
   8    8   *
   9    9   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10   10   * or http://www.opensolaris.org/os/licensing.
  11   11   * See the License for the specific language governing permissions
  12   12   * and limitations under the License.
  13   13   *
  14   14   * When distributing Covered Code, include this CDDL HEADER in each
  15   15   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  
    | ↓ open down ↓ | 15 lines elided | ↑ open up ↑ | 
  16   16   * If applicable, add the following below this CDDL HEADER, with the
  17   17   * fields enclosed by brackets "[]" replaced with your own identifying
  18   18   * information: Portions Copyright [yyyy] [name of copyright owner]
  19   19   *
  20   20   * CDDL HEADER END
  21   21   */
  22   22  
  23   23  /*
  24   24   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25   25   */
       26 +/*
       27 + * Copyright (c) 2013 by Delphix. All rights reserved.
       28 + * Copyright (c) 2013, Joyent, Inc. All rights reserved.
       29 + */
  26   30  
  27   31  #include <string.h>
  28   32  #include <stdlib.h>
  29   33  #include <stdio.h>
  30   34  #include <assert.h>
  31   35  #include <ctype.h>
  32   36  #include <errno.h>
  33   37  
  34   38  #include <dt_impl.h>
  35   39  #include <dt_grammar.h>
  36   40  #include <dt_parser.h>
  37   41  #include <dt_string.h>
  38   42  
  39   43  /*
  40   44   * We need to undefine lex's input and unput macros so that references to these
  41   45   * call the functions provided at the end of this source file.
  42   46   */
  43   47  #undef input
  44   48  #undef unput
  45   49  
  46   50  static int id_or_type(const char *);
  47   51  static int input(void);
  48   52  static void unput(int);
  49   53  
  50   54  /*
  51   55   * We first define a set of labeled states for use in the D lexer and then a
  52   56   * set of regular expressions to simplify things below. The lexer states are:
  
    | ↓ open down ↓ | 17 lines elided | ↑ open up ↑ | 
  53   57   *
  54   58   * S0 - D program clause and expression lexing
  55   59   * S1 - D comments (i.e. skip everything until end of comment)
  56   60   * S2 - D program outer scope (probe specifiers and declarations)
  57   61   * S3 - D control line parsing (i.e. after ^# is seen but before \n)
  58   62   * S4 - D control line scan (locate control directives only and invoke S3)
  59   63   */
  60   64  %}
  61   65  
  62   66  %e 1500         /* maximum nodes */
  63      -%p 3700         /* maximum positions */
       67 +%p 4900         /* maximum positions */
  64   68  %n 600          /* maximum states */
       69 +%a 3000         /* maximum transitions */
  65   70  
  66   71  %s S0 S1 S2 S3 S4
  67   72  
  68   73  RGX_AGG         "@"[a-zA-Z_][0-9a-zA-Z_]*
  69   74  RGX_PSPEC       [-$:a-zA-Z_.?*\\\[\]!][-$:0-9a-zA-Z_.`?*\\\[\]!]*
       75 +RGX_ALTIDENT    [a-zA-Z_][0-9a-zA-Z_]*
       76 +RGX_LMID        LM[0-9a-fA-F]+`
       77 +RGX_MOD_IDENT   [a-zA-Z_`][0-9a-z.A-Z_`]*`
  70   78  RGX_IDENT       [a-zA-Z_`][0-9a-zA-Z_`]*
  71   79  RGX_INT         ([0-9]+|0[xX][0-9A-Fa-f]+)[uU]?[lL]?[lL]?
  72   80  RGX_FP          ([0-9]+("."?)[0-9]*|"."[0-9]+)((e|E)("+"|-)?[0-9]+)?[fFlL]?
  73   81  RGX_WS          [\f\n\r\t\v ]
  74   82  RGX_STR         ([^"\\\n]|\\[^"\n]|\\\")*
  75   83  RGX_CHR         ([^'\\\n]|\\[^'\n]|\\')*
  76   84  RGX_INTERP      ^[\f\t\v ]*#!.*
  77   85  RGX_CTL         ^[\f\t\v ]*#
  78   86  
  79   87  %%
  80   88  
  81   89  %{
  82   90  
  83   91  /*
  84   92   * We insert a special prologue into yylex() itself: if the pcb contains a
  85   93   * context token, we return that prior to running the normal lexer.  This
  86   94   * allows libdtrace to force yacc into one of our three parsing contexts: D
  87   95   * expression (DT_CTX_DEXPR), D program (DT_CTX_DPROG) or D type (DT_CTX_DTYPE).
  88   96   * Once the token is returned, we clear it so this only happens once.
  89   97   */
  90   98  if (yypcb->pcb_token != 0) {
  91   99          int tok = yypcb->pcb_token;
  92  100          yypcb->pcb_token = 0;
  93  101          return (tok);
  94  102  }
  95  103  
  96  104  %}
  97  105  
  98  106  <S0>auto        return (DT_KEY_AUTO);
  99  107  <S0>break       return (DT_KEY_BREAK);
 100  108  <S0>case        return (DT_KEY_CASE);
 101  109  <S0>char        return (DT_KEY_CHAR);
 102  110  <S0>const       return (DT_KEY_CONST);
 103  111  <S0>continue    return (DT_KEY_CONTINUE);
 104  112  <S0>counter     return (DT_KEY_COUNTER);
 105  113  <S0>default     return (DT_KEY_DEFAULT);
 106  114  <S0>do          return (DT_KEY_DO);
 107  115  <S0>double      return (DT_KEY_DOUBLE);
 108  116  <S0>else        return (DT_KEY_ELSE);
 109  117  <S0>enum        return (DT_KEY_ENUM);
 110  118  <S0>extern      return (DT_KEY_EXTERN);
 111  119  <S0>float       return (DT_KEY_FLOAT);
 112  120  <S0>for         return (DT_KEY_FOR);
 113  121  <S0>goto        return (DT_KEY_GOTO);
 114  122  <S0>if          return (DT_KEY_IF);
 115  123  <S0>import      return (DT_KEY_IMPORT);
 116  124  <S0>inline      return (DT_KEY_INLINE);
 117  125  <S0>int         return (DT_KEY_INT);
 118  126  <S0>long        return (DT_KEY_LONG);
 119  127  <S0>offsetof    return (DT_TOK_OFFSETOF);
 120  128  <S0>probe       return (DT_KEY_PROBE);
 121  129  <S0>provider    return (DT_KEY_PROVIDER);
 122  130  <S0>register    return (DT_KEY_REGISTER);
 123  131  <S0>restrict    return (DT_KEY_RESTRICT);
 124  132  <S0>return      return (DT_KEY_RETURN);
 125  133  <S0>self        return (DT_KEY_SELF);
 126  134  <S0>short       return (DT_KEY_SHORT);
 127  135  <S0>signed      return (DT_KEY_SIGNED);
 128  136  <S0>sizeof      return (DT_TOK_SIZEOF);
  
    | ↓ open down ↓ | 49 lines elided | ↑ open up ↑ | 
 129  137  <S0>static      return (DT_KEY_STATIC);
 130  138  <S0>string      return (DT_KEY_STRING);
 131  139  <S0>stringof    return (DT_TOK_STRINGOF);
 132  140  <S0>struct      return (DT_KEY_STRUCT);
 133  141  <S0>switch      return (DT_KEY_SWITCH);
 134  142  <S0>this        return (DT_KEY_THIS);
 135  143  <S0>translator  return (DT_KEY_XLATOR);
 136  144  <S0>typedef     return (DT_KEY_TYPEDEF);
 137  145  <S0>union       return (DT_KEY_UNION);
 138  146  <S0>unsigned    return (DT_KEY_UNSIGNED);
      147 +<S0>userland    return (DT_KEY_USERLAND);
 139  148  <S0>void        return (DT_KEY_VOID);
 140  149  <S0>volatile    return (DT_KEY_VOLATILE);
 141  150  <S0>while       return (DT_KEY_WHILE);
 142  151  <S0>xlate       return (DT_TOK_XLATE);
 143  152  
 144  153  <S2>auto        { yybegin(YYS_EXPR);    return (DT_KEY_AUTO); }
 145  154  <S2>char        { yybegin(YYS_EXPR);    return (DT_KEY_CHAR); }
 146  155  <S2>const       { yybegin(YYS_EXPR);    return (DT_KEY_CONST); }
 147  156  <S2>counter     { yybegin(YYS_DEFINE);  return (DT_KEY_COUNTER); }
 148  157  <S2>double      { yybegin(YYS_EXPR);    return (DT_KEY_DOUBLE); }
 149  158  <S2>enum        { yybegin(YYS_EXPR);    return (DT_KEY_ENUM); }
 150  159  <S2>extern      { yybegin(YYS_EXPR);    return (DT_KEY_EXTERN); }
 151  160  <S2>float       { yybegin(YYS_EXPR);    return (DT_KEY_FLOAT); }
 152  161  <S2>import      { yybegin(YYS_EXPR);    return (DT_KEY_IMPORT); }
 153  162  <S2>inline      { yybegin(YYS_DEFINE);  return (DT_KEY_INLINE); }
 154  163  <S2>int         { yybegin(YYS_EXPR);    return (DT_KEY_INT); }
 155  164  <S2>long        { yybegin(YYS_EXPR);    return (DT_KEY_LONG); }
 156  165  <S2>provider    { yybegin(YYS_DEFINE);  return (DT_KEY_PROVIDER); }
 157  166  <S2>register    { yybegin(YYS_EXPR);    return (DT_KEY_REGISTER); }
 158  167  <S2>restrict    { yybegin(YYS_EXPR);    return (DT_KEY_RESTRICT); }
 159  168  <S2>self        { yybegin(YYS_EXPR);    return (DT_KEY_SELF); }
 160  169  <S2>short       { yybegin(YYS_EXPR);    return (DT_KEY_SHORT); }
 161  170  <S2>signed      { yybegin(YYS_EXPR);    return (DT_KEY_SIGNED); }
 162  171  <S2>static      { yybegin(YYS_EXPR);    return (DT_KEY_STATIC); }
 163  172  <S2>string      { yybegin(YYS_EXPR);    return (DT_KEY_STRING); }
 164  173  <S2>struct      { yybegin(YYS_EXPR);    return (DT_KEY_STRUCT); }
 165  174  <S2>this        { yybegin(YYS_EXPR);    return (DT_KEY_THIS); }
 166  175  <S2>translator  { yybegin(YYS_DEFINE);  return (DT_KEY_XLATOR); }
 167  176  <S2>typedef     { yybegin(YYS_EXPR);    return (DT_KEY_TYPEDEF); }
 168  177  <S2>union       { yybegin(YYS_EXPR);    return (DT_KEY_UNION); }
 169  178  <S2>unsigned    { yybegin(YYS_EXPR);    return (DT_KEY_UNSIGNED); }
 170  179  <S2>void        { yybegin(YYS_EXPR);    return (DT_KEY_VOID); }
 171  180  <S2>volatile    { yybegin(YYS_EXPR);    return (DT_KEY_VOLATILE); }
 172  181  
  173  182  <S0>"$$"[0-9]+  {
  174  183                          int i = atoi(yytext + 2); /* digits after the $$; NOTE(review): atoi() is undefined on overflow */
  175  184                          char *v = ""; /* value used when argument i was not supplied */
  176  185  
  177  186                          /*
  178  187                           * A macro argument reference substitutes the text of
  179  188                           * an argument in place of the current token.  When we
  180  189                           * see $$<d> we fetch the saved string from pcb_sargv
  181  190                           * (or use the default argument if the option has been
  182  191                           * set and the argument hasn't been specified) and
  183  192                           * return a token corresponding to this string.
  184  193                           */
  185  194                          if (i < 0 || (i >= yypcb->pcb_sargc &&
  186  195                              !(yypcb->pcb_cflags & DTRACE_C_DEFARG))) {
  187  196                                  xyerror(D_MACRO_UNDEF, "macro argument %s is "
  188  197                                      "not defined\n", yytext);
  189  198                          }
  190  199  
  191  200                          if (i < yypcb->pcb_sargc) {
  192  201                                  v = yypcb->pcb_sargv[i]; /* get val from pcb */
  193  202                                  yypcb->pcb_sflagv[i] |= DT_IDFLG_REF; /* mark argument i as referenced */
  194  203                          }
  195  204  
  196  205                          if ((yylval.l_str = strdup(v)) == NULL)
  197  206                                  longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
  198  207  
  199  208                          (void) stresc2chr(yylval.l_str); /* presumably converts escape sequences in place -- confirm */
  200  209                          return (DT_TOK_STRING);
  201  210                  }
 202  211  
  203  212  <S0>"$"[0-9]+   {
  204  213                          int i = atoi(yytext + 1); /* digits after the $; NOTE(review): atoi() is undefined on overflow */
  205  214                          char *p, *v = "0"; /* v keeps this default when argument i was not supplied */
  206  215  
  207  216                          /*
  208  217                           * A macro argument reference substitutes the text of
  209  218                           * one identifier or integer pattern for another.  When
  210  219                           * we see $<d> we fetch the saved string from pcb_sargv
  211  220                           * (or use the default argument if the option has been
  212  221                           * set and the argument hasn't been specified) and
  213  222                           * return a token corresponding to this string.
  214  223                           */
  215  224                          if (i < 0 || (i >= yypcb->pcb_sargc &&
  216  225                              !(yypcb->pcb_cflags & DTRACE_C_DEFARG))) {
  217  226                                  xyerror(D_MACRO_UNDEF, "macro argument %s is "
  218  227                                      "not defined\n", yytext);
  219  228                          }
  220  229  
  221  230                          if (i < yypcb->pcb_sargc) {
  222  231                                  v = yypcb->pcb_sargv[i]; /* get val from pcb */
  223  232                                  yypcb->pcb_sflagv[i] |= DT_IDFLG_REF; /* mark argument i as referenced */
  224  233                          }
  225  234  
  226  235                          /*
  227  236                           * If the macro text is not a valid integer or ident,
  228  237                           * then we treat it as a string.  The string may be
  229  238                           * optionally enclosed in quotes, which we strip.
  230  239                           */
  231  240                          if (strbadidnum(v)) {
  232  241                                  size_t len = strlen(v);
  233  242  
  234  243                                  if (len != 1 && *v == '"' && v[len - 1] == '"')
  235  244                                          yylval.l_str = strndup(v + 1, len - 2);
  236  245                                  else
  237  246                                          yylval.l_str = strndup(v, len);
  238  247  
  239  248                                  if (yylval.l_str == NULL)
  240  249                                          longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
  241  250  
  242  251                                  (void) stresc2chr(yylval.l_str);
  243  252                                  return (DT_TOK_STRING);
  244  253                          }
  245  254  
  246  255                          /*
  247  256                           * If the macro text is not a string and begins with a
  248  257                           * digit or a +/- sign, process it as an integer token.
  249  258                           */
  250  259                          if (isdigit(v[0]) || v[0] == '-' || v[0] == '+') {
  251  260                                  if (isdigit(v[0]))
  252  261                                          yyintprefix = 0;
  253  262                                  else
  254  263                                          yyintprefix = *v++; /* record the sign and step past it */
  255  264  
  256  265                                  errno = 0; /* cleared so ERANGE below is from strtoull */
  257  266                                  yylval.l_int = strtoull(v, &p, 0); /* base 0: decimal, octal or hex */
  258  267                                  (void) strncpy(yyintsuffix, p,
  259  268                                      sizeof (yyintsuffix)); /* NOTE(review): unterminated if text at p fills the buffer -- confirm strbadidnum() rules that out */
  260  269                                  yyintdecimal = *v != '0'; /* false when the digits start with 0 */
  261  270  
  262  271                                  if (errno == ERANGE) {
  263  272                                          xyerror(D_MACRO_OFLOW, "macro argument"
  264  273                                              " %s constant %s results in integer"
  265  274                                              " overflow\n", yytext, v);
  266  275                                  }
  267  276  
  268  277                                  return (DT_TOK_INT);
  269  278                          }
  270  279  
  271  280                          return (id_or_type(v)); /* otherwise lex the text as an identifier or type name */
  272  281                  }
 273  282  
  274  283  <S0>"$$"{RGX_IDENT} {
  275  284                          dt_ident_t *idp = dt_idhash_lookup(
  276  285                              yypcb->pcb_hdl->dt_macros, yytext + 2); /* look up the name after the $$ */
  277  286  
  278  287                          char s[16]; /* enough for UINT_MAX + \0 */
  279  288  
  280  289                          if (idp == NULL) {
  281  290                                  xyerror(D_MACRO_UNDEF, "macro variable %s "
  282  291                                      "is not defined\n", yytext);
  283  292                          } /* NOTE(review): idp is dereferenced below; relies on xyerror() not returning -- confirm */
  284  293  
  285  294                          /*
  286  295                           * For the moment, all current macro variables are of
  287  296                           * type id_t (refer to dtrace_update() for details).
  288  297                           */
  289  298                          (void) snprintf(s, sizeof (s), "%u", idp->di_id); /* render the id as decimal text */
  290  299                          if ((yylval.l_str = strdup(s)) == NULL)
  291  300                                  longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
  292  301  
  293  302                          return (DT_TOK_STRING);
  294  303                  }
 295  304  
 296  305  <S0>"$"{RGX_IDENT} {
 297  306                          dt_ident_t *idp = dt_idhash_lookup(
 298  307                              yypcb->pcb_hdl->dt_macros, yytext + 1);
 299  308  
 300  309                          if (idp == NULL) {
 301  310                                  xyerror(D_MACRO_UNDEF, "macro variable %s "
 302  311                                      "is not defined\n", yytext);
 303  312                          }
 304  313  
 305  314                          /*
 306  315                           * For the moment, all current macro variables are of
  
    | ↓ open down ↓ | 158 lines elided | ↑ open up ↑ | 
 307  316                           * type id_t (refer to dtrace_update() for details).
 308  317                           */
 309  318                          yylval.l_int = (intmax_t)(int)idp->di_id;
 310  319                          yyintprefix = 0;
 311  320                          yyintsuffix[0] = '\0';
 312  321                          yyintdecimal = 1;
 313  322  
 314  323                          return (DT_TOK_INT);
 315  324                  }
 316  325  
 317      -<S0>{RGX_IDENT} {
      326 +<S0>{RGX_IDENT} |
      327 +<S0>{RGX_MOD_IDENT}{RGX_IDENT} |
      328 +<S0>{RGX_MOD_IDENT} {
 318  329                          return (id_or_type(yytext));
 319  330                  }
 320  331  
 321  332  <S0>{RGX_AGG}   {
 322  333                          if ((yylval.l_str = strdup(yytext)) == NULL)
 323  334                                  longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 324  335                          return (DT_TOK_AGG);
 325  336                  }
 326  337  
 327  338  <S0>"@"         {
 328  339                          if ((yylval.l_str = strdup("@_")) == NULL)
 329  340                                  longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 330  341                          return (DT_TOK_AGG);
 331  342                  }
 332  343  
  333  344  <S0>{RGX_INT}   |
  334  345  <S2>{RGX_INT}   |
  335  346  <S3>{RGX_INT}   {
  336  347                          char *p; /* set by strtoull to the first unconverted character */
  337  348  
  338  349                          errno = 0; /* cleared so ERANGE below is from strtoull */
  339  350                          yylval.l_int = strtoull(yytext, &p, 0); /* base 0: decimal, octal or hex */
  340  351                          yyintprefix = 0;
  341  352                          (void) strncpy(yyintsuffix, p, sizeof (yyintsuffix)); /* NOTE(review): unterminated if suffix fills the buffer -- confirm size */
  342  353                          yyintdecimal = yytext[0] != '0'; /* false when the constant starts with 0 */
  343  354  
  344  355                          if (errno == ERANGE) {
  345  356                                  xyerror(D_INT_OFLOW, "constant %s results in "
  346  357                                      "integer overflow\n", yytext);
  347  358                          }
  348  359  
  349  360                          if (*p != '\0' && strchr("uUlL", *p) == NULL) { /* leftover text that is not a u/l suffix */
  350  361                                  xyerror(D_INT_DIGIT, "constant %s contains "
  351  362                                      "invalid digit %c\n", yytext, *p);
  352  363                          }
  353  364  
  354  365                          if ((YYSTATE) != S3)
  355  366                                  return (DT_TOK_INT);
  356  367  
  357  368                          yypragma = dt_node_link(yypragma,
  358  369                              dt_node_int(yylval.l_int)); /* in S3 (control line) append to the pragma list instead of returning a token */
  359  370                  }
 360  371  
 361  372  <S0>{RGX_FP}    yyerror("floating-point constants are not permitted\n");
 362  373  
 363  374  <S0>\"{RGX_STR}$ |
 364  375  <S3>\"{RGX_STR}$ xyerror(D_STR_NL, "newline encountered in string literal");
 365  376  
 366  377  <S0>\"{RGX_STR}\" |
 367  378  <S3>\"{RGX_STR}\" {
 368  379                          /*
 369  380                           * Quoted string -- convert C escape sequences and
 370  381                           * return the string as a token.
 371  382                           */
 372  383                          yylval.l_str = strndup(yytext + 1, yyleng - 2);
 373  384  
 374  385                          if (yylval.l_str == NULL)
 375  386                                  longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 376  387  
 377  388                          (void) stresc2chr(yylval.l_str);
 378  389                          if ((YYSTATE) != S3)
 379  390                                  return (DT_TOK_STRING);
 380  391  
 381  392                          yypragma = dt_node_link(yypragma,
 382  393                              dt_node_string(yylval.l_str));
 383  394                  }
 384  395  
 385  396  <S0>'{RGX_CHR}$ xyerror(D_CHR_NL, "newline encountered in character constant");
 386  397  
  387  398  <S0>'{RGX_CHR}' {
  388  399                          char *s, *p, *q;
  389  400                          size_t nbytes;
  390  401  
  391  402                          /*
  392  403                           * Character constant -- convert C escape sequences and
  393  404                           * return the character as an integer immediate value.
  394  405                           */
  395  406                          if (yyleng == 2) /* nothing between the two quotes */
  396  407                                  xyerror(D_CHR_NULL, "empty character constant");
  397  408  
  398  409                          s = yytext + 1; /* skip the opening quote */
  399  410                          yytext[yyleng - 1] = '\0'; /* overwrite the closing quote */
  400  411                          nbytes = stresc2chr(s); /* presumably converts escapes in place and returns the new length -- confirm */
  401  412                          yylval.l_int = 0;
  402  413                          yyintprefix = 0;
  403  414                          yyintsuffix[0] = '\0';
  404  415                          yyintdecimal = 1;
  405  416  
  406  417                          if (nbytes > sizeof (yylval.l_int)) {
  407  418                                  xyerror(D_CHR_OFLOW, "character constant is "
  408  419                                      "too long");
  409  420                          }
  410  421  #ifdef _LITTLE_ENDIAN
  411  422                          p = ((char *)&yylval.l_int) + nbytes - 1; /* first character goes to byte nbytes - 1 */
  412  423                          for (q = s; nbytes != 0; nbytes--)
  413  424                                  *p-- = *q++; /* copy reversed so the last character lands in byte 0 */
  414  425  #else
  415  426                          bcopy(s, ((char *)&yylval.l_int) +
  416  427                              sizeof (yylval.l_int) - nbytes, nbytes); /* copy in order into the final nbytes bytes */
  417  428  #endif
  418  429                          return (DT_TOK_INT);
  419  430                  }
 420  431  
 421  432  <S0>"/*"        |
 422  433  <S2>"/*"        {
 423  434                          yypcb->pcb_cstate = (YYSTATE);
 424  435                          BEGIN(S1);
 425  436                  }
 426  437  
 427  438  <S0>{RGX_INTERP} |
 428  439  <S2>{RGX_INTERP} ;      /* discard any #! lines */
 429  440  
 430  441  <S0>{RGX_CTL}   |
 431  442  <S2>{RGX_CTL}   |
 432  443  <S4>{RGX_CTL}   {
 433  444                          assert(yypragma == NULL);
 434  445                          yypcb->pcb_cstate = (YYSTATE);
 435  446                          BEGIN(S3);
 436  447                  }
 437  448  
 438  449  <S4>.           ;       /* discard */
 439  450  <S4>"\n"        ;       /* discard */
 440  451  
  441  452  <S0>"/"         {
  442  453                          int c, tok;
  443  454  
  444  455                          /*
  445  456                           * The use of "/" as the predicate delimiter and as the
  446  457                           * integer division symbol requires special lookahead
  447  458                           * to avoid a shift/reduce conflict in the D grammar.
  448  459                           * We look ahead to the next non-whitespace character.
  449  460                           * If we encounter EOF, ";", "{", or "/", then this "/"
  450  461                           * closes the predicate and we return DT_TOK_EPRED.
  451  462                           * If we encounter anything else, it's DT_TOK_DIV.
  452  463                           */
  453  464                          while ((c = input()) != 0) { /* a return of 0 is treated as EOF */
  454  465                                  if (strchr("\f\n\r\t\v ", c) == NULL)
  455  466                                          break; /* c is the first non-whitespace character */
  456  467                          }
  457  468  
  458  469                          if (c == 0 || c == ';' || c == '{' || c == '/') {
  459  470                                  if (yypcb->pcb_parens != 0) { /* unbalanced ( inside the predicate */
  460  471                                          yyerror("closing ) expected in "
  461  472                                              "predicate before /\n");
  462  473                                  }
  463  474                                  if (yypcb->pcb_brackets != 0) { /* unbalanced [ inside the predicate */
  464  475                                          yyerror("closing ] expected in "
  465  476                                              "predicate before /\n");
  466  477                                  }
  467  478                                  tok = DT_TOK_EPRED;
  468  479                          } else
  469  480                                  tok = DT_TOK_DIV;
  470  481  
  471  482                          unput(c); /* push back only the lookahead character; skipped whitespace is dropped */
  472  483                          return (tok);
  473  484                  }
 474  485  
 475  486  <S0>"("         {
 476  487                          yypcb->pcb_parens++;
 477  488                          return (DT_TOK_LPAR);
 478  489                  }
 479  490  
 480  491  <S0>")"         {
 481  492                          if (--yypcb->pcb_parens < 0)
 482  493                                  yyerror("extra ) in input stream\n");
 483  494                          return (DT_TOK_RPAR);
 484  495                  }
 485  496  
 486  497  <S0>"["         {
 487  498                          yypcb->pcb_brackets++;
 488  499                          return (DT_TOK_LBRAC);
 489  500                  }
 490  501  
 491  502  <S0>"]"         {
 492  503                          if (--yypcb->pcb_brackets < 0)
 493  504                                  yyerror("extra ] in input stream\n");
 494  505                          return (DT_TOK_RBRAC);
 495  506                  }
 496  507  
 497  508  <S0>"{"         |
 498  509  <S2>"{"         {
 499  510                          yypcb->pcb_braces++;
 500  511                          return ('{');
 501  512                  }
 502  513  
 503  514  <S0>"}"         {
 504  515                          if (--yypcb->pcb_braces < 0)
 505  516                                  yyerror("extra } in input stream\n");
 506  517                          return ('}');
 507  518                  }
 508  519  
 509  520  <S0>"|"         return (DT_TOK_BOR);
 510  521  <S0>"^"         return (DT_TOK_XOR);
 511  522  <S0>"&"         return (DT_TOK_BAND);
 512  523  <S0>"&&"        return (DT_TOK_LAND);
 513  524  <S0>"^^"        return (DT_TOK_LXOR);
 514  525  <S0>"||"        return (DT_TOK_LOR);
 515  526  <S0>"=="        return (DT_TOK_EQU);
 516  527  <S0>"!="        return (DT_TOK_NEQ);
 517  528  <S0>"<"         return (DT_TOK_LT);
 518  529  <S0>"<="        return (DT_TOK_LE);
 519  530  <S0>">"         return (DT_TOK_GT);
 520  531  <S0>">="        return (DT_TOK_GE);
 521  532  <S0>"<<"        return (DT_TOK_LSH);
 522  533  <S0>">>"        return (DT_TOK_RSH);
 523  534  <S0>"+"         return (DT_TOK_ADD);
 524  535  <S0>"-"         return (DT_TOK_SUB);
 525  536  <S0>"*"         return (DT_TOK_MUL);
 526  537  <S0>"%"         return (DT_TOK_MOD);
 527  538  <S0>"~"         return (DT_TOK_BNEG);
 528  539  <S0>"!"         return (DT_TOK_LNEG);
 529  540  <S0>"?"         return (DT_TOK_QUESTION);
 530  541  <S0>":"         return (DT_TOK_COLON);
 531  542  <S0>"."         return (DT_TOK_DOT);
 532  543  <S0>"->"        return (DT_TOK_PTR);
 533  544  <S0>"="         return (DT_TOK_ASGN);
 534  545  <S0>"+="        return (DT_TOK_ADD_EQ);
 535  546  <S0>"-="        return (DT_TOK_SUB_EQ);
 536  547  <S0>"*="        return (DT_TOK_MUL_EQ);
 537  548  <S0>"/="        return (DT_TOK_DIV_EQ);
 538  549  <S0>"%="        return (DT_TOK_MOD_EQ);
 539  550  <S0>"&="        return (DT_TOK_AND_EQ);
 540  551  <S0>"^="        return (DT_TOK_XOR_EQ);
 541  552  <S0>"|="        return (DT_TOK_OR_EQ);
 542  553  <S0>"<<="       return (DT_TOK_LSH_EQ);
 543  554  <S0>">>="       return (DT_TOK_RSH_EQ);
 544  555  <S0>"++"        return (DT_TOK_ADDADD);
 545  556  <S0>"--"        return (DT_TOK_SUBSUB);
 546  557  <S0>"..."       return (DT_TOK_ELLIPSIS);
 547  558  <S0>","         return (DT_TOK_COMMA);
 548  559  <S0>";"         return (';');
 549  560  <S0>{RGX_WS}    ; /* discard */
 550  561  <S0>"\\"\n      ; /* discard */
 551  562  <S0>.           yyerror("syntax error near \"%c\"\n", yytext[0]);
 552  563  
 553  564  <S1>"/*"        yyerror("/* encountered inside a comment\n");
 554  565  <S1>"*/"        BEGIN(yypcb->pcb_cstate);
 555  566  <S1>.|\n        ; /* discard */
 556  567  
  557  568  <S2>{RGX_PSPEC} {
  558  569                          /*
  559  570                           * S2 has an ambiguity because RGX_PSPEC includes '*'
  560  571                           * as a glob character and '*' also can be DT_TOK_STAR.
  561  572                           * Since lex always matches the longest token, this
  562  573                           * rule can be matched by an input string like "int*",
  563  574                           * which could begin a global variable declaration such
  564  575                           * as "int*x;" or could begin a RGX_PSPEC with globbing
  565  576                           * such as "int* { trace(timestamp); }".  If C_PSPEC is
  566  577                           * not set, we must resolve the ambiguity in favor of
  567  578                           * the type and perform lexer pushback if the fragment
  568  579                           * before '*' or entire fragment matches a type name.
  569  580                           * If C_PSPEC is set, we always return a PSPEC token.
  570  581                           * If C_PSPEC is off, the user can avoid ambiguity by
  571  582                           * including a ':' delimiter in the specifier, which
  572  583                           * they should be doing anyway to specify the provider.
  573  584                           */
  574  585                          if (!(yypcb->pcb_cflags & DTRACE_C_PSPEC) &&
  575  586                              strchr(yytext, ':') == NULL) {
  576  587  
  577  588                                  char *p = strchr(yytext, '*'); /* first star in the match, if any */
  578  589                                  char *q = yytext + yyleng - 1; /* last character of the match */
  579  590  
  580  591                                  if (p != NULL && p > yytext)
  581  592                                          *p = '\0'; /* prune yytext */
  582  593  
  583  594                                  if (dt_type_lookup(yytext, NULL) == 0) { /* 0 presumably means the name is a known type -- confirm */
  584  595                                          yylval.l_str = strdup(yytext); /* copied while pruned, so only the part before the star */
  585  596  
  586  597                                          if (yylval.l_str == NULL) {
  587  598                                                  longjmp(yypcb->pcb_jmpbuf,
  588  599                                                      EDT_NOMEM);
  589  600                                          }
  590  601  
  591  602                                          if (p != NULL && p > yytext) {
  592  603                                                  for (*p = '*'; q >= p; q--) /* restore the star, then walk back from the end */
  593  604                                                          unput(*q); /* push back the star and everything after it */
  594  605                                          }
  595  606  
  596  607                                          yybegin(YYS_EXPR);
  597  608                                          return (DT_TOK_TNAME);
  598  609                                  }
  599  610  
  600  611                                  if (p != NULL && p > yytext)
  601  612                                          *p = '*'; /* restore yytext */
  602  613                          }
  603  614  
  604  615                          if ((yylval.l_str = strdup(yytext)) == NULL)
  605  616                                  longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
  606  617  
  607  618                          return (DT_TOK_PSPEC); /* not a type name: hand the whole match to the parser */
  608  619                  }
 609  620  
 610  621  <S2>"/"         return (DT_TOK_DIV); /* S2 is selected by yybegin(YYS_CLAUSE) */
 611  622  <S2>","         return (DT_TOK_COMMA);
 612  623  
 613  624  <S2>{RGX_WS}    ; /* discard */
 614  625  <S2>.           yyerror("syntax error near \"%c\"\n", yytext[0]); /* catch-all */
 615  626  
 616  627  <S3>\n          {
                                   /*
                                    * A newline ends the control line.  Hand the list
                                    * of nodes accumulated in yypragma to dt_pragma(),
                                    * clear the list, and switch to the start condition
                                    * recorded in pcb_cstate (presumably the condition
                                    * that was active when the control line began; it
                                    * is assigned outside this part of the file).
                                    */
 617  628                          dt_pragma(yypragma);
 618  629                          yypragma = NULL;
 619  630                          BEGIN(yypcb->pcb_cstate);
 620  631                  }
 621  632  
 622  633  <S3>[\f\t\v ]+  ; /* discard */
 623  634  
 624  635  <S3>[^\f\n\t\v "]+ {
                                   /*
                                    * Match one word of a control line: a run of
                                    * characters containing no form feed, newline, tab,
                                    * vertical tab, space, or double quote.  The word
                                    * is copied, wrapped in an identifier node, and
                                    * appended to the yypragma list that the newline
                                    * rule passes to dt_pragma().  Of the S3 rules
                                    * visible here, a '"' is matched only by the
                                    * catch-all error rule that follows this one.
                                    */
 625  636                          dt_node_t *dnp;
 626  637  
 627  638                          if ((yylval.l_str = strdup(yytext)) == NULL)
 628  639                                  longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 629  640  
 630  641                          /*
 631  642                           * We want to call dt_node_ident() here, but we can't
 632  643                           * because it will expand inlined identifiers, which we
 633  644                           * don't want to do from #pragma context in order to
 634  645                           * support pragmas that apply to the ident itself.  We
 635  646                           * call dt_node_string() and then reset dn_op instead.
                                    * Note that dn_kind is overwritten as well as dn_op.
 636  647                           */
 637  648                          dnp = dt_node_string(yylval.l_str);
 638  649                          dnp->dn_kind = DT_NODE_IDENT;
 639  650                          dnp->dn_op = DT_TOK_IDENT;
 640  651                          yypragma = dt_node_link(yypragma, dnp);
 641  652                  }
 642  653  
 643  654  <S3>.           yyerror("syntax error near \"%c\"\n", yytext[0]);
 644  655  
 645  656  %%
 646  657  
 647  658  /*
 648  659   * yybegin provides a wrapper for use from C code around the lex BEGIN() macro.
 649  660   * We use two main states for lexing because probe descriptions use a syntax
 650  661   * that is incompatible with the normal D tokens (e.g. names can contain "-").
 651  662   * yybegin also handles the job of switching between two lists of dt_nodes
 652  663   * as we allocate persistent definitions, like inlines, and transient nodes
 653  664   * that will be freed once we are done parsing the current program file.
            *
            * The requested state maps to a lex start condition as follows: YYS_CLAUSE
            * selects S2, YYS_DEFINE and YYS_EXPR select S0, YYS_CONTROL selects S4,
            * and YYS_DONE leaves the start condition unchanged.  Any other value is
            * reported through xyerror() as an internal error.  Requesting the state
            * that is already current is a no-op.
 654  665   */
 655  666  void
 656  667  yybegin(yystate_t state)
 657  668  {
 658  669  #ifdef  YYDEBUG
 659  670          yydebug = _dtrace_debug;
 660  671  #endif
 661  672          if (yypcb->pcb_yystate == state)
 662  673                  return; /* nothing to do if we're in the state already */
 663  674  
                   /*
                    * Leaving YYS_DEFINE: the list that was set aside in pcb_hold when
                    * YYS_DEFINE was entered becomes the current pcb_list again.
                    */
 664  675          if (yypcb->pcb_yystate == YYS_DEFINE) {
 665  676                  yypcb->pcb_list = yypcb->pcb_hold;
 666  677                  yypcb->pcb_hold = NULL;
 667  678          }
 668  679  
 669  680          switch (state) {
 670  681          case YYS_CLAUSE:
 671  682                  BEGIN(S2);
 672  683                  break;
 673  684          case YYS_DEFINE:
                           /*
                            * Entering YYS_DEFINE: set the current list aside in
                            * pcb_hold and start from an empty pcb_list.  The assert
                            * checks that no list is already being held.  Lexing then
                            * continues in S0, exactly as for YYS_EXPR.
                            */
 674  685                  assert(yypcb->pcb_hold == NULL);
 675  686                  yypcb->pcb_hold = yypcb->pcb_list;
 676  687                  yypcb->pcb_list = NULL;
 677  688                  /*FALLTHRU*/
 678  689          case YYS_EXPR:
 679  690                  BEGIN(S0);
 680  691                  break;
 681  692          case YYS_DONE:
 682  693                  break;
 683  694          case YYS_CONTROL:
 684  695                  BEGIN(S4);
 685  696                  break;
 686  697          default:
 687  698                  xyerror(D_UNKNOWN, "internal error -- bad yystate %d\n", state);
 688  699          }
 689  700  
                   /* Remember the new state for the early-return test above. */
 690  701          yypcb->pcb_yystate = state;
 691  702  }
 692  703  
 693  704  void
 694  705  yyinit(dt_pcb_t *pcb)
 695  706  {
                   /*
                    * Bind the lexer to the given parser control block and reset the
                    * lexer's own state: line numbering restarts at 1, no pragma nodes
                    * are pending, and the pushback buffer is empty (input() only pops
                    * from it while yysptr is above yysbuf).
                    */
 696  707          yypcb = pcb;
 697  708          yylineno = 1;
 698  709          yypragma = NULL;
 699  710          yysptr = yysbuf;
 700  711  }
 701  712  
 702  713  /*
 703  714   * Given a lexeme 's' (typically yytext), set yylval and return an appropriate
 704  715   * token to the parser indicating either an identifier or a typedef name.
 705  716   * User-defined global variables always take precedence over types, but we do
 706  717   * use some heuristics because D programs can look at an ever-changing set of
 707  718   * kernel types and also can implicitly instantiate variables by assignment,
 708  719   * unlike in C.  The code here is ordered carefully as lookups are not cheap.
            *
            * The lexeme is duplicated with strdup() and the copy is stored in
            * yylval.l_str.  If that allocation, or the later identifier insertion,
            * fails, we longjmp() to pcb_jmpbuf with EDT_NOMEM.  The return value is
            * either DT_TOK_IDENT or DT_TOK_TNAME.
 709  720   */
 710  721  static int
 711  722  id_or_type(const char *s)
 712  723  {
 713  724          dtrace_hdl_t *dtp = yypcb->pcb_hdl;
 714  725          dt_decl_t *ddp = yypcb->pcb_dstack.ds_decl;
 715  726          int c0, c1, ttok = DT_TOK_TNAME;
 716  727          dt_ident_t *idp;
 717  728  
 718  729          if ((s = yylval.l_str = strdup(s)) == NULL)
 719  730                  longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 720  731  
 721  732          /*
 722  733           * If the lexeme is a global variable or likely identifier or *not* a
 723  734           * type_name, then it is an identifier token.
 724  735           */
 725  736          if (dt_idstack_lookup(&yypcb->pcb_globals, s) != NULL ||
 726  737              dt_idhash_lookup(yypcb->pcb_idents, s) != NULL ||
 727  738              dt_type_lookup(s, NULL) != 0)
 728  739                  return (DT_TOK_IDENT);
 729  740  
 730  741          /*
 731  742           * If we're in the midst of parsing a declaration and a type_specifier
 732  743           * has already been shifted, then return DT_TOK_IDENT instead of TNAME.
 733  744           * This semantic is necessary to permit valid ISO C code such as:
 734  745           *
 735  746           * typedef int foo;
 736  747           * struct s { foo foo; };
 737  748           *
 738  749           * without causing shift/reduce conflicts in the direct_declarator part
 739  750           * of the grammar.  The result is that we must check for conflicting
 740  751           * redeclarations of the same identifier as part of dt_node_decl().
 741  752           */
 742  753          if (ddp != NULL && ddp->dd_name != NULL)
 743  754                  return (DT_TOK_IDENT);
 744  755  
 745  756          /*
 746  757           * If the lexeme is a type name and we are not in a program clause,
 747  758           * then always interpret it as a type and return DT_TOK_TNAME.
 748  759           */
 749  760          if ((YYSTATE) != S0)
 750  761                  return (DT_TOK_TNAME);
 751  762  
 752  763          /*
 753  764           * If the lexeme matches a type name but is in a program clause, then
 754  765           * it could be a type or it could be an undefined variable.  Peek at
 755  766           * the next token to decide.  If we see ++, --, [, or =, we know there
 756  767           * might be an assignment that is trying to create a global variable,
 757  768           * so we optimistically return DT_TOK_IDENT.  There is no harm in being
 758  769           * wrong: a type_name followed by ++, --, [, or = is a syntax error.
 759  770           */
 760  771          while ((c0 = input()) != 0) {
 761  772                  if (strchr("\f\n\r\t\v ", c0) == NULL)
 762  773                          break;
 763  774          }
 764  775  
                   /*
                    * c0 is now the first non-whitespace character after the lexeme,
                    * or 0 if input() reached end-of-input.  The whitespace skipped by
                    * the loop above is consumed; only c0 (and c1, when one is read)
                    * is pushed back below.
                    */
 765  776          switch (c0) {
 766  777          case '+':
 767  778          case '-':
                           /* Only a doubled operator (++ or --) selects IDENT. */
 768  779                  if ((c1 = input()) == c0)
 769  780                          ttok = DT_TOK_IDENT;
 770  781                  unput(c1);
 771  782                  break;
 772  783  
 773  784          case '=':
                           /* A lone '=' selects IDENT; "==" does not. */
 774  785                  if ((c1 = input()) != c0)
 775  786                          ttok = DT_TOK_IDENT;
 776  787                  unput(c1);
 777  788                  break;
 778  789          case '[':
 779  790                  ttok = DT_TOK_IDENT;
 780  791                  break;
 781  792          }
 782  793  
                   /*
                    * Having chosen IDENT, add the name to pcb_idents so that later
                    * occurrences of this lexeme satisfy the dt_idhash_lookup() test
                    * near the top of this function.
                    */
 783  794          if (ttok == DT_TOK_IDENT) {
 784  795                  idp = dt_idhash_insert(yypcb->pcb_idents, s, DT_IDENT_SCALAR, 0,
 785  796                      0, _dtrace_defattr, 0, &dt_idops_thaw, NULL, dtp->dt_gen);
 786  797  
 787  798                  if (idp == NULL)
 788  799                          longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
 789  800          }
 790  801  
                   /*
                    * unput() stacks characters and input() pops the most recent one,
                    * so pushing c0 after c1 makes c0 the next character read.
                    */
 791  802          unput(c0);
 792  803          return (ttok);
 793  804  }
 794  805  
 795  806  static int
 796  807  input(void)
 797  808  {
                   /*
                    * Return the next character of lexer input, or 0 at end-of-input.
                    * Characters come, in order of preference, from the pushback buffer
                    * filled by unput(), from the stream pcb_fileptr if it is set, and
                    * otherwise from the pcb_strlen bytes at pcb_string.  String bytes
                    * are read as unsigned char, so they are never negative.
                    */
 798  809          int c;
 799  810  
 800  811          if (yysptr > yysbuf)
 801  812                  c = *--yysptr;
 802  813          else if (yypcb->pcb_fileptr != NULL)
 803  814                  c = fgetc(yypcb->pcb_fileptr);
 804  815          else if (yypcb->pcb_strptr < yypcb->pcb_string + yypcb->pcb_strlen)
 805  816                  c = *(unsigned char *)(yypcb->pcb_strptr++);
 806  817          else
 807  818                  c = EOF;
 808  819  
                   /* Count every newline handed out; unput() reverses this. */
 809  820          if (c == '\n')
 810  821                  yylineno++;
 811  822  
 812  823          if (c != EOF)
 813  824                  return (c);
 814  825  
                   /*
                    * End of input.  Running out of input in S1 or S3 is reported as
                    * an unterminated comment or control line respectively, and a read
                    * error on the stream is turned into an EDT_FIO failure through
                    * longjmp().  Otherwise 0 is returned in place of EOF.
                    */
 815  826          if ((YYSTATE) == S1)
 816  827                  yyerror("end-of-file encountered before matching */\n");
 817  828  
 818  829          if ((YYSTATE) == S3)
 819  830                  yyerror("end-of-file encountered before end of control line\n");
 820  831  
 821  832          if (yypcb->pcb_fileptr != NULL && ferror(yypcb->pcb_fileptr))
 822  833                  longjmp(yypcb->pcb_jmpbuf, EDT_FIO);
 823  834  
 824  835          return (0); /* EOF */
 825  836  }
 826  837  
 827  838  static void
 828  839  unput(int c)
 829  840  {
                   /*
                    * Push c onto the pushback buffer so that the next input() call
                    * returns it.  A pushed-back newline decrements yylineno, because
                    * input() will count it again when it is re-read.  yytchar is set
                    * to the character that was pushed back.
                    *
                    * NOTE(review): nothing here checks yysptr against the end of
                    * yysbuf, whose size is declared outside this part of the file --
                    * confirm that callers cannot push back more than it can hold.
                    */
 830  841          if (c == '\n')
 831  842                  yylineno--;
 832  843  
 833  844          *yysptr++ = c;
 834  845          yytchar = c;
 835  846  }
  
    | ↓ open down ↓ | 508 lines elided | ↑ open up ↑ | 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX