1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  28  * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
  29  */
  30 
  31 #include <sys/modctl.h>
  32 #include <sys/sunddi.h>
  33 #include <sys/dtrace.h>
  34 #include <sys/kobj.h>
  35 #include <sys/stat.h>
  36 #include <sys/conf.h>
  37 #include <vm/seg_kmem.h>
  38 #include <sys/stack.h>
  39 #include <sys/frame.h>
  40 #include <sys/dtrace_impl.h>
  41 #include <sys/cmn_err.h>
  42 #include <sys/sysmacros.h>
  43 #include <sys/privregs.h>
  44 #include <sys/sdt_impl.h>
  45 
/*
 * Byte stored over a probe's patch point while the probe is enabled (see
 * sdt_enable()); the byte it replaces is kept in sdp_savedval and put back
 * by sdt_disable().
 * NOTE(review): presumably chosen so that executing the patched site traps
 * into sdt_invop() -- confirm against the platform trap handling.
 */
#define SDT_PATCHVAL    0xf0
/*
 * Map a patch-point address to an sdt_probetab bucket index:  the low four
 * bits are discarded and the remainder is masked with sdt_probetab_mask.
 */
#define SDT_ADDR2NDX(addr)      ((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask)
/*
 * Default number of buckets, used by sdt_attach() when sdt_probetab_size is
 * still zero.  Each bucket is one pointer.
 */
#define SDT_PROBETAB_SIZE       0x1000          /* 4k entries */

static dev_info_t               *sdt_devi;      /* devinfo saved by attach */
static int                      sdt_verbose = 0; /* nonzero: log enable failures */
static sdt_probe_t              **sdt_probetab; /* hash of probes by patch point */
static int                      sdt_probetab_size; /* bucket count; 0 = default */
/*
 * Always sdt_probetab_size - 1.
 * NOTE(review): only a valid mask if the size is a power of two -- confirm
 * that any override of sdt_probetab_size respects this.
 */
static int                      sdt_probetab_mask;
  55 
  56 /*ARGSUSED*/
  57 static int
  58 sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
  59 {
  60         uintptr_t stack0, stack1, stack2, stack3, stack4;
  61         int i = 0;
  62         sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)];
  63 
  64 #ifdef __amd64
  65         /*
  66          * On amd64, stack[0] contains the dereferenced stack pointer,
  67          * stack[1] contains savfp, stack[2] contains savpc.  We want
  68          * to step over these entries.
  69          */
  70         i += 3;
  71 #endif
  72 
  73         for (; sdt != NULL; sdt = sdt->sdp_hashnext) {
  74                 if ((uintptr_t)sdt->sdp_patchpoint == addr) {
  75                         /*
  76                          * When accessing the arguments on the stack, we must
  77                          * protect against accessing beyond the stack.  We can
  78                          * safely set NOFAULT here -- we know that interrupts
  79                          * are already disabled.
  80                          */
  81                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
  82                         stack0 = stack[i++];
  83                         stack1 = stack[i++];
  84                         stack2 = stack[i++];
  85                         stack3 = stack[i++];
  86                         stack4 = stack[i++];
  87                         DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
  88                             CPU_DTRACE_BADADDR);
  89 
  90                         dtrace_probe(sdt->sdp_id, stack0, stack1,
  91                             stack2, stack3, stack4);
  92 
  93                         return (DTRACE_INVOP_NOP);
  94                 }
  95         }
  96 
  97         return (0);
  98 }
  99 
 100 /*ARGSUSED*/
 101 static void
 102 sdt_provide_module(void *arg, struct modctl *ctl)
 103 {
 104         struct module *mp = ctl->mod_mp;
 105         char *modname = ctl->mod_modname;
 106         sdt_probedesc_t *sdpd;
 107         sdt_probe_t *sdp, *old;
 108         sdt_provider_t *prov;
 109         int len;
 110 
 111         /*
 112          * One for all, and all for one:  if we haven't yet registered all of
 113          * our providers, we'll refuse to provide anything.
 114          */
 115         for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
 116                 if (prov->sdtp_id == DTRACE_PROVNONE)
 117                         return;
 118         }
 119 
 120         if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL)
 121                 return;
 122 
 123         for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) {
 124                 char *name = sdpd->sdpd_name, *func, *nname;
 125                 int i, j;
 126                 sdt_provider_t *prov;
 127                 ulong_t offs;
 128                 dtrace_id_t id;
 129 
 130                 for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) {
 131                         char *prefix = prov->sdtp_prefix;
 132 
 133                         if (strncmp(name, prefix, strlen(prefix)) == 0) {
 134                                 name += strlen(prefix);
 135                                 break;
 136                         }
 137                 }
 138 
 139                 nname = kmem_alloc(len = strlen(name) + 1, KM_SLEEP);
 140 
 141                 for (i = 0, j = 0; name[j] != '\0'; i++) {
 142                         if (name[j] == '_' && name[j + 1] == '_') {
 143                                 nname[i] = '-';
 144                                 j += 2;
 145                         } else {
 146                                 nname[i] = name[j++];
 147                         }
 148                 }
 149 
 150                 nname[i] = '\0';
 151 
 152                 sdp = kmem_zalloc(sizeof (sdt_probe_t), KM_SLEEP);
 153                 sdp->sdp_loadcnt = ctl->mod_loadcnt;
 154                 sdp->sdp_ctl = ctl;
 155                 sdp->sdp_name = nname;
 156                 sdp->sdp_namelen = len;
 157                 sdp->sdp_provider = prov;
 158 
 159                 func = kobj_searchsym(mp, sdpd->sdpd_offset, &offs);
 160 
 161                 if (func == NULL)
 162                         func = "<unknown>";
 163 
 164                 /*
 165                  * We have our provider.  Now create the probe.
 166                  */
 167                 if ((id = dtrace_probe_lookup(prov->sdtp_id, modname,
 168                     func, nname)) != DTRACE_IDNONE) {
 169                         old = dtrace_probe_arg(prov->sdtp_id, id);
 170                         ASSERT(old != NULL);
 171 
 172                         sdp->sdp_next = old->sdp_next;
 173                         sdp->sdp_id = id;
 174                         old->sdp_next = sdp;
 175                 } else {
 176                         sdp->sdp_id = dtrace_probe_create(prov->sdtp_id,
 177                             modname, func, nname, 3, sdp);
 178 
 179                         mp->sdt_nprobes++;
 180                 }
 181 
 182                 sdp->sdp_hashnext =
 183                     sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)];
 184                 sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp;
 185 
 186                 sdp->sdp_patchval = SDT_PATCHVAL;
 187                 sdp->sdp_patchpoint = (uint8_t *)sdpd->sdpd_offset;
 188                 sdp->sdp_savedval = *sdp->sdp_patchpoint;
 189         }
 190 }
 191 
 192 /*ARGSUSED*/
 193 static void
 194 sdt_destroy(void *arg, dtrace_id_t id, void *parg)
 195 {
 196         sdt_probe_t *sdp = parg, *old, *last, *hash;
 197         struct modctl *ctl = sdp->sdp_ctl;
 198         int ndx;
 199 
 200         if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) {
 201                 if ((ctl->mod_loadcnt == sdp->sdp_loadcnt &&
 202                     ctl->mod_loaded)) {
 203                         ((struct module *)(ctl->mod_mp))->sdt_nprobes--;
 204                 }
 205         }
 206 
 207         while (sdp != NULL) {
 208                 old = sdp;
 209 
 210                 /*
 211                  * Now we need to remove this probe from the sdt_probetab.
 212                  */
 213                 ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint);
 214                 last = NULL;
 215                 hash = sdt_probetab[ndx];
 216 
 217                 while (hash != sdp) {
 218                         ASSERT(hash != NULL);
 219                         last = hash;
 220                         hash = hash->sdp_hashnext;
 221                 }
 222 
 223                 if (last != NULL) {
 224                         last->sdp_hashnext = sdp->sdp_hashnext;
 225                 } else {
 226                         sdt_probetab[ndx] = sdp->sdp_hashnext;
 227                 }
 228 
 229                 kmem_free(sdp->sdp_name, sdp->sdp_namelen);
 230                 sdp = sdp->sdp_next;
 231                 kmem_free(old, sizeof (sdt_probe_t));
 232         }
 233 }
 234 
 235 /*ARGSUSED*/
 236 static int
 237 sdt_enable(void *arg, dtrace_id_t id, void *parg)
 238 {
 239         sdt_probe_t *sdp = parg;
 240         struct modctl *ctl = sdp->sdp_ctl;
 241 
 242         ctl->mod_nenabled++;
 243 
 244         /*
 245          * If this module has disappeared since we discovered its probes,
 246          * refuse to enable it.
 247          */
 248         if (!ctl->mod_loaded) {
 249                 if (sdt_verbose) {
 250                         cmn_err(CE_NOTE, "sdt is failing for probe %s "
 251                             "(module %s unloaded)",
 252                             sdp->sdp_name, ctl->mod_modname);
 253                 }
 254                 goto err;
 255         }
 256 
 257         /*
 258          * Now check that our modctl has the expected load count.  If it
 259          * doesn't, this module must have been unloaded and reloaded -- and
 260          * we're not going to touch it.
 261          */
 262         if (ctl->mod_loadcnt != sdp->sdp_loadcnt) {
 263                 if (sdt_verbose) {
 264                         cmn_err(CE_NOTE, "sdt is failing for probe %s "
 265                             "(module %s reloaded)",
 266                             sdp->sdp_name, ctl->mod_modname);
 267                 }
 268                 goto err;
 269         }
 270 
 271         while (sdp != NULL) {
 272                 *sdp->sdp_patchpoint = sdp->sdp_patchval;
 273                 sdp = sdp->sdp_next;
 274         }
 275 err:
 276         return (0);
 277 }
 278 
 279 /*ARGSUSED*/
 280 static void
 281 sdt_disable(void *arg, dtrace_id_t id, void *parg)
 282 {
 283         sdt_probe_t *sdp = parg;
 284         struct modctl *ctl = sdp->sdp_ctl;
 285 
 286         ctl->mod_nenabled--;
 287 
 288         if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt)
 289                 goto err;
 290 
 291         while (sdp != NULL) {
 292                 *sdp->sdp_patchpoint = sdp->sdp_savedval;
 293                 sdp = sdp->sdp_next;
 294         }
 295 
 296 err:
 297         ;
 298 }
 299 
/*
 * Fetch argument `argno' of the firing probe from memory.
 *
 * Starting from dtrace_getfp(), up to `aframes' saved frame pointers are
 * followed, looking for a frame whose return address is
 * dtrace_invop_callsite.  If one is found, the probe was entered through
 * the invalid-opcode path and the argument is read relative to the state
 * that path saved.  Otherwise dtrace_probe() was called directly and the
 * argument is read from the words following the last frame visited, shifted
 * by one to account for the leading probe ID.
 *
 * The final load is performed with CPU_DTRACE_NOFAULT set.  On amd64, asking
 * for a register-passed argument on the direct-call path sets
 * CPU_DTRACE_ILLOP and returns 0.  `arg', `id' and `parg' are unused.
 */
/*ARGSUSED*/
uint64_t
sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
{
        uintptr_t val;
        struct frame *fp = (struct frame *)dtrace_getfp();
        uintptr_t *stack;
        int i;
#if defined(__amd64)
        /*
         * A total of 6 arguments are passed via registers; any argument with
         * index of 5 or lower is therefore in a register.
         */
        int inreg = 5;
#endif

        for (i = 1; i <= aframes; i++) {
                fp = (struct frame *)(fp->fr_savfp);

                if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) {
#if !defined(__amd64)
                        /*
                         * If we pass through the invalid op handler, we will
                         * use the pointer that it passed to the stack as the
                         * second argument to dtrace_invop() as the pointer to
                         * the stack.
                         */
                        stack = ((uintptr_t **)&fp[1])[1];
#else
                        /*
                         * In the case of amd64, we will use the pointer to the
                         * regs structure that was pushed when we took the
                         * trap.  To get this structure, we must increment
                         * beyond the frame structure, the calling RIP, and
                         * padding stored in dtrace_invop().  If the argument
                         * that we're seeking is passed on the stack, we'll
                         * pull the true stack pointer out of the saved
                         * registers and decrement our argument by the number
                         * of arguments passed in registers; if the argument
                         * we're seeking is passed in registers, we can just
                         * load it directly.
                         */
                        struct regs *rp = (struct regs *)((uintptr_t)&fp[1] +
                            sizeof (uintptr_t) * 2);

                        if (argno <= inreg) {
                                /*
                                 * NOTE(review): indexing from r_rdi assumes
                                 * the saved argument registers are laid out
                                 * consecutively in argument order in struct
                                 * regs -- confirm against <sys/privregs.h>.
                                 */
                                stack = (uintptr_t *)&rp->r_rdi;
                        } else {
                                stack = (uintptr_t *)(rp->r_rsp);
                                argno -= (inreg + 1);
                        }
#endif
                        goto load;
                }
        }

        /*
         * We know that we did not come through a trap to get into
         * dtrace_probe() -- the provider simply called dtrace_probe()
         * directly.  As this is the case, we need to shift the argument
         * that we're looking for:  the probe ID is the first argument to
         * dtrace_probe(), so the argument n will actually be found where
         * one would expect to find argument (n + 1).
         */
        argno++;

#if defined(__amd64)
        if (argno <= inreg) {
                /*
                 * This shouldn't happen.  If the argument is passed in a
                 * register then it should have been, well, passed in a
                 * register...
                 */
                DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
                return (0);
        }

        argno -= (inreg + 1);
#endif
        /* Arguments passed on the stack follow the frame itself. */
        stack = (uintptr_t *)&fp[1];

load:
        /* The slot may not be mapped; suppress faults for this one load. */
        DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
        val = stack[argno];
        DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

        return (val);
}
 388 
/*
 * Provider operations handed to dtrace_register() by sdt_attach().  The
 * initializer is positional; sdt_attach() also overwrites the dtps_mode
 * member before each registration.
 * NOTE(review): the slot names in the comments below follow the usual
 * dtrace_pops_t member order -- confirm against <sys/dtrace.h>.
 */
static dtrace_pops_t sdt_pops = {
        NULL,                   /* provide (unused) */
        sdt_provide_module,     /* provide module */
        sdt_enable,             /* enable */
        sdt_disable,            /* disable */
        NULL,                   /* suspend (unused) */
        NULL,                   /* resume (unused) */
        sdt_getargdesc,         /* get argument description */
        sdt_getarg,             /* get argument value */
        NULL,                   /* mode; set per provider in sdt_attach() */
        sdt_destroy             /* destroy */
};
 401 
 402 /*ARGSUSED*/
 403 static int
 404 sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 405 {
 406         sdt_provider_t *prov;
 407 
 408         if (ddi_create_minor_node(devi, "sdt", S_IFCHR,
 409             0, DDI_PSEUDO, NULL) == DDI_FAILURE) {
 410                 cmn_err(CE_NOTE, "/dev/sdt couldn't create minor node");
 411                 ddi_remove_minor_node(devi, NULL);
 412                 return (DDI_FAILURE);
 413         }
 414 
 415         ddi_report_dev(devi);
 416         sdt_devi = devi;
 417 
 418         if (sdt_probetab_size == 0)
 419                 sdt_probetab_size = SDT_PROBETAB_SIZE;
 420 
 421         sdt_probetab_mask = sdt_probetab_size - 1;
 422         sdt_probetab =
 423             kmem_zalloc(sdt_probetab_size * sizeof (sdt_probe_t *), KM_SLEEP);
 424         dtrace_invop_add(sdt_invop);
 425 
 426         for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
 427                 uint32_t priv;
 428 
 429                 if (prov->sdtp_priv == DTRACE_PRIV_NONE) {
 430                         priv = DTRACE_PRIV_KERNEL;
 431                         sdt_pops.dtps_mode = NULL;
 432                 } else {
 433                         priv = prov->sdtp_priv;
 434                         ASSERT(priv == DTRACE_PRIV_USER);
 435                         sdt_pops.dtps_mode = sdt_mode;
 436                 }
 437 
 438                 if (dtrace_register(prov->sdtp_name, prov->sdtp_attr,
 439                     priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) {
 440                         cmn_err(CE_WARN, "failed to register sdt provider %s",
 441                             prov->sdtp_name);
 442                 }
 443         }
 444 
 445         return (DDI_SUCCESS);
 446 }
 447 
 448 /*ARGSUSED*/
 449 static int
 450 sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 451 {
 452         sdt_provider_t *prov;
 453 
 454         switch (cmd) {
 455         case DDI_DETACH:
 456                 break;
 457 
 458         case DDI_SUSPEND:
 459                 return (DDI_SUCCESS);
 460 
 461         default:
 462                 return (DDI_FAILURE);
 463         }
 464 
 465         for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
 466                 if (prov->sdtp_id != DTRACE_PROVNONE) {
 467                         if (dtrace_unregister(prov->sdtp_id) != 0)
 468                                 return (DDI_FAILURE);
 469 
 470                         prov->sdtp_id = DTRACE_PROVNONE;
 471                 }
 472         }
 473 
 474         dtrace_invop_remove(sdt_invop);
 475         kmem_free(sdt_probetab, sdt_probetab_size * sizeof (sdt_probe_t *));
 476 
 477         return (DDI_SUCCESS);
 478 }
 479 
 480 /*ARGSUSED*/
 481 static int
 482 sdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 483 {
 484         int error;
 485 
 486         switch (infocmd) {
 487         case DDI_INFO_DEVT2DEVINFO:
 488                 *result = (void *)sdt_devi;
 489                 error = DDI_SUCCESS;
 490                 break;
 491         case DDI_INFO_DEVT2INSTANCE:
 492                 *result = (void *)0;
 493                 error = DDI_SUCCESS;
 494                 break;
 495         default:
 496                 error = DDI_FAILURE;
 497         }
 498         return (error);
 499 }
 500 
/*
 * open(9E) entry point.  Opening the device always succeeds; no per-open
 * state is created and none of the arguments are examined.
 */
/*ARGSUSED*/
static int
sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
        return (0);
}
 507 
/*
 * Character/block entry points for the sdt device.  Only open is backed by
 * a function in this file (sdt_open); every other slot is filled with one
 * of the generic nodev/nulldev/nochpoll/ddi_prop_op routines.
 */
static struct cb_ops sdt_cb_ops = {
        sdt_open,               /* open */
        nodev,                  /* close */
        nulldev,                /* strategy */
        nulldev,                /* print */
        nodev,                  /* dump */
        nodev,                  /* read */
        nodev,                  /* write */
        nodev,                  /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        nochpoll,               /* poll */
        ddi_prop_op,            /* cb_prop_op */
        0,                      /* streamtab  */
        D_NEW | D_MP            /* Driver compatibility flag */
};
 525 
/*
 * Device operations for the sdt pseudo driver:  wires sdt_info, sdt_attach
 * and sdt_detach into the DDI framework and points at sdt_cb_ops for the
 * character/block entry points.  There are no bus operations.
 */
static struct dev_ops sdt_ops = {
        DEVO_REV,               /* devo_rev, */
        0,                      /* refcnt  */
        sdt_info,               /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        sdt_attach,             /* attach */
        sdt_detach,             /* detach */
        nodev,                  /* reset */
        &sdt_cb_ops,                /* driver operations */
        NULL,                   /* bus operations */
        nodev,                  /* dev power */
        ddi_quiesce_not_needed,         /* quiesce */
};
 540 
/*
 * Module linkage information for the kernel.
 *
 * modldrv describes this module as a driver, gives it its descriptive name
 * and ties it to sdt_ops.
 */
static struct modldrv modldrv = {
        &mod_driverops,             /* module type (this is a pseudo driver) */
        "Statically Defined Tracing",   /* name of module */
        &sdt_ops,           /* driver ops */
};
 549 
/*
 * Linkage passed to mod_install(), mod_info() and mod_remove() below; its
 * NULL-terminated list holds the single modldrv entry.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&modldrv,
        NULL
};
 555 
/*
 * Module load entry point:  installs the module described by modlinkage and
 * returns mod_install()'s result unchanged.
 */
int
_init(void)
{
        return (mod_install(&modlinkage));
}
 561 
/*
 * Module information entry point:  fills in `modinfop' via mod_info() and
 * returns its result unchanged.
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}
 567 
/*
 * Module unload entry point:  asks mod_remove() to remove the module
 * described by modlinkage and returns its result unchanged.
 */
int
_fini(void)
{
        return (mod_remove(&modlinkage));
}