1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 2012, Joyent, Inc. All rights reserved. 28 * Copyright (c) 2013, 2014 by Delphix. All rights reserved. 29 */ 30 31 #include <sys/modctl.h> 32 #include <sys/sunddi.h> 33 #include <sys/dtrace.h> 34 #include <sys/kobj.h> 35 #include <sys/stat.h> 36 #include <sys/conf.h> 37 #include <vm/seg_kmem.h> 38 #include <sys/stack.h> 39 #include <sys/frame.h> 40 #include <sys/dtrace_impl.h> 41 #include <sys/cmn_err.h> 42 #include <sys/sysmacros.h> 43 #include <sys/privregs.h> 44 #include <sys/sdt_impl.h> 45 46 #define SDT_PATCHVAL 0xf0 47 #define SDT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask) 48 #define SDT_PROBETAB_SIZE 0x1000 /* 4k entries -- 16K total */ 49 50 static dev_info_t *sdt_devi; 51 static int sdt_verbose = 0; 52 static sdt_probe_t **sdt_probetab; 53 static int sdt_probetab_size; 54 static int sdt_probetab_mask; 55 56 /*ARGSUSED*/ 57 static int 58 sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) 59 { 60 uintptr_t stack0, stack1, stack2, stack3, stack4; 61 int i = 0; 62 sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)]; 63 64 #ifdef __amd64 65 /* 66 * On amd64, stack[0] contains the dereferenced stack pointer, 67 * stack[1] contains savfp, stack[2] contains savpc. We want 68 * to step over these entries. 69 */ 70 i += 3; 71 #endif 72 73 for (; sdt != NULL; sdt = sdt->sdp_hashnext) { 74 if ((uintptr_t)sdt->sdp_patchpoint == addr) { 75 /* 76 * When accessing the arguments on the stack, we must 77 * protect against accessing beyond the stack. We can 78 * safely set NOFAULT here -- we know that interrupts 79 * are already disabled. 80 */ 81 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 82 stack0 = stack[i++]; 83 stack1 = stack[i++]; 84 stack2 = stack[i++]; 85 stack3 = stack[i++]; 86 stack4 = stack[i++]; 87 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | 88 CPU_DTRACE_BADADDR); 89 90 dtrace_probe(sdt->sdp_id, stack0, stack1, 91 stack2, stack3, stack4); 92 93 return (DTRACE_INVOP_NOP); 94 } 95 } 96 97 return (0); 98 } 99 100 /*ARGSUSED*/ 101 static void 102 sdt_provide_module(void *arg, struct modctl *ctl) 103 { 104 struct module *mp = ctl->mod_mp; 105 char *modname = ctl->mod_modname; 106 sdt_probedesc_t *sdpd; 107 sdt_probe_t *sdp, *old; 108 sdt_provider_t *prov; 109 int len; 110 111 /* 112 * One for all, and all for one: if we haven't yet registered all of 113 * our providers, we'll refuse to provide anything. 114 */ 115 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { 116 if (prov->sdtp_id == DTRACE_PROVNONE) 117 return; 118 } 119 120 if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL) 121 return; 122 123 for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) { 124 char *name = sdpd->sdpd_name, *func, *nname; 125 int i, j; 126 sdt_provider_t *prov; 127 ulong_t offs; 128 dtrace_id_t id; 129 130 for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) { 131 char *prefix = prov->sdtp_prefix; 132 133 if (strncmp(name, prefix, strlen(prefix)) == 0) { 134 name += strlen(prefix); 135 break; 136 } 137 } 138 139 nname = kmem_alloc(len = strlen(name) + 1, KM_SLEEP); 140 141 for (i = 0, j = 0; name[j] != '\0'; i++) { 142 if (name[j] == '_' && name[j + 1] == '_') { 143 nname[i] = '-'; 144 j += 2; 145 } else { 146 nname[i] = name[j++]; 147 } 148 } 149 150 nname[i] = '\0'; 151 152 sdp = kmem_zalloc(sizeof (sdt_probe_t), KM_SLEEP); 153 sdp->sdp_loadcnt = ctl->mod_loadcnt; 154 sdp->sdp_ctl = ctl; 155 sdp->sdp_name = nname; 156 sdp->sdp_namelen = len; 157 sdp->sdp_provider = prov; 158 159 func = kobj_searchsym(mp, sdpd->sdpd_offset, &offs); 160 161 if (func == NULL) 162 func = "<unknown>"; 163 164 /* 165 * We have our provider. Now create the probe. 166 */ 167 if ((id = dtrace_probe_lookup(prov->sdtp_id, modname, 168 func, nname)) != DTRACE_IDNONE) { 169 old = dtrace_probe_arg(prov->sdtp_id, id); 170 ASSERT(old != NULL); 171 172 sdp->sdp_next = old->sdp_next; 173 sdp->sdp_id = id; 174 old->sdp_next = sdp; 175 } else { 176 sdp->sdp_id = dtrace_probe_create(prov->sdtp_id, 177 modname, func, nname, 3, sdp); 178 179 mp->sdt_nprobes++; 180 } 181 182 sdp->sdp_hashnext = 183 sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)]; 184 sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp; 185 186 sdp->sdp_patchval = SDT_PATCHVAL; 187 sdp->sdp_patchpoint = (uint8_t *)sdpd->sdpd_offset; 188 sdp->sdp_savedval = *sdp->sdp_patchpoint; 189 } 190 } 191 192 /*ARGSUSED*/ 193 static void 194 sdt_destroy(void *arg, dtrace_id_t id, void *parg) 195 { 196 sdt_probe_t *sdp = parg, *old, *last, *hash; 197 struct modctl *ctl = sdp->sdp_ctl; 198 int ndx; 199 200 if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) { 201 if ((ctl->mod_loadcnt == sdp->sdp_loadcnt && 202 ctl->mod_loaded)) { 203 ((struct module *)(ctl->mod_mp))->sdt_nprobes--; 204 } 205 } 206 207 while (sdp != NULL) { 208 old = sdp; 209 210 /* 211 * Now we need to remove this probe from the sdt_probetab. 212 */ 213 ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint); 214 last = NULL; 215 hash = sdt_probetab[ndx]; 216 217 while (hash != sdp) { 218 ASSERT(hash != NULL); 219 last = hash; 220 hash = hash->sdp_hashnext; 221 } 222 223 if (last != NULL) { 224 last->sdp_hashnext = sdp->sdp_hashnext; 225 } else { 226 sdt_probetab[ndx] = sdp->sdp_hashnext; 227 } 228 229 kmem_free(sdp->sdp_name, sdp->sdp_namelen); 230 sdp = sdp->sdp_next; 231 kmem_free(old, sizeof (sdt_probe_t)); 232 } 233 } 234 235 /*ARGSUSED*/ 236 static int 237 sdt_enable(void *arg, dtrace_id_t id, void *parg) 238 { 239 sdt_probe_t *sdp = parg; 240 struct modctl *ctl = sdp->sdp_ctl; 241 242 ctl->mod_nenabled++; 243 244 /* 245 * If this module has disappeared since we discovered its probes, 246 * refuse to enable it. 247 */ 248 if (!ctl->mod_loaded) { 249 if (sdt_verbose) { 250 cmn_err(CE_NOTE, "sdt is failing for probe %s " 251 "(module %s unloaded)", 252 sdp->sdp_name, ctl->mod_modname); 253 } 254 goto err; 255 } 256 257 /* 258 * Now check that our modctl has the expected load count. If it 259 * doesn't, this module must have been unloaded and reloaded -- and 260 * we're not going to touch it. 261 */ 262 if (ctl->mod_loadcnt != sdp->sdp_loadcnt) { 263 if (sdt_verbose) { 264 cmn_err(CE_NOTE, "sdt is failing for probe %s " 265 "(module %s reloaded)", 266 sdp->sdp_name, ctl->mod_modname); 267 } 268 goto err; 269 } 270 271 while (sdp != NULL) { 272 *sdp->sdp_patchpoint = sdp->sdp_patchval; 273 sdp = sdp->sdp_next; 274 } 275 err: 276 return (0); 277 } 278 279 /*ARGSUSED*/ 280 static void 281 sdt_disable(void *arg, dtrace_id_t id, void *parg) 282 { 283 sdt_probe_t *sdp = parg; 284 struct modctl *ctl = sdp->sdp_ctl; 285 286 ctl->mod_nenabled--; 287 288 if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt) 289 goto err; 290 291 while (sdp != NULL) { 292 *sdp->sdp_patchpoint = sdp->sdp_savedval; 293 sdp = sdp->sdp_next; 294 } 295 296 err: 297 ; 298 } 299 300 /*ARGSUSED*/ 301 uint64_t 302 sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) 303 { 304 uintptr_t val; 305 struct frame *fp = (struct frame *)dtrace_getfp(); 306 uintptr_t *stack; 307 int i; 308 #if defined(__amd64) 309 /* 310 * A total of 6 arguments are passed via registers; any argument with 311 * index of 5 or lower is therefore in a register. 312 */ 313 int inreg = 5; 314 #endif 315 316 for (i = 1; i <= aframes; i++) { 317 fp = (struct frame *)(fp->fr_savfp); 318 319 if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) { 320 #if !defined(__amd64) 321 /* 322 * If we pass through the invalid op handler, we will 323 * use the pointer that it passed to the stack as the 324 * second argument to dtrace_invop() as the pointer to 325 * the stack. 326 */ 327 stack = ((uintptr_t **)&fp[1])[1]; 328 #else 329 /* 330 * In the case of amd64, we will use the pointer to the 331 * regs structure that was pushed when we took the 332 * trap. To get this structure, we must increment 333 * beyond the frame structure, the calling RIP, and 334 * padding stored in dtrace_invop(). If the argument 335 * that we're seeking is passed on the stack, we'll 336 * pull the true stack pointer out of the saved 337 * registers and decrement our argument by the number 338 * of arguments passed in registers; if the argument 339 * we're seeking is passed in regsiters, we can just 340 * load it directly. 341 */ 342 struct regs *rp = (struct regs *)((uintptr_t)&fp[1] + 343 sizeof (uintptr_t) * 2); 344 345 if (argno <= inreg) { 346 stack = (uintptr_t *)&rp->r_rdi; 347 } else { 348 stack = (uintptr_t *)(rp->r_rsp); 349 argno -= (inreg + 1); 350 } 351 #endif 352 goto load; 353 } 354 } 355 356 /* 357 * We know that we did not come through a trap to get into 358 * dtrace_probe() -- the provider simply called dtrace_probe() 359 * directly. As this is the case, we need to shift the argument 360 * that we're looking for: the probe ID is the first argument to 361 * dtrace_probe(), so the argument n will actually be found where 362 * one would expect to find argument (n + 1). 363 */ 364 argno++; 365 366 #if defined(__amd64) 367 if (argno <= inreg) { 368 /* 369 * This shouldn't happen. If the argument is passed in a 370 * register then it should have been, well, passed in a 371 * register... 372 */ 373 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 374 return (0); 375 } 376 377 argno -= (inreg + 1); 378 #endif 379 stack = (uintptr_t *)&fp[1]; 380 381 load: 382 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 383 val = stack[argno]; 384 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 385 386 return (val); 387 } 388 389 static dtrace_pops_t sdt_pops = { 390 NULL, 391 sdt_provide_module, 392 sdt_enable, 393 sdt_disable, 394 NULL, 395 NULL, 396 sdt_getargdesc, 397 sdt_getarg, 398 NULL, 399 sdt_destroy 400 }; 401 402 /*ARGSUSED*/ 403 static int 404 sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 405 { 406 sdt_provider_t *prov; 407 408 if (ddi_create_minor_node(devi, "sdt", S_IFCHR, 409 0, DDI_PSEUDO, NULL) == DDI_FAILURE) { 410 cmn_err(CE_NOTE, "/dev/sdt couldn't create minor node"); 411 ddi_remove_minor_node(devi, NULL); 412 return (DDI_FAILURE); 413 } 414 415 ddi_report_dev(devi); 416 sdt_devi = devi; 417 418 if (sdt_probetab_size == 0) 419 sdt_probetab_size = SDT_PROBETAB_SIZE; 420 421 sdt_probetab_mask = sdt_probetab_size - 1; 422 sdt_probetab = 423 kmem_zalloc(sdt_probetab_size * sizeof (sdt_probe_t *), KM_SLEEP); 424 dtrace_invop_add(sdt_invop); 425 426 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { 427 uint32_t priv; 428 429 if (prov->sdtp_priv == DTRACE_PRIV_NONE) { 430 priv = DTRACE_PRIV_KERNEL; 431 sdt_pops.dtps_mode = NULL; 432 } else { 433 priv = prov->sdtp_priv; 434 ASSERT(priv == DTRACE_PRIV_USER); 435 sdt_pops.dtps_mode = sdt_mode; 436 } 437 438 if (dtrace_register(prov->sdtp_name, prov->sdtp_attr, 439 priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) { 440 cmn_err(CE_WARN, "failed to register sdt provider %s", 441 prov->sdtp_name); 442 } 443 } 444 445 return (DDI_SUCCESS); 446 } 447 448 /*ARGSUSED*/ 449 static int 450 sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 451 { 452 sdt_provider_t *prov; 453 454 switch (cmd) { 455 case DDI_DETACH: 456 break; 457 458 case DDI_SUSPEND: 459 return (DDI_SUCCESS); 460 461 default: 462 return (DDI_FAILURE); 463 } 464 465 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { 466 if (prov->sdtp_id != DTRACE_PROVNONE) { 467 if (dtrace_unregister(prov->sdtp_id) != 0) 468 return (DDI_FAILURE); 469 470 prov->sdtp_id = DTRACE_PROVNONE; 471 } 472 } 473 474 dtrace_invop_remove(sdt_invop); 475 kmem_free(sdt_probetab, sdt_probetab_size * sizeof (sdt_probe_t *)); 476 477 return (DDI_SUCCESS); 478 } 479 480 /*ARGSUSED*/ 481 static int 482 sdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 483 { 484 int error; 485 486 switch (infocmd) { 487 case DDI_INFO_DEVT2DEVINFO: 488 *result = (void *)sdt_devi; 489 error = DDI_SUCCESS; 490 break; 491 case DDI_INFO_DEVT2INSTANCE: 492 *result = (void *)0; 493 error = DDI_SUCCESS; 494 break; 495 default: 496 error = DDI_FAILURE; 497 } 498 return (error); 499 } 500 501 /*ARGSUSED*/ 502 static int 503 sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 504 { 505 return (0); 506 } 507 508 static struct cb_ops sdt_cb_ops = { 509 sdt_open, /* open */ 510 nodev, /* close */ 511 nulldev, /* strategy */ 512 nulldev, /* print */ 513 nodev, /* dump */ 514 nodev, /* read */ 515 nodev, /* write */ 516 nodev, /* ioctl */ 517 nodev, /* devmap */ 518 nodev, /* mmap */ 519 nodev, /* segmap */ 520 nochpoll, /* poll */ 521 ddi_prop_op, /* cb_prop_op */ 522 0, /* streamtab */ 523 D_NEW | D_MP /* Driver compatibility flag */ 524 }; 525 526 static struct dev_ops sdt_ops = { 527 DEVO_REV, /* devo_rev, */ 528 0, /* refcnt */ 529 sdt_info, /* get_dev_info */ 530 nulldev, /* identify */ 531 nulldev, /* probe */ 532 sdt_attach, /* attach */ 533 sdt_detach, /* detach */ 534 nodev, /* reset */ 535 &sdt_cb_ops, /* driver operations */ 536 NULL, /* bus operations */ 537 nodev, /* dev power */ 538 ddi_quiesce_not_needed, /* quiesce */ 539 }; 540 541 /* 542 * Module linkage information for the kernel. 543 */ 544 static struct modldrv modldrv = { 545 &mod_driverops, /* module type (this is a pseudo driver) */ 546 "Statically Defined Tracing", /* name of module */ 547 &sdt_ops, /* driver ops */ 548 }; 549 550 static struct modlinkage modlinkage = { 551 MODREV_1, 552 { (void *)&modldrv, NULL } 553 }; 554 555 int 556 _init(void) 557 { 558 return (mod_install(&modlinkage)); 559 } 560 561 int 562 _info(struct modinfo *modinfop) 563 { 564 return (mod_info(&modlinkage, modinfop)); 565 } 566 567 int 568 _fini(void) 569 { 570 return (mod_remove(&modlinkage)); 571 }