/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 */

/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace).  The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide".  The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file.  The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file.  The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Format functions
 *   - Predicate functions
 *   - ECB functions
 *   - Buffer functions
 *   - Enabling functions
 *   - DOF functions
 *   - Anonymous enabling functions
 *   - Consumer state functions
 *   - Helper functions
 *   - Hook functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/dtrace_impl.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/mutex_impl.h>
#include <sys/rwlock_impl.h>
#include <sys/ctf_api.h>
#include <sys/panic.h>
#include <sys/priv_impl.h>
#include <sys/policy.h>
#include <sys/cred_impl.h>
#include <sys/procfs_isa.h>
#include <sys/taskq.h>
#include <sys/mkdev.h>
#include <sys/kdi.h>
#include <sys/zone.h>
#include <sys/socket.h>
#include <netinet/in.h>

/*
 * DTrace Tunable Variables
 *
 * The following variables may be tuned by adding a line to /etc/system that
 * includes both the name of the DTrace module ("dtrace") and the name of the
 * variable.  For example:
 *
 *   set dtrace:dtrace_destructive_disallow = 1
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.  Further, these variables should not be tuned
 * dynamically via "mdb -kw" or other means; they should only be tuned via
 * /etc/system.
 */
int		dtrace_destructive_disallow = 0;
dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t		dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t	dtrace_dof_maxsize = (256 * 1024);
size_t		dtrace_global_maxsize = (16 * 1024);
size_t		dtrace_actions_max = (16 * 1024);
size_t		dtrace_retain_max = 1024;
dtrace_optval_t	dtrace_helper_actions_max = 1024;
dtrace_optval_t	dtrace_helper_providers_max = 32;
dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t		dtrace_strsize_default = 256;
dtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
dtrace_optval_t	dtrace_cleanrate_min = 200000;			/* 5000 hz */
dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_nspec_default = 1;
dtrace_optval_t	dtrace_specsize_default = 32 * 1024;
dtrace_optval_t	dtrace_stackframes_default = 20;
dtrace_optval_t	dtrace_ustackframes_default = 20;
dtrace_optval_t	dtrace_jstackframes_default = 50;
dtrace_optval_t	dtrace_jstackstrsize_default = 512;
int		dtrace_msgdsize_max = 128;
hrtime_t	dtrace_chill_max = 500 * (NANOSEC / MILLISEC);	/* 500 ms */
hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
int		dtrace_devdepth_max = 32;
int		dtrace_err_verbose;
hrtime_t	dtrace_deadman_interval = NANOSEC;
hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
hrtime_t	dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;

/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so: it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */

/*
 * DTrace Internal Variables
 */
static dev_info_t	*dtrace_devi;		/* device info */
static vmem_t		*dtrace_arena;		/* probe ID arena */
static vmem_t		*dtrace_minor;		/* minor number arena */
static taskq_t		*dtrace_taskq;		/* task queue */
static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
static int		dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
static int		dtrace_opens;		/* number of opens */
static int		dtrace_helpers;		/* number of helpers */
static void		*dtrace_softstate;	/* softstate pointer */
static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int		dtrace_toxranges;	/* number of toxic ranges */
static int		dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t	*dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_genid_t	dtrace_retained_gen;	/* current retained enab gen */
static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
static int		dtrace_dynvar_failclean; /* dynvars failed to clean */

/*
 * DTrace Locking
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix:  mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger:  cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */
static kmutex_t		dtrace_lock;		/* probe state lock */
static kmutex_t		dtrace_provider_lock;	/* provider state lock */
static kmutex_t		dtrace_meta_lock;	/* meta-provider state lock */
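
/*
 * Taken together, the constraints above imply a single total acquisition
 * order (a derived summary, not an additional rule):
 *
 *	dtrace_meta_lock -> cpu_lock -> dtrace_provider_lock -> mod_lock ->
 *	dtrace_lock
 */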

/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};

static void
dtrace_nullop(void)
{}

static int
dtrace_enable_nullop(void)
{
	return (0);
}

static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};

static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */

/*
 * DTrace Helper Tracing Variables
 */
uint32_t dtrace_helptrace_next = 0;
uint32_t dtrace_helptrace_nlocals;
char	*dtrace_helptrace_buffer;
int	dtrace_helptrace_bufsize = 512 * 1024;

#ifdef DEBUG
int	dtrace_helptrace_enabled = 1;
#else
int	dtrace_helptrace_enabled = 0;
#endif

/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#ifdef DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static kmutex_t dtrace_errlock;
#endif

/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17

#define	DTRACE_V4MAPPED_OFFSET		(sizeof (uint32_t) * 3)

/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}

#define	DT_BSWAP_8(x)	((x) & 0xff)
#define	DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define	DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define	DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))

#define	DT_MASK_LO 0x00000000FFFFFFFFULL
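
/*
 * For illustration, the swaps compose pairwise, e.g.:
 *
 *	DT_BSWAP_16(0x1234) == 0x3412
 *	DT_BSWAP_32(0x11223344) == 0x44332211
 */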

#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);

#ifndef __i386
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if (addr & (size - 1)) {					\
		*flags |= CPU_DTRACE_BADALIGN;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif

/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by addr, sz.  We take care to avoid
 * problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes.  Ranges of size 0 are allowed.
 */
#define	DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
	((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
	(testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
	(testaddr) + (testsz) >= (testaddr))
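
/*
 * A worked example of the overflow guard (illustrative values only):  with
 * baseaddr = 0x1000 and basesz = 0x100, a testaddr of 0x1080 paired with a
 * testsz huge enough that (testaddr + testsz) wraps past zero would satisfy
 * the first two unsigned comparisons; the third clause rejects any range
 * whose end wraps below its start.
 */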

/*
 * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it.  This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range.  Allocations of size zero are allowed.
 */
#define	DTRACE_INSCRATCH(mstate, alloc_sz) \
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
	(mstate)->dtms_scratch_ptr >= (alloc_sz))

#define	DTRACE_LOADFUNC(bits)						\
/*CSTYLED*/								\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	/*CSTYLED*/							\
	uint##bits##_t rval;						\
	int i;								\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
									\
		if (addr + size <= dtrace_toxrange[i].dtt_base)		\
			continue;					\
									\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	*flags |= CPU_DTRACE_NOFAULT;					\
	/*CSTYLED*/							\
	rval = *((volatile uint##bits##_t *)addr);			\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
									\
	return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0);		\
}

#ifdef _LP64
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif

#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_FAIL	-1
#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64

#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW :		\
	((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN :		\
	((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH :	\
	((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK :		\
	DTRACEFLT_UNKNOWN)

#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)

static size_t dtrace_strlen(const char *, size_t);
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static void dtrace_enabling_reap(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);

/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.)  If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note:  not called from probe context."
 */
void
dtrace_panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
	dtrace_vpanic(format, alist);
	va_end(alist);
}

int
dtrace_assfail(const char *a, const char *f, int l)
{
	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}

/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage.  If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors.  (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}

/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)
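
/*
 * A minimal usage sketch (hypothetical; not part of the original source):
 * probe-context code must never dereference an untrusted pointer directly,
 * so a chain such as curthread->t_procp->p_pidp would be walked as:
 *
 *	uintptr_t proc = dtrace_loadptr((uintptr_t)&curthread->t_procp);
 *	uintptr_t pidp = dtrace_loadptr(proc + offsetof(proc_t, p_pidp));
 *
 * A toxic or faulting address sets the appropriate CPU_DTRACE_* flag and
 * yields zero rather than taking the system down.
 */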

static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
	if (dest < mstate->dtms_scratch_base)
		return (0);

	if (dest + size < dest)
		return (0);

	if (dest + size > mstate->dtms_scratch_ptr)
		return (0);

	return (1);
}

static int
dtrace_canstore_statvar(uint64_t addr, size_t sz,
    dtrace_statvar_t **svars, int nsvars)
{
	int i;

	for (i = 0; i < nsvars; i++) {
		dtrace_statvar_t *svar = svars[i];

		if (svar == NULL || svar->dtsv_size == 0)
			continue;

		if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size))
			return (1);
	}

	return (0);
}

/*
 * Check to see if the address is within a memory region to which a store may
 * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 * region.  The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	/*
	 * First, check to see if the address is in scratch space...
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
	    mstate->dtms_scratch_size))
		return (1);

	/*
	 * Now check to see if it's a dynamic variable.  This check will pick
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
	if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
	    vstate->dtvs_dynvars.dtds_size)) {
		dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
		uintptr_t base = (uintptr_t)dstate->dtds_base +
		    (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
		uintptr_t chunkoffs;

		/*
		 * Before we assume that we can store here, we need to make
		 * sure that it isn't in our metadata -- storing to our
		 * dynamic variable metadata would corrupt our state.  For
		 * the range to not include any dynamic variable metadata,
		 * it must:
		 *
		 *	(1) Start above the hash table that is at the base of
		 *	the dynamic variable space
		 *
		 *	(2) Have a starting chunk offset that is beyond the
		 *	dtrace_dynvar_t that is at the base of every chunk
		 *
		 *	(3) Not span a chunk boundary
		 *
		 */
		if (addr < base)
			return (0);

		chunkoffs = (addr - base) % dstate->dtds_chunksize;

		if (chunkoffs < sizeof (dtrace_dynvar_t))
			return (0);

		if (chunkoffs + sz > dstate->dtds_chunksize)
			return (0);

		return (1);
	}

	/*
	 * Finally, check the static local and global variables.  These checks
	 * take the longest, so we perform them last.
	 */
	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_locals, vstate->dtvs_nlocals))
		return (1);

	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_globals, vstate->dtvs_nglobals))
		return (1);

	return (0);
}

/*
 * Convenience routine to check to see if the address is within a memory
 * region in which a load may be issued given the user's privilege level;
 * if not, it sets the appropriate error flags and loads 'addr' into the
 * illegal value slot.
 *
 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 * appropriate memory access protection.
 */
static int
dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	/*
	 * You can obviously read that which you can store.
	 */
	if (dtrace_canstore(addr, sz, mstate, vstate))
		return (1);

	/*
	 * We're allowed to read from our own string table.
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
	    mstate->dtms_difo->dtdo_strlen))
		return (1);

	if (vstate->dtvs_state != NULL &&
	    dtrace_priv_proc(vstate->dtvs_state, mstate)) {
		proc_t *p;

		/*
		 * When we have privileges to the current process, there are
		 * several context-related kernel structures that are safe to
		 * read, even absent the privilege to read from kernel memory.
		 * These reads are safe because these structures contain only
		 * state that (1) we're permitted to read, (2) is harmless or
		 * (3) contains pointers to additional kernel state that we're
		 * not permitted to read (and as such, do not present an
		 * opportunity for privilege escalation).  Finally (and
		 * critically), because of the nature of their relation with
		 * the current thread context, the memory associated with these
		 * structures cannot change over the duration of probe context,
		 * and it is therefore impossible for this memory to be
		 * deallocated and reallocated as something else while it's
		 * being operated upon.
		 */
		if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
			return (1);

		if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
		    sz, curthread->t_procp, sizeof (proc_t))) {
			return (1);
		}

		if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
		    curthread->t_cred, sizeof (cred_t))) {
			return (1);
		}

		if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
		    &(p->p_pidp->pid_id), sizeof (pid_t))) {
			return (1);
		}

		if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
		    curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
			return (1);
		}
	}

	DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
	*illval = addr;
	return (0);
}

/*
 * Convenience routine to check to see if a given string is within a memory
 * region in which a load may be issued given the user's privilege level;
 * this exists so that we don't need to issue unnecessary dtrace_strlen()
 * calls in the event that the user has all privileges.
 */
static int
dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	size_t strsz;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz);
	if (dtrace_canload(addr, strsz, mstate, vstate))
		return (1);

	return (0);
}

/*
 * Convenience routine to check to see if a given variable is within a memory
 * region in which a load may be issued given the user's privilege level.
 */
static int
dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	size_t sz;
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	if (type->dtdt_kind == DIF_TYPE_STRING)
		sz = dtrace_strlen(src,
		    vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
	else
		sz = type->dtdt_size;

	return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
}

/*
 * Compare two strings using safe loads.
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2 || limit == 0)
		return (0);

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	do {
		if (s1 == NULL) {
			c1 = '\0';
		} else {
			c1 = dtrace_load8((uintptr_t)s1++);
		}

		if (s2 == NULL) {
			c2 = '\0';
		} else {
			c2 = dtrace_load8((uintptr_t)s2++);
		}

		if (c1 != c2)
			return (c1 - c2);
	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}

/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * len parameter is used to specify a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	uint_t len;

	for (len = 0; len != lim; len++) {
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;
	}

	return (len);
}

/*
 * Check if an address falls within a toxic region.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
	uintptr_t taddr, tsize;
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		taddr = dtrace_toxrange[i].dtt_base;
		tsize = dtrace_toxrange[i].dtt_limit - taddr;

		if (kaddr - taddr < tsize) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
			return (1);
		}

		if (taddr - kaddr < size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
			return (1);
		}
	}

	return (0);
}

/*
 * Copy src to dst using safe memory accesses.  The src is assumed to be unsafe
 * memory specified by the DIF program.  The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace.  As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst;
		const uint8_t *s2 = src;

		if (s1 <= s2) {
			do {
				*s1++ = dtrace_load8((uintptr_t)s2++);
			} while (--len != 0);
		} else {
			s2 += len;
			s1 += len;

			do {
				*--s1 = dtrace_load8((uintptr_t)--s2);
			} while (--len != 0);
		}
	}
}
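
/*
 * For illustration:  because the direction of the copy is chosen by
 * comparing the two pointers, an overlapping copy that slides data toward
 * lower addresses proceeds forward, while one that slides data toward
 * higher addresses proceeds backward from the tail -- the same convention
 * as memmove(3C).
 */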

/*
 * Copy src to dst using safe memory accesses, up to either the specified
 * length, or the point that a nul byte is encountered.  The src is assumed to
 * be unsafe memory specified by the DIF program.  The dst is assumed to be
 * safe memory that we can store to directly because it is managed by DTrace.
 * Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst, c;
		const uint8_t *s2 = src;

		do {
			*s1++ = c = dtrace_load8((uintptr_t)s2++);
		} while (--len != 0 && c != '\0');
	}
}

/*
 * Copy src to dst, deriving the size and type from the specified (BYREF)
 * variable type.  The src is assumed to be unsafe memory specified by the DIF
 * program.  The dst is assumed to be DTrace variable memory that is of the
 * specified type; we assume that we can store to it directly.
 */
static void
dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
{
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	if (type->dtdt_kind == DIF_TYPE_STRING) {
		dtrace_strcpy(src, dst, type->dtdt_size);
	} else {
		dtrace_bcopy(src, dst, type->dtdt_size);
	}
}

/*
 * Compare s1 to s2 using safe memory accesses.  The s1 data is assumed to be
 * unsafe memory specified by the DIF program.  The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
	volatile uint16_t *flags;

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (s1 == s2)
		return (0);

	if (s1 == NULL || s2 == NULL)
		return (1);

	if (s1 != s2 && len != 0) {
		const uint8_t *ps1 = s1;
		const uint8_t *ps2 = s2;

		do {
			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
				return (1);
		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
	}
	return (0);
}

/*
 * Zero the specified region using a simple byte-by-byte loop.  Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uchar_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}

static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t result[2];

	result[0] = addend1[0] + addend2[0];
	result[1] = addend1[1] + addend2[1] +
	    (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);

	sum[0] = result[0];
	sum[1] = result[1];
}
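
/*
 * The carry-out test above relies on unsigned wraparound; as a worked
 * example, 0xffffffffffffffff + 1 wraps to 0, which is strictly less than
 * either addend, so a carry of one propagates into the high word.
 */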

/*
 * Shift the 128-bit value in a by b.  If b is positive, shift left.
 * If b is negative, shift right.
 */
static void
dtrace_shift_128(uint64_t *a, int b)
{
	uint64_t mask;

	if (b == 0)
		return;

	if (b < 0) {
		b = -b;
		if (b >= 64) {
			a[0] = a[1] >> (b - 64);
			a[1] = 0;
		} else {
			a[0] >>= b;
			mask = 1LL << (64 - b);
			mask -= 1;
			a[0] |= ((a[1] & mask) << (64 - b));
			a[1] >>= b;
		}
	} else {
		if (b >= 64) {
			a[1] = a[0] << (b - 64);
			a[0] = 0;
		} else {
			a[1] <<= b;
			mask = a[0] >> (64 - b);
			a[1] |= mask;
			a[0] <<= b;
		}
	}
}

/*
 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 * use native multiplication on those, and then re-combine into the
 * resulting 128-bit value.
 *
 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 *     hi1 * hi2 << 64 +
 *     hi1 * lo2 << 32 +
 *     hi2 * lo1 << 32 +
 *     lo1 * lo2
 */
static void
dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
{
	uint64_t hi1, hi2, lo1, lo2;
	uint64_t tmp[2];

	hi1 = factor1 >> 32;
	hi2 = factor2 >> 32;

	lo1 = factor1 & DT_MASK_LO;
	lo2 = factor2 & DT_MASK_LO;

	product[0] = lo1 * lo2;
	product[1] = hi1 * hi2;

	tmp[0] = hi1 * lo2;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);

	tmp[0] = hi2 * lo1;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);
}
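
/*
 * A worked example (illustrative values only):  multiplying
 * factor1 = 2^32 + 2 by factor2 = 2^32 + 3 gives hi1 = hi2 = 1, lo1 = 2 and
 * lo2 = 3.  The low word receives lo1 * lo2 = 6, the high word receives
 * hi1 * hi2 = 1, and the cross terms 3 and 2 are each shifted left 32 bits
 * before being added -- yielding 2^64 + 5 * 2^32 + 6, as expected.
 */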

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials.
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = CRED()) != NULL &&
	    s_cr->cr_uid == cr->cr_uid &&
	    s_cr->cr_uid == cr->cr_ruid &&
	    s_cr->cr_uid == cr->cr_suid &&
	    s_cr->cr_gid == cr->cr_gid &&
	    s_cr->cr_gid == cr->cr_rgid &&
	    s_cr->cr_gid == cr->cr_sgid)
		return (1);

	return (0);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the zone of the process that enabled the invoking ECB
 * matches the target credentials.
 */
static int
dtrace_priv_proc_common_zone(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = CRED()) != NULL &&
	    s_cr->cr_zone == cr->cr_zone)
		return (1);

	return (0);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the process has not setuid or changed credentials.
 */
static int
dtrace_priv_proc_common_nocd()
{
	proc_t *proc;

	if ((proc = ttoproc(curthread)) != NULL &&
	    !(proc->p_flag & SNOCD))
		return (1);

	return (0);
}

static int
dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	int action = state->dts_cred.dcr_action;

	if (!(mstate->dtms_access & DTRACE_ACCESS_PROC))
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
	    dtrace_priv_proc_common_zone(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
	    dtrace_priv_proc_common_user(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
	    dtrace_priv_proc_common_nocd() == 0)
		goto bad;

	return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	if (mstate->dtms_access & DTRACE_ACCESS_PROC) {
		if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
			return (1);

		if (dtrace_priv_proc_common_zone(state) &&
		    dtrace_priv_proc_common_user(state) &&
		    dtrace_priv_proc_common_nocd())
			return (1);
	}

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	if ((mstate->dtms_access & DTRACE_ACCESS_PROC) &&
	    (state->dts_cred.dcr_action & DTRACE_CRA_PROC))
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

static int
dtrace_priv_kernel_destructive(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

/*
 * Determine if the dte_cond of the specified ECB allows for processing of
 * the current probe to continue.  Note that this routine may allow continued
 * processing, but with access(es) stripped from the mstate's dtms_access
 * field.
 */
static int
dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
    dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	dtrace_pops_t *pops = &prov->dtpv_pops;
	int mode = DTRACE_MODE_NOPRIV_DROP;

	ASSERT(ecb->dte_cond);

	if (pops->dtps_mode != NULL) {
		mode = pops->dtps_mode(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);

		ASSERT((mode & DTRACE_MODE_USER) ||
		    (mode & DTRACE_MODE_KERNEL));
		ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
		    (mode & DTRACE_MODE_NOPRIV_DROP));
	}

	/*
	 * If the dte_cond bits indicate that this consumer is only allowed to
	 * see user-mode firings of this probe, call the provider's dtps_mode()
	 * entry point to check that the probe was fired while in a user
	 * context.  If that's not the case, use the policy specified by the
	 * provider to determine if we drop the probe or merely restrict
	 * operation.
	 */
	if (ecb->dte_cond & DTRACE_COND_USERMODE) {
		ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);

		if (!(mode & DTRACE_MODE_USER)) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
		}
	}

	/*
	 * This is more subtle than it looks.  We have to be absolutely certain
	 * that CRED() isn't going to change out from under us so it's only
	 * legit to examine that structure if we're in constrained situations.
	 * Currently, the only times we'll perform this check is if a
	 * non-super-user has enabled the profile or syscall providers --
	 * providers that allow visibility of all processes.  For the profile
	 * case, the check above will ensure that we're examining a user
	 * context.
	 */
	if (ecb->dte_cond & DTRACE_COND_OWNER) {
		cred_t *cr;
		cred_t *s_cr = state->dts_cred.dcr_cred;
		proc_t *proc;

		ASSERT(s_cr != NULL);

		if ((cr = CRED()) == NULL ||
		    s_cr->cr_uid != cr->cr_uid ||
		    s_cr->cr_uid != cr->cr_ruid ||
		    s_cr->cr_uid != cr->cr_suid ||
		    s_cr->cr_gid != cr->cr_gid ||
		    s_cr->cr_gid != cr->cr_rgid ||
		    s_cr->cr_gid != cr->cr_sgid ||
		    (proc = ttoproc(curthread)) == NULL ||
		    (proc->p_flag & SNOCD)) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
		}
	}

	/*
	 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
	 * in our zone, check to see if our mode policy is to restrict rather
	 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
	 * and DTRACE_ACCESS_ARGS.
	 */
	if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
		cred_t *cr;
		cred_t *s_cr = state->dts_cred.dcr_cred;

		ASSERT(s_cr != NULL);

		if ((cr = CRED()) == NULL ||
		    s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &=
			    ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
		}
	}

	return (1);
}

/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) from outside of probe context to
 * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
 * cleaning is explained in detail in <sys/dtrace_impl.h>.
 */
void
dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
	dtrace_dynvar_t *dirty;
	dtrace_dstate_percpu_t *dcpu;
	dtrace_dynvar_t **rinsep;
	int i, j, work = 0;

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];
		rinsep = &dcpu->dtdsc_rinsing;

		/*
		 * If the dirty list is NULL, there is no dirty work to do.
		 */
		if (dcpu->dtdsc_dirty == NULL)
			continue;

		if (dcpu->dtdsc_rinsing != NULL) {
			/*
			 * If the rinsing list is non-NULL, then it is because
			 * this CPU was selected to accept another CPU's
			 * dirty list -- and since that time, dirty buffers
			 * have accumulated.  This is a highly unlikely
			 * condition, but we choose to ignore the dirty
			 * buffers -- they'll be picked up in a future cleanse.
			 */
			continue;
		}

		if (dcpu->dtdsc_clean != NULL) {
			/*
			 * If the clean list is non-NULL, then we're in a
			 * situation where a CPU has done deallocations (we
			 * have a non-NULL dirty list) but no allocations (we
			 * also have a non-NULL clean list).  We can't simply
			 * move the dirty list into the clean list on this
			 * CPU, yet we also don't want to allow this condition
			 * to persist, lest a short clean list prevent a
			 * massive dirty list from being cleaned (which in
			 * turn could lead to otherwise avoidable dynamic
			 * drops).  To deal with this, we look for some CPU
			 * with a NULL clean list, NULL dirty list, and NULL
			 * rinsing list -- and then we borrow this CPU to
			 * rinse our dirty list.
			 */
			for (j = 0; j < NCPU; j++) {
				dtrace_dstate_percpu_t *rinser;

				rinser = &dstate->dtds_percpu[j];

				if (rinser->dtdsc_rinsing != NULL)
					continue;

				if (rinser->dtdsc_dirty != NULL)
					continue;

				if (rinser->dtdsc_clean != NULL)
					continue;

				rinsep = &rinser->dtdsc_rinsing;
				break;
			}

			if (j == NCPU) {
				/*
				 * We were unable to find another CPU that
				 * could accept this dirty list -- we are
				 * therefore unable to clean it now.
				 */
				dtrace_dynvar_failclean++;
				continue;
			}
		}

		work = 1;

		/*
		 * Atomically move the dirty list aside.
		 */
		do {
			dirty = dcpu->dtdsc_dirty;

			/*
			 * Before we zap the dirty list, set the rinsing list.
			 * (This allows for a potential assertion in
			 * dtrace_dynvar():  if a free dynamic variable appears
			 * on a hash chain, either the dirty list or the
			 * rinsing list for some CPU must be non-NULL.)
			 */
			*rinsep = dirty;
			dtrace_membar_producer();
		} while (dtrace_casptr(&dcpu->dtdsc_dirty,
		    dirty, NULL) != dirty);
	}

	if (!work) {
		/*
		 * We have no work to do; we can simply return.
		 */
		return;
	}

	dtrace_sync();

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		if (dcpu->dtdsc_rinsing == NULL)
			continue;

		/*
		 * We are now guaranteed that no hash chain contains a pointer
		 * into this dirty list; we can make it clean.
		 */
		ASSERT(dcpu->dtdsc_clean == NULL);
		dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
		dcpu->dtdsc_rinsing = NULL;
	}

	/*
	 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
	 * sure that all CPUs have seen all of the dtdsc_clean pointers.
	 * This prevents a race whereby a CPU incorrectly decides that
	 * the state should be something other than DTRACE_DSTATE_CLEAN
	 * after dtrace_dynvar_clean() has completed.
	 */
	dtrace_sync();

	dstate->dtds_state = DTRACE_DSTATE_CLEAN;
}
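
/*
 * To summarize the lifecycle implemented above and in dtrace_dynvar() (a
 * paraphrase of <sys/dtrace_impl.h>, not a new mechanism):  freed chunks go
 * onto a per-CPU dirty list; the cleaner moves each dirty list to a rinsing
 * list, waits out a dtrace_sync() so that no probe still holds a reference,
 * and then promotes the rinsed chunks to the clean list -- from which the
 * allocator may move them onto its free list.
 */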

/*
 * Depending on the value of the op parameter, this function looks up,
 * allocates or deallocates an arbitrarily-keyed dynamic variable.  If an
 * allocation is requested, this function will return a pointer to a
 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
 * variable can be allocated.  If NULL is returned, the appropriate counter
 * will be incremented.
 */
dtrace_dynvar_t *
dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
    dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	uint64_t hashval = DTRACE_DYNHASH_VALID;
	dtrace_dynhash_t *hash = dstate->dtds_hash;
	dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
	processorid_t me = CPU->cpu_id, cpu = me;
	dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
	size_t bucket, ksize;
	size_t chunksize = dstate->dtds_chunksize;
	uintptr_t kdata, lock, nstate;
	uint_t i;

	ASSERT(nkeys != 0);

	/*
	 * Hash the key.  As with aggregations, we use Jenkins' "One-at-a-time"
	 * algorithm.  For the by-value portions, we perform the algorithm in
	 * 16-bit chunks (as opposed to 8-bit chunks).  This speeds things up a
	 * bit, and seems to have only a minute effect on distribution.  For
	 * the by-reference data, we perform "One-at-a-time" iterating (safely)
	 * over each referenced byte.  It's painful to do this, but it's much
	 * better than pathological hash distribution.  The efficacy of the
	 * hashing algorithm (and a comparison with other algorithms) may be
	 * found by running the ::dtrace_dynstat MDB dcmd.
	 */
	for (i = 0; i < nkeys; i++) {
		if (key[i].dttk_size == 0) {
			uint64_t val = key[i].dttk_value;

			hashval += (val >> 48) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 32) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 16) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += val & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);
		} else {
			/*
			 * This is incredibly painful, but it beats the hell
			 * out of the alternative.
			 */
			uint64_t j, size = key[i].dttk_size;
			uintptr_t base = (uintptr_t)key[i].dttk_value;

			if (!dtrace_canload(base, size, mstate, vstate))
				break;

			for (j = 0; j < size; j++) {
				hashval += dtrace_load8(base + j);
				hashval += (hashval << 10);
				hashval ^= (hashval >> 6);
			}
		}
	}

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (NULL);

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * There is a remote chance (ideally, 1 in 2^31) that our hashval
	 * comes out to be one of our two sentinel hash values.  If this
	 * actually happens, we set the hashval to be a value known to be a
	 * non-sentinel value.
	 */
	if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
		hashval = DTRACE_DYNHASH_VALID;
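
	/*
	 * (An aside on the above:  the three shift-and-add steps are the
	 * standard finalization of Jenkins' "One-at-a-time" hash, avalanching
	 * the accumulated key bits before the value is reduced to a bucket
	 * index below.)
	 */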

	/*
	 * Yes, it's painful to do a divide here.  If the cycle count becomes
	 * important here, tricks can be pulled to reduce it.  (However, it's
	 * critical that hash collisions be kept to an absolute minimum;
	 * they're much more painful than a divide.)  It's better to have a
	 * solution that generates few collisions and still keeps things
	 * relatively simple.
	 */
	bucket = hashval % dstate->dtds_hashsize;

	if (op == DTRACE_DYNVAR_DEALLOC) {
		volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;

		for (;;) {
			while ((lock = *lockp) & 1)
				continue;

			if (dtrace_casptr((void *)lockp,
			    (void *)lock, (void *)(lock + 1)) == (void *)lock)
				break;
		}

		dtrace_membar_producer();
	}
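
	/*
	 * (The low bit of dtdh_lock thus acts as a spin lock -- an odd value
	 * means a deallocation is in progress -- while the remaining bits act
	 * as a version number:  lookups snapshot the word and recheck it
	 * afterward to detect a concurrent deallocation.)
	 */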

top:
	prev = NULL;
	lock = hash[bucket].dtdh_lock;

	dtrace_membar_consumer();

	start = hash[bucket].dtdh_chain;
	ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
	    start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
	    op != DTRACE_DYNVAR_DEALLOC));

	for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
		dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
		dtrace_key_t *dkey = &dtuple->dtt_key[0];

		if (dvar->dtdv_hashval != hashval) {
			if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
				/*
				 * We've reached the sink, and therefore the
				 * end of the hash chain; we can kick out of
				 * the loop knowing that we have seen a valid
				 * snapshot of state.
				 */
				ASSERT(dvar->dtdv_next == NULL);
				ASSERT(dvar == &dtrace_dynhash_sink);
				break;
			}

			if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
				/*
				 * We've gone off the rails:  somewhere along
				 * the line, one of the members of this hash
				 * chain was deleted.  Note that we could also
				 * detect this by simply letting this loop run
				 * to completion, as we would eventually hit
				 * the end of the dirty list.  However, we
				 * want to avoid running the length of the
				 * dirty list unnecessarily (it might be quite
				 * long), so we catch this as early as
				 * possible by detecting the hash marker.  In
				 * this case, we simply set dvar to NULL and
				 * break; the conditional after the loop will
				 * send us back to top.
				 */
				dvar = NULL;
				break;
			}

			goto next;
		}

		if (dtuple->dtt_nkeys != nkeys)
			goto next;

		for (i = 0; i < nkeys; i++, dkey++) {
			if (dkey->dttk_size != key[i].dttk_size)
				goto next; /* size or type mismatch */

			if (dkey->dttk_size != 0) {
				if (dtrace_bcmp(
				    (void *)(uintptr_t)key[i].dttk_value,
				    (void *)(uintptr_t)dkey->dttk_value,
				    dkey->dttk_size))
					goto next;
			} else {
				if (dkey->dttk_value != key[i].dttk_value)
					goto next;
			}
		}

		if (op != DTRACE_DYNVAR_DEALLOC)
			return (dvar);

		ASSERT(dvar->dtdv_next == NULL ||
		    dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);

		if (prev != NULL) {
			ASSERT(hash[bucket].dtdh_chain != dvar);
			ASSERT(start != dvar);
			ASSERT(prev->dtdv_next == dvar);
			prev->dtdv_next = dvar->dtdv_next;
		} else {
			if (dtrace_casptr(&hash[bucket].dtdh_chain,
			    start, dvar->dtdv_next) != start) {
				/*
				 * We have failed to atomically swing the
				 * hash table head pointer, presumably because
				 * of a conflicting allocation on another CPU.
				 * We need to reread the hash chain and try
				 * again.
				 */
				goto top;
			}
		}

		dtrace_membar_producer();

		/*
		 * Now set the hash value to indicate that it's free.
		 */
		ASSERT(hash[bucket].dtdh_chain != dvar);
		dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

		dtrace_membar_producer();

		/*
		 * Set the next pointer to point at the dirty list, and
		 * atomically swing the dirty pointer to the newly freed dvar.
		 */
		do {
			next = dcpu->dtdsc_dirty;
			dvar->dtdv_next = next;
		} while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);

		/*
		 * Finally, unlock this hash bucket.
		 */
		ASSERT(hash[bucket].dtdh_lock == lock);
		ASSERT(lock & 1);
		hash[bucket].dtdh_lock++;

		return (NULL);
next:
		prev = dvar;
		continue;
	}

	if (dvar == NULL) {
		/*
		 * If dvar is NULL, it is because we went off the rails:
		 * one of the elements that we traversed in the hash chain
		 * was deleted while we were traversing it.  In this case,
		 * we assert that we aren't doing a dealloc (deallocs lock
		 * the hash bucket to prevent themselves from racing with
		 * one another), and retry the hash chain traversal.
		 */
		ASSERT(op != DTRACE_DYNVAR_DEALLOC);
		goto top;
	}

	if (op != DTRACE_DYNVAR_ALLOC) {
		/*
		 * If we are not to allocate a new variable, we want to
		 * return NULL now.  Before we return, check that the value
		 * of the lock word hasn't changed.  If it has, we may have
		 * seen an inconsistent snapshot.
		 */
		if (op == DTRACE_DYNVAR_NOALLOC) {
			if (hash[bucket].dtdh_lock != lock)
				goto top;
		} else {
			ASSERT(op == DTRACE_DYNVAR_DEALLOC);
			ASSERT(hash[bucket].dtdh_lock == lock);
			ASSERT(lock & 1);
			hash[bucket].dtdh_lock++;
		}

		return (NULL);
	}
1793 */ 1794 switch (dstate->dtds_state) { 1795 case DTRACE_DSTATE_CLEAN: { 1796 void *sp = &dstate->dtds_state; 1797 1798 if (++cpu >= NCPU) 1799 cpu = 0; 1800 1801 if (dcpu->dtdsc_dirty != NULL && 1802 nstate == DTRACE_DSTATE_EMPTY) 1803 nstate = DTRACE_DSTATE_DIRTY; 1804 1805 if (dcpu->dtdsc_rinsing != NULL) 1806 nstate = DTRACE_DSTATE_RINSING; 1807 1808 dcpu = &dstate->dtds_percpu[cpu]; 1809 1810 if (cpu != me) 1811 goto retry; 1812 1813 (void) dtrace_cas32(sp, 1814 DTRACE_DSTATE_CLEAN, nstate); 1815 1816 /* 1817 * To increment the correct bean 1818 * counter, take another lap. 1819 */ 1820 goto retry; 1821 } 1822 1823 case DTRACE_DSTATE_DIRTY: 1824 dcpu->dtdsc_dirty_drops++; 1825 break; 1826 1827 case DTRACE_DSTATE_RINSING: 1828 dcpu->dtdsc_rinsing_drops++; 1829 break; 1830 1831 case DTRACE_DSTATE_EMPTY: 1832 dcpu->dtdsc_drops++; 1833 break; 1834 } 1835 1836 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP); 1837 return (NULL); 1838 } 1839 1840 /* 1841 * The clean list appears to be non-empty. We want to 1842 * move the clean list to the free list; we start by 1843 * moving the clean pointer aside. 1844 */ 1845 if (dtrace_casptr(&dcpu->dtdsc_clean, 1846 clean, NULL) != clean) { 1847 /* 1848 * We are in one of two situations: 1849 * 1850 * (a) The clean list was switched to the 1851 * free list by another CPU. 1852 * 1853 * (b) The clean list was added to by the 1854 * cleansing cyclic. 1855 * 1856 * In either of these situations, we can 1857 * just reattempt the free list allocation. 1858 */ 1859 goto retry; 1860 } 1861 1862 ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); 1863 1864 /* 1865 * Now we'll move the clean list to our free list. 1866 * It's impossible for this to fail: the only way 1867 * the free list can be updated is through this 1868 * code path, and only one CPU can own the clean list. 1869 * Thus, it would only be possible for this to fail if 1870 * this code were racing with dtrace_dynvar_clean(). 1871 * (That is, if dtrace_dynvar_clean() updated the clean 1872 * list, and we ended up racing to update the free 1873 * list.) This race is prevented by the dtrace_sync() 1874 * in dtrace_dynvar_clean() -- which flushes the 1875 * owners of the clean lists out before resetting 1876 * the clean lists. 1877 */ 1878 dcpu = &dstate->dtds_percpu[me]; 1879 rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean); 1880 ASSERT(rval == NULL); 1881 goto retry; 1882 } 1883 1884 dvar = free; 1885 new_free = dvar->dtdv_next; 1886 } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free); 1887 1888 /* 1889 * We have now allocated a new chunk. We copy the tuple keys into the 1890 * tuple array and copy any referenced key data into the data space 1891 * following the tuple array. As we do this, we relocate dttk_value 1892 * in the final tuple to point to the key data address in the chunk. 
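 * For example (purely illustrative): a two-key tuple with an integer key (dttk_size of 0, so its value stays inline in the dtrace_key_t) and a 13-byte string key gives ksize = P2ROUNDUP(13, 8) = 16; the chunk is then laid out as the dtrace_dynvar_t (whose tuple holds both dtrace_key_t entries), 16 bytes of key data into which the string is copied and to which its dttk_value is repointed, and finally dsize bytes of variable data at kdata + ksize.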
1893 */ 1894 kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys]; 1895 dvar->dtdv_data = (void *)(kdata + ksize); 1896 dvar->dtdv_tuple.dtt_nkeys = nkeys; 1897 1898 for (i = 0; i < nkeys; i++) { 1899 dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i]; 1900 size_t kesize = key[i].dttk_size; 1901 1902 if (kesize != 0) { 1903 dtrace_bcopy( 1904 (const void *)(uintptr_t)key[i].dttk_value, 1905 (void *)kdata, kesize); 1906 dkey->dttk_value = kdata; 1907 kdata += P2ROUNDUP(kesize, sizeof (uint64_t)); 1908 } else { 1909 dkey->dttk_value = key[i].dttk_value; 1910 } 1911 1912 dkey->dttk_size = kesize; 1913 } 1914 1915 ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE); 1916 dvar->dtdv_hashval = hashval; 1917 dvar->dtdv_next = start; 1918 1919 if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start) 1920 return (dvar); 1921 1922 /* 1923 * The cas has failed. Either another CPU is adding an element to 1924 * this hash chain, or another CPU is deleting an element from this 1925 * hash chain. The simplest way to deal with both of these cases 1926 * (though not necessarily the most efficient) is to free our 1927 * allocated block and tail-call ourselves. Note that the free is 1928 * to the dirty list and _not_ to the free list. This is to prevent 1929 * races with allocators, above. 1930 */ 1931 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; 1932 1933 dtrace_membar_producer(); 1934 1935 do { 1936 free = dcpu->dtdsc_dirty; 1937 dvar->dtdv_next = free; 1938 } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free); 1939 1940 return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate)); 1941 } 1942 1943 /*ARGSUSED*/ 1944 static void 1945 dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) 1946 { 1947 if ((int64_t)nval < (int64_t)*oval) 1948 *oval = nval; 1949 } 1950 1951 /*ARGSUSED*/ 1952 static void 1953 dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg) 1954 { 1955 if ((int64_t)nval > (int64_t)*oval) 1956 *oval = nval; 1957 } 1958 1959 static void 1960 dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr) 1961 { 1962 int i, zero = DTRACE_QUANTIZE_ZEROBUCKET; 1963 int64_t val = (int64_t)nval; 1964 1965 if (val < 0) { 1966 for (i = 0; i < zero; i++) { 1967 if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) { 1968 quanta[i] += incr; 1969 return; 1970 } 1971 } 1972 } else { 1973 for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) { 1974 if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) { 1975 quanta[i - 1] += incr; 1976 return; 1977 } 1978 } 1979 1980 quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr; 1981 return; 1982 } 1983 1984 ASSERT(0); 1985 } 1986 1987 static void 1988 dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) 1989 { 1990 uint64_t arg = *lquanta++; 1991 int32_t base = DTRACE_LQUANTIZE_BASE(arg); 1992 uint16_t step = DTRACE_LQUANTIZE_STEP(arg); 1993 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg); 1994 int32_t val = (int32_t)nval, level; 1995 1996 ASSERT(step != 0); 1997 ASSERT(levels != 0); 1998 1999 if (val < base) { 2000 /* 2001 * This is an underflow. 2002 */ 2003 lquanta[0] += incr; 2004 return; 2005 } 2006 2007 level = (val - base) / step; 2008 2009 if (level < levels) { 2010 lquanta[level + 1] += incr; 2011 return; 2012 } 2013 2014 /* 2015 * This is an overflow. 
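 * (To make the bucketing concrete, consider an enabling such as lquantize(x, 0, 100, 10), which encodes base = 0, step = 10 and levels = 10: a value of 42 increments lquanta[5], since level 4 is offset by the leading underflow bucket; a value of -1 increments the underflow bucket, lquanta[0]; and a value of 100 or more arrives here, at the overflow bucket lquanta[11].)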
2016 */ 2017 lquanta[levels + 1] += incr; 2018 } 2019 2020 static int 2021 dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low, 2022 uint16_t high, uint16_t nsteps, int64_t value) 2023 { 2024 int64_t this = 1, last, next; 2025 int base = 1, order; 2026 2027 ASSERT(factor <= nsteps); 2028 ASSERT(nsteps % factor == 0); 2029 2030 for (order = 0; order < low; order++) 2031 this *= factor; 2032 2033 /* 2034 * If our value is less than our factor taken to the power of the 2035 * low order of magnitude, it goes into the zeroth bucket. 2036 */ 2037 if (value < (last = this)) 2038 return (0); 2039 2040 for (this *= factor; order <= high; order++) { 2041 int nbuckets = this > nsteps ? nsteps : this; 2042 2043 if ((next = this * factor) < this) { 2044 /* 2045 * We should not generally get log/linear quantizations 2046 * with a high magnitude that allows 64-bits to 2047 * overflow, but we nonetheless protect against this 2048 * by explicitly checking for overflow, and clamping 2049 * our value accordingly. 2050 */ 2051 value = this - 1; 2052 } 2053 2054 if (value < this) { 2055 /* 2056 * If our value lies within this order of magnitude, 2057 * determine its position by taking the offset within 2058 * the order of magnitude, dividing by the bucket 2059 * width, and adding to our (accumulated) base. 2060 */ 2061 return (base + (value - last) / (this / nbuckets)); 2062 } 2063 2064 base += nbuckets - (nbuckets / factor); 2065 last = this; 2066 this = next; 2067 } 2068 2069 /* 2070 * Our value is greater than or equal to our factor taken to the 2071 * power of one plus the high magnitude -- return the top bucket. 2072 */ 2073 return (base); 2074 } 2075 2076 static void 2077 dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) 2078 { 2079 uint64_t arg = *llquanta++; 2080 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); 2081 uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); 2082 uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); 2083 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); 2084 2085 llquanta[dtrace_aggregate_llquantize_bucket(factor, 2086 low, high, nsteps, nval)] += incr; 2087 } 2088 2089 /*ARGSUSED*/ 2090 static void 2091 dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) 2092 { 2093 data[0]++; 2094 data[1] += nval; 2095 } 2096 2097 /*ARGSUSED*/ 2098 static void 2099 dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg) 2100 { 2101 int64_t snval = (int64_t)nval; 2102 uint64_t tmp[2]; 2103 2104 data[0]++; 2105 data[1] += nval; 2106 2107 /* 2108 * What we want to say here is: 2109 * 2110 * data[2] += nval * nval; 2111 * 2112 * But given that nval is 64-bit, we could easily overflow, so 2113 * we do this as 128-bit arithmetic. 2114 */ 2115 if (snval < 0) 2116 snval = -snval; 2117 2118 dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp); 2119 dtrace_add_128(data + 2, tmp, data + 2); 2120 } 2121 2122 /*ARGSUSED*/ 2123 static void 2124 dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) 2125 { 2126 *oval = *oval + 1; 2127 } 2128 2129 /*ARGSUSED*/ 2130 static void 2131 dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) 2132 { 2133 *oval += nval; 2134 } 2135 2136 /* 2137 * Aggregate given the tuple in the principal data buffer, and the aggregating 2138 * action denoted by the specified dtrace_aggregation_t. The aggregation 2139 * buffer is specified as the buf parameter. 
This routine does not return 2140 * failure; if there is no space in the aggregation buffer, the data will be 2141 * dropped, and a corresponding counter incremented. 2142 */ 2143 static void 2144 dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf, 2145 intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg) 2146 { 2147 dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec; 2148 uint32_t i, ndx, size, fsize; 2149 uint32_t align = sizeof (uint64_t) - 1; 2150 dtrace_aggbuffer_t *agb; 2151 dtrace_aggkey_t *key; 2152 uint32_t hashval = 0, limit, isstr; 2153 caddr_t tomax, data, kdata; 2154 dtrace_actkind_t action; 2155 dtrace_action_t *act; 2156 uintptr_t offs; 2157 2158 if (buf == NULL) 2159 return; 2160 2161 if (!agg->dtag_hasarg) { 2162 /* 2163 * Currently, only quantize() and lquantize() take additional 2164 * arguments, and they have the same semantics: an increment 2165 * value that defaults to 1 when not present. If additional 2166 * aggregating actions take arguments, the setting of the 2167 * default argument value will presumably have to become more 2168 * sophisticated... 2169 */ 2170 arg = 1; 2171 } 2172 2173 action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION; 2174 size = rec->dtrd_offset - agg->dtag_base; 2175 fsize = size + rec->dtrd_size; 2176 2177 ASSERT(dbuf->dtb_tomax != NULL); 2178 data = dbuf->dtb_tomax + offset + agg->dtag_base; 2179 2180 if ((tomax = buf->dtb_tomax) == NULL) { 2181 dtrace_buffer_drop(buf); 2182 return; 2183 } 2184 2185 /* 2186 * The metastructure is always at the bottom of the buffer. 2187 */ 2188 agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size - 2189 sizeof (dtrace_aggbuffer_t)); 2190 2191 if (buf->dtb_offset == 0) { 2192 /* 2193 * We just kludge up approximately 1/8th of the size to be 2194 * buckets. If this guess ends up being routinely 2195 * off-the-mark, we may need to dynamically readjust this 2196 * based on past performance. 2197 */ 2198 uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t); 2199 2200 if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) < 2201 (uintptr_t)tomax || hashsize == 0) { 2202 /* 2203 * We've been given a ludicrously small buffer; 2204 * increment our drop count and leave. 2205 */ 2206 dtrace_buffer_drop(buf); 2207 return; 2208 } 2209 2210 /* 2211 * And now, a pathetic attempt to get an odd (or 2212 * perchance, a prime) hash size for better hash distribution. 2213 */ 2214 if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3)) 2215 hashsize -= DTRACE_AGGHASHSIZE_SLEW; 2216 2217 agb->dtagb_hashsize = hashsize; 2218 agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb - 2219 agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *)); 2220 agb->dtagb_free = (uintptr_t)agb->dtagb_hash; 2221 2222 for (i = 0; i < agb->dtagb_hashsize; i++) 2223 agb->dtagb_hash[i] = NULL; 2224 } 2225 2226 ASSERT(agg->dtag_first != NULL); 2227 ASSERT(agg->dtag_first->dta_intuple); 2228 2229 /* 2230 * Calculate the hash value based on the key. Note that we _don't_ 2231 * include the aggid in the hashing (but we will store it as part of 2232 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time" 2233 * algorithm: a simple, quick algorithm that has no known funnels, and 2234 * gets good distribution in practice. The efficacy of the hashing 2235 * algorithm (and a comparison with other algorithms) may be found by 2236 * running the ::dtrace_aggstat MDB dcmd.
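 * In isolation, one-at-a-time folds each input byte b as hash += b; hash += (hash << 10); hash ^= (hash >> 6); with a single avalanche pass (hash += hash << 3; hash ^= hash >> 11; hash += hash << 15) applied after all tuple bytes have been mixed in, exactly as the loops below do; for string members, only the bytes up to the terminating NUL participate.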
2237 */ 2238 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { 2239 i = act->dta_rec.dtrd_offset - agg->dtag_base; 2240 limit = i + act->dta_rec.dtrd_size; 2241 ASSERT(limit <= size); 2242 isstr = DTRACEACT_ISSTRING(act); 2243 2244 for (; i < limit; i++) { 2245 hashval += data[i]; 2246 hashval += (hashval << 10); 2247 hashval ^= (hashval >> 6); 2248 2249 if (isstr && data[i] == '\0') 2250 break; 2251 } 2252 } 2253 2254 hashval += (hashval << 3); 2255 hashval ^= (hashval >> 11); 2256 hashval += (hashval << 15); 2257 2258 /* 2259 * Yes, the divide here is expensive -- but it's generally the least 2260 * of the performance issues given the amount of data that we iterate 2261 * over to compute hash values, compare data, etc. 2262 */ 2263 ndx = hashval % agb->dtagb_hashsize; 2264 2265 for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) { 2266 ASSERT((caddr_t)key >= tomax); 2267 ASSERT((caddr_t)key < tomax + buf->dtb_size); 2268 2269 if (hashval != key->dtak_hashval || key->dtak_size != size) 2270 continue; 2271 2272 kdata = key->dtak_data; 2273 ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size); 2274 2275 for (act = agg->dtag_first; act->dta_intuple; 2276 act = act->dta_next) { 2277 i = act->dta_rec.dtrd_offset - agg->dtag_base; 2278 limit = i + act->dta_rec.dtrd_size; 2279 ASSERT(limit <= size); 2280 isstr = DTRACEACT_ISSTRING(act); 2281 2282 for (; i < limit; i++) { 2283 if (kdata[i] != data[i]) 2284 goto next; 2285 2286 if (isstr && data[i] == '\0') 2287 break; 2288 } 2289 } 2290 2291 if (action != key->dtak_action) { 2292 /* 2293 * We are aggregating on the same value in the same 2294 * aggregation with two different aggregating actions. 2295 * (This should have been picked up in the compiler, 2296 * so we may be dealing with errant or devious DIF.) 2297 * This is an error condition; we indicate as much, 2298 * and return. 2299 */ 2300 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 2301 return; 2302 } 2303 2304 /* 2305 * This is a hit: we need to apply the aggregator to 2306 * the value at this key. 2307 */ 2308 agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg); 2309 return; 2310 next: 2311 continue; 2312 } 2313 2314 /* 2315 * We didn't find it. We need to allocate some zero-filled space, 2316 * link it into the hash table appropriately, and apply the aggregator 2317 * to the (zero-filled) value. 2318 */ 2319 offs = buf->dtb_offset; 2320 while (offs & align) 2321 offs += sizeof (uint32_t); 2322 2323 /* 2324 * If we don't have enough room to both allocate a new key _and_ 2325 * its associated data, increment the drop count and return. 2326 */ 2327 if ((uintptr_t)tomax + offs + fsize > 2328 agb->dtagb_free - sizeof (dtrace_aggkey_t)) { 2329 dtrace_buffer_drop(buf); 2330 return; 2331 } 2332 2333 /*CONSTCOND*/ 2334 ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1))); 2335 key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t)); 2336 agb->dtagb_free -= sizeof (dtrace_aggkey_t); 2337 2338 key->dtak_data = kdata = tomax + offs; 2339 buf->dtb_offset = offs + fsize; 2340 2341 /* 2342 * Now copy the data across. 2343 */ 2344 *((dtrace_aggid_t *)kdata) = agg->dtag_id; 2345 2346 for (i = sizeof (dtrace_aggid_t); i < size; i++) 2347 kdata[i] = data[i]; 2348 2349 /* 2350 * Because strings are not zeroed out by default, we need to iterate 2351 * looking for actions that store strings, and we need to explicitly 2352 * pad these strings out with zeroes.
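 * (Were this padding skipped, two recordings of the same string key could differ in whatever bytes happened to follow the terminating NUL; the in-kernel comparison above stops at the NUL and would treat them as equal, but user-level consumers, which see the full fixed-size key data, would not.)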
2353 */ 2354 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { 2355 int nul; 2356 2357 if (!DTRACEACT_ISSTRING(act)) 2358 continue; 2359 2360 i = act->dta_rec.dtrd_offset - agg->dtag_base; 2361 limit = i + act->dta_rec.dtrd_size; 2362 ASSERT(limit <= size); 2363 2364 for (nul = 0; i < limit; i++) { 2365 if (nul) { 2366 kdata[i] = '\0'; 2367 continue; 2368 } 2369 2370 if (data[i] != '\0') 2371 continue; 2372 2373 nul = 1; 2374 } 2375 } 2376 2377 for (i = size; i < fsize; i++) 2378 kdata[i] = 0; 2379 2380 key->dtak_hashval = hashval; 2381 key->dtak_size = size; 2382 key->dtak_action = action; 2383 key->dtak_next = agb->dtagb_hash[ndx]; 2384 agb->dtagb_hash[ndx] = key; 2385 2386 /* 2387 * Finally, apply the aggregator. 2388 */ 2389 *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial; 2390 agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg); 2391 } 2392 2393 /* 2394 * Given consumer state, this routine finds a speculation in the INACTIVE 2395 * state and transitions it into the ACTIVE state. If there is no speculation 2396 * in the INACTIVE state, 0 is returned. In this case, no error counter is 2397 * incremented -- it is up to the caller to take appropriate action. 2398 */ 2399 static int 2400 dtrace_speculation(dtrace_state_t *state) 2401 { 2402 int i = 0; 2403 dtrace_speculation_state_t current; 2404 uint32_t *stat = &state->dts_speculations_unavail, count; 2405 2406 while (i < state->dts_nspeculations) { 2407 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2408 2409 current = spec->dtsp_state; 2410 2411 if (current != DTRACESPEC_INACTIVE) { 2412 if (current == DTRACESPEC_COMMITTINGMANY || 2413 current == DTRACESPEC_COMMITTING || 2414 current == DTRACESPEC_DISCARDING) 2415 stat = &state->dts_speculations_busy; 2416 i++; 2417 continue; 2418 } 2419 2420 if (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2421 current, DTRACESPEC_ACTIVE) == current) 2422 return (i + 1); 2423 } 2424 2425 /* 2426 * We couldn't find a speculation. If we found as much as a single 2427 * busy speculation buffer, we'll attribute this failure as "busy" 2428 * instead of "unavail". 2429 */ 2430 do { 2431 count = *stat; 2432 } while (dtrace_cas32(stat, count, count + 1) != count); 2433 2434 return (0); 2435 } 2436 2437 /* 2438 * This routine commits an active speculation. If the specified speculation 2439 * is not in a valid state to perform a commit(), this routine will silently do 2440 * nothing. 
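 * (For example, a commit() that races with a discard() of the same speculation on another CPU may find the speculation already in the DISCARDING state, and will simply return.)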
The state of the specified speculation is transitioned according 2441 * to the state transition diagram outlined in <sys/dtrace_impl.h> 2442 */ 2443 static void 2444 dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, 2445 dtrace_specid_t which) 2446 { 2447 dtrace_speculation_t *spec; 2448 dtrace_buffer_t *src, *dest; 2449 uintptr_t daddr, saddr, dlimit; 2450 dtrace_speculation_state_t current, new; 2451 intptr_t offs; 2452 2453 if (which == 0) 2454 return; 2455 2456 if (which > state->dts_nspeculations) { 2457 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; 2458 return; 2459 } 2460 2461 spec = &state->dts_speculations[which - 1]; 2462 src = &spec->dtsp_buffer[cpu]; 2463 dest = &state->dts_buffer[cpu]; 2464 2465 do { 2466 current = spec->dtsp_state; 2467 2468 if (current == DTRACESPEC_COMMITTINGMANY) 2469 break; 2470 2471 switch (current) { 2472 case DTRACESPEC_INACTIVE: 2473 case DTRACESPEC_DISCARDING: 2474 return; 2475 2476 case DTRACESPEC_COMMITTING: 2477 /* 2478 * This is only possible if we are (a) commit()'ing 2479 * without having done a prior speculate() on this CPU 2480 * and (b) racing with another commit() on a different 2481 * CPU. There's nothing to do -- we just assert that 2482 * our offset is 0. 2483 */ 2484 ASSERT(src->dtb_offset == 0); 2485 return; 2486 2487 case DTRACESPEC_ACTIVE: 2488 new = DTRACESPEC_COMMITTING; 2489 break; 2490 2491 case DTRACESPEC_ACTIVEONE: 2492 /* 2493 * This speculation is active on one CPU. If our 2494 * buffer offset is non-zero, we know that the one CPU 2495 * must be us. Otherwise, we are committing on a 2496 * different CPU from the speculate(), and we must 2497 * rely on being asynchronously cleaned. 2498 */ 2499 if (src->dtb_offset != 0) { 2500 new = DTRACESPEC_COMMITTING; 2501 break; 2502 } 2503 /*FALLTHROUGH*/ 2504 2505 case DTRACESPEC_ACTIVEMANY: 2506 new = DTRACESPEC_COMMITTINGMANY; 2507 break; 2508 2509 default: 2510 ASSERT(0); 2511 } 2512 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2513 current, new) != current); 2514 2515 /* 2516 * We have set the state to indicate that we are committing this 2517 * speculation. Now reserve the necessary space in the destination 2518 * buffer. 2519 */ 2520 if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset, 2521 sizeof (uint64_t), state, NULL)) < 0) { 2522 dtrace_buffer_drop(dest); 2523 goto out; 2524 } 2525 2526 /* 2527 * We have the space; copy the buffer across. (Note that this is a 2528 * highly suboptimal bcopy(); in the unlikely event that this becomes 2529 * a serious performance issue, a high-performance DTrace-specific 2530 * bcopy() should obviously be invented.) 2531 */ 2532 daddr = (uintptr_t)dest->dtb_tomax + offs; 2533 dlimit = daddr + src->dtb_offset; 2534 saddr = (uintptr_t)src->dtb_tomax; 2535 2536 /* 2537 * First, the aligned portion. 2538 */ 2539 while (dlimit - daddr >= sizeof (uint64_t)) { 2540 *((uint64_t *)daddr) = *((uint64_t *)saddr); 2541 2542 daddr += sizeof (uint64_t); 2543 saddr += sizeof (uint64_t); 2544 } 2545 2546 /* 2547 * Now any left-over bit... 2548 */ 2549 while (dlimit - daddr) 2550 *((uint8_t *)daddr++) = *((uint8_t *)saddr++); 2551 2552 /* 2553 * Finally, commit the reserved space in the destination buffer. 2554 */ 2555 dest->dtb_offset = offs + src->dtb_offset; 2556 2557 out: 2558 /* 2559 * If we're lucky enough to be the only active CPU on this speculation 2560 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
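 * (In the COMMITTINGMANY case, by contrast, other CPUs may still hold speculative data for this speculation; the state is left alone, and dtrace_speculation_clean() transitions it back to INACTIVE only after every CPU has flushed its buffer.)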
2561 */ 2562 if (current == DTRACESPEC_ACTIVE || 2563 (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) { 2564 uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state, 2565 DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE); 2566 2567 ASSERT(rval == DTRACESPEC_COMMITTING); 2568 } 2569 2570 src->dtb_offset = 0; 2571 src->dtb_xamot_drops += src->dtb_drops; 2572 src->dtb_drops = 0; 2573 } 2574 2575 /* 2576 * This routine discards an active speculation. If the specified speculation 2577 * is not in a valid state to perform a discard(), this routine will silently 2578 * do nothing. The state of the specified speculation is transitioned 2579 * according to the state transition diagram outlined in <sys/dtrace_impl.h> 2580 */ 2581 static void 2582 dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, 2583 dtrace_specid_t which) 2584 { 2585 dtrace_speculation_t *spec; 2586 dtrace_speculation_state_t current, new; 2587 dtrace_buffer_t *buf; 2588 2589 if (which == 0) 2590 return; 2591 2592 if (which > state->dts_nspeculations) { 2593 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; 2594 return; 2595 } 2596 2597 spec = &state->dts_speculations[which - 1]; 2598 buf = &spec->dtsp_buffer[cpu]; 2599 2600 do { 2601 current = spec->dtsp_state; 2602 2603 switch (current) { 2604 case DTRACESPEC_INACTIVE: 2605 case DTRACESPEC_COMMITTINGMANY: 2606 case DTRACESPEC_COMMITTING: 2607 case DTRACESPEC_DISCARDING: 2608 return; 2609 2610 case DTRACESPEC_ACTIVE: 2611 case DTRACESPEC_ACTIVEMANY: 2612 new = DTRACESPEC_DISCARDING; 2613 break; 2614 2615 case DTRACESPEC_ACTIVEONE: 2616 if (buf->dtb_offset != 0) { 2617 new = DTRACESPEC_INACTIVE; 2618 } else { 2619 new = DTRACESPEC_DISCARDING; 2620 } 2621 break; 2622 2623 default: 2624 ASSERT(0); 2625 } 2626 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2627 current, new) != current); 2628 2629 buf->dtb_offset = 0; 2630 buf->dtb_drops = 0; 2631 } 2632 2633 /* 2634 * Note: not called from probe context. This function is called 2635 * asynchronously from cross call context to clean any speculations that are 2636 * in the COMMITTINGMANY or DISCARDING states. These speculations may not be 2637 * transitioned back to the INACTIVE state until all CPUs have cleaned the 2638 * speculation. 2639 */ 2640 static void 2641 dtrace_speculation_clean_here(dtrace_state_t *state) 2642 { 2643 dtrace_icookie_t cookie; 2644 processorid_t cpu = CPU->cpu_id; 2645 dtrace_buffer_t *dest = &state->dts_buffer[cpu]; 2646 dtrace_specid_t i; 2647 2648 cookie = dtrace_interrupt_disable(); 2649 2650 if (dest->dtb_tomax == NULL) { 2651 dtrace_interrupt_enable(cookie); 2652 return; 2653 } 2654 2655 for (i = 0; i < state->dts_nspeculations; i++) { 2656 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2657 dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; 2658 2659 if (src->dtb_tomax == NULL) 2660 continue; 2661 2662 if (spec->dtsp_state == DTRACESPEC_DISCARDING) { 2663 src->dtb_offset = 0; 2664 continue; 2665 } 2666 2667 if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) 2668 continue; 2669 2670 if (src->dtb_offset == 0) 2671 continue; 2672 2673 dtrace_speculation_commit(state, cpu, i + 1); 2674 } 2675 2676 dtrace_interrupt_enable(cookie); 2677 } 2678 2679 /* 2680 * Note: not called from probe context. This function is called 2681 * asynchronously (and at a regular interval) to clean any speculations that 2682 * are in the COMMITTINGMANY or DISCARDING states. 
If it discovers that there 2683 * is work to be done, it cross calls all CPUs to perform that work; 2684 * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to the 2685 * INACTIVE state until they have been cleaned by all CPUs. 2686 */ 2687 static void 2688 dtrace_speculation_clean(dtrace_state_t *state) 2689 { 2690 int work = 0, rv; 2691 dtrace_specid_t i; 2692 2693 for (i = 0; i < state->dts_nspeculations; i++) { 2694 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2695 2696 ASSERT(!spec->dtsp_cleaning); 2697 2698 if (spec->dtsp_state != DTRACESPEC_DISCARDING && 2699 spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) 2700 continue; 2701 2702 work++; 2703 spec->dtsp_cleaning = 1; 2704 } 2705 2706 if (!work) 2707 return; 2708 2709 dtrace_xcall(DTRACE_CPUALL, 2710 (dtrace_xcall_t)dtrace_speculation_clean_here, state); 2711 2712 /* 2713 * We now know that all CPUs have committed or discarded their 2714 * speculation buffers, as appropriate. We can now set the state 2715 * to inactive. 2716 */ 2717 for (i = 0; i < state->dts_nspeculations; i++) { 2718 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2719 dtrace_speculation_state_t current, new; 2720 2721 if (!spec->dtsp_cleaning) 2722 continue; 2723 2724 current = spec->dtsp_state; 2725 ASSERT(current == DTRACESPEC_DISCARDING || 2726 current == DTRACESPEC_COMMITTINGMANY); 2727 2728 new = DTRACESPEC_INACTIVE; 2729 2730 rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new); 2731 ASSERT(rv == current); 2732 spec->dtsp_cleaning = 0; 2733 } 2734 } 2735 2736 /* 2737 * Called as part of a speculate() to get the speculative buffer associated 2738 * with a given speculation. Returns NULL if the specified speculation is not 2739 * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and 2740 * the active CPU is not the specified CPU -- the speculation will be 2741 * atomically transitioned into the ACTIVEMANY state. 2742 */ 2743 static dtrace_buffer_t * 2744 dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, 2745 dtrace_specid_t which) 2746 { 2747 dtrace_speculation_t *spec; 2748 dtrace_speculation_state_t current, new; 2749 dtrace_buffer_t *buf; 2750 2751 if (which == 0) 2752 return (NULL); 2753 2754 if (which > state->dts_nspeculations) { 2755 cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; 2756 return (NULL); 2757 } 2758 2759 spec = &state->dts_speculations[which - 1]; 2760 buf = &spec->dtsp_buffer[cpuid]; 2761 2762 do { 2763 current = spec->dtsp_state; 2764 2765 switch (current) { 2766 case DTRACESPEC_INACTIVE: 2767 case DTRACESPEC_COMMITTINGMANY: 2768 case DTRACESPEC_DISCARDING: 2769 return (NULL); 2770 2771 case DTRACESPEC_COMMITTING: 2772 ASSERT(buf->dtb_offset == 0); 2773 return (NULL); 2774 2775 case DTRACESPEC_ACTIVEONE: 2776 /* 2777 * This speculation is currently active on one CPU. 2778 * Check the offset in the buffer; if it's non-zero, 2779 * that CPU must be us (and we leave the state alone). 2780 * If it's zero, assume that we're starting on a new 2781 * CPU -- and change the state to indicate that the 2782 * speculation is active on more than one CPU.
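 * To illustrate: the first speculate() after a speculation() allocation moves the state from ACTIVE to ACTIVEONE and writes to (say) CPU 3's speculative buffer; a later speculate() of the same ID on CPU 5 arrives here with a zero offset in its own buffer, and so moves the state to ACTIVEMANY.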
2783 */ 2784 if (buf->dtb_offset != 0) 2785 return (buf); 2786 2787 new = DTRACESPEC_ACTIVEMANY; 2788 break; 2789 2790 case DTRACESPEC_ACTIVEMANY: 2791 return (buf); 2792 2793 case DTRACESPEC_ACTIVE: 2794 new = DTRACESPEC_ACTIVEONE; 2795 break; 2796 2797 default: 2798 ASSERT(0); 2799 } 2800 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2801 current, new) != current); 2802 2803 ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY); 2804 return (buf); 2805 } 2806 2807 /* 2808 * Return a string. In the event that the user lacks the privilege to access 2809 * arbitrary kernel memory, we copy the string out to scratch memory so that we 2810 * don't fail access checking. 2811 * 2812 * dtrace_dif_variable() uses this routine as a helper for various 2813 * builtin values such as 'execname' and 'probefunc.' 2814 */ 2815 uintptr_t 2816 dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, 2817 dtrace_mstate_t *mstate) 2818 { 2819 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 2820 uintptr_t ret; 2821 size_t strsz; 2822 2823 /* 2824 * The easy case: this probe is allowed to read all of memory, so 2825 * we can just return this as a vanilla pointer. 2826 */ 2827 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) 2828 return (addr); 2829 2830 /* 2831 * This is the tougher case: we copy the string in question from 2832 * kernel memory into scratch memory and return it that way: this 2833 * ensures that we won't trip up when access checking tests the 2834 * BYREF return value. 2835 */ 2836 strsz = dtrace_strlen((char *)addr, size) + 1; 2837 2838 if (mstate->dtms_scratch_ptr + strsz > 2839 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 2840 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 2841 return (NULL); 2842 } 2843 2844 dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, 2845 strsz); 2846 ret = mstate->dtms_scratch_ptr; 2847 mstate->dtms_scratch_ptr += strsz; 2848 return (ret); 2849 } 2850 2851 /* 2852 * This function implements the DIF emulator's variable lookups. The emulator 2853 * passes a reserved variable identifier and optional built-in array index. 2854 */ 2855 static uint64_t 2856 dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, 2857 uint64_t ndx) 2858 { 2859 /* 2860 * If we're accessing one of the uncached arguments, we'll turn this 2861 * into a reference in the args array. 2862 */ 2863 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { 2864 ndx = v - DIF_VAR_ARG0; 2865 v = DIF_VAR_ARGS; 2866 } 2867 2868 switch (v) { 2869 case DIF_VAR_ARGS: 2870 if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) { 2871 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= 2872 CPU_DTRACE_KPRIV; 2873 return (0); 2874 } 2875 2876 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); 2877 if (ndx >= sizeof (mstate->dtms_arg) / 2878 sizeof (mstate->dtms_arg[0])) { 2879 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 2880 dtrace_provider_t *pv; 2881 uint64_t val; 2882 2883 pv = mstate->dtms_probe->dtpr_provider; 2884 if (pv->dtpv_pops.dtps_getargval != NULL) 2885 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, 2886 mstate->dtms_probe->dtpr_id, 2887 mstate->dtms_probe->dtpr_arg, ndx, aframes); 2888 else 2889 val = dtrace_getarg(ndx, aframes); 2890 2891 /* 2892 * This is regrettably required to keep the compiler 2893 * from tail-optimizing the call to dtrace_getarg(). 2894 * The condition always evaluates to true, but the 2895 * compiler has no way of figuring that out a priori. 
2896 * (None of this would be necessary if the compiler 2897 * could be relied upon to _always_ tail-optimize 2898 * the call to dtrace_getarg() -- but it can't.) 2899 */ 2900 if (mstate->dtms_probe != NULL) 2901 return (val); 2902 2903 ASSERT(0); 2904 } 2905 2906 return (mstate->dtms_arg[ndx]); 2907 2908 case DIF_VAR_UREGS: { 2909 klwp_t *lwp; 2910 2911 if (!dtrace_priv_proc(state, mstate)) 2912 return (0); 2913 2914 if ((lwp = curthread->t_lwp) == NULL) { 2915 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 2916 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL; 2917 return (0); 2918 } 2919 2920 return (dtrace_getreg(lwp->lwp_regs, ndx)); 2921 } 2922 2923 case DIF_VAR_VMREGS: { 2924 uint64_t rval; 2925 2926 if (!dtrace_priv_kernel(state)) 2927 return (0); 2928 2929 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 2930 2931 rval = dtrace_getvmreg(ndx, 2932 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags); 2933 2934 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 2935 2936 return (rval); 2937 } 2938 2939 case DIF_VAR_CURTHREAD: 2940 if (!dtrace_priv_proc(state, mstate)) 2941 return (0); 2942 return ((uint64_t)(uintptr_t)curthread); 2943 2944 case DIF_VAR_TIMESTAMP: 2945 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { 2946 mstate->dtms_timestamp = dtrace_gethrtime(); 2947 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP; 2948 } 2949 return (mstate->dtms_timestamp); 2950 2951 case DIF_VAR_VTIMESTAMP: 2952 ASSERT(dtrace_vtime_references != 0); 2953 return (curthread->t_dtrace_vtime); 2954 2955 case DIF_VAR_WALLTIMESTAMP: 2956 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { 2957 mstate->dtms_walltimestamp = dtrace_gethrestime(); 2958 mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP; 2959 } 2960 return (mstate->dtms_walltimestamp); 2961 2962 case DIF_VAR_IPL: 2963 if (!dtrace_priv_kernel(state)) 2964 return (0); 2965 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) { 2966 mstate->dtms_ipl = dtrace_getipl(); 2967 mstate->dtms_present |= DTRACE_MSTATE_IPL; 2968 } 2969 return (mstate->dtms_ipl); 2970 2971 case DIF_VAR_EPID: 2972 ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID); 2973 return (mstate->dtms_epid); 2974 2975 case DIF_VAR_ID: 2976 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 2977 return (mstate->dtms_probe->dtpr_id); 2978 2979 case DIF_VAR_STACKDEPTH: 2980 if (!dtrace_priv_kernel(state)) 2981 return (0); 2982 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { 2983 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 2984 2985 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); 2986 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; 2987 } 2988 return (mstate->dtms_stackdepth); 2989 2990 case DIF_VAR_USTACKDEPTH: 2991 if (!dtrace_priv_proc(state, mstate)) 2992 return (0); 2993 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { 2994 /* 2995 * See comment in DIF_VAR_PID. 
2996 */ 2997 if (DTRACE_ANCHORED(mstate->dtms_probe) && 2998 CPU_ON_INTR(CPU)) { 2999 mstate->dtms_ustackdepth = 0; 3000 } else { 3001 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3002 mstate->dtms_ustackdepth = 3003 dtrace_getustackdepth(); 3004 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3005 } 3006 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH; 3007 } 3008 return (mstate->dtms_ustackdepth); 3009 3010 case DIF_VAR_CALLER: 3011 if (!dtrace_priv_kernel(state)) 3012 return (0); 3013 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { 3014 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 3015 3016 if (!DTRACE_ANCHORED(mstate->dtms_probe)) { 3017 /* 3018 * If this is an unanchored probe, we are 3019 * required to go through the slow path: 3020 * dtrace_caller() only guarantees correct 3021 * results for anchored probes. 3022 */ 3023 pc_t caller[2]; 3024 3025 dtrace_getpcstack(caller, 2, aframes, 3026 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); 3027 mstate->dtms_caller = caller[1]; 3028 } else if ((mstate->dtms_caller = 3029 dtrace_caller(aframes)) == -1) { 3030 /* 3031 * We have failed to do this the quick way; 3032 * we must resort to the slower approach of 3033 * calling dtrace_getpcstack(). 3034 */ 3035 pc_t caller; 3036 3037 dtrace_getpcstack(&caller, 1, aframes, NULL); 3038 mstate->dtms_caller = caller; 3039 } 3040 3041 mstate->dtms_present |= DTRACE_MSTATE_CALLER; 3042 } 3043 return (mstate->dtms_caller); 3044 3045 case DIF_VAR_UCALLER: 3046 if (!dtrace_priv_proc(state, mstate)) 3047 return (0); 3048 3049 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { 3050 uint64_t ustack[3]; 3051 3052 /* 3053 * dtrace_getupcstack() fills in the first uint64_t 3054 * with the current PID. The second uint64_t will 3055 * be the program counter at user-level. The third 3056 * uint64_t will contain the caller, which is what 3057 * we're after. 3058 */ 3059 ustack[2] = NULL; 3060 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3061 dtrace_getupcstack(ustack, 3); 3062 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3063 mstate->dtms_ucaller = ustack[2]; 3064 mstate->dtms_present |= DTRACE_MSTATE_UCALLER; 3065 } 3066 3067 return (mstate->dtms_ucaller); 3068 3069 case DIF_VAR_PROBEPROV: 3070 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3071 return (dtrace_dif_varstr( 3072 (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, 3073 state, mstate)); 3074 3075 case DIF_VAR_PROBEMOD: 3076 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3077 return (dtrace_dif_varstr( 3078 (uintptr_t)mstate->dtms_probe->dtpr_mod, 3079 state, mstate)); 3080 3081 case DIF_VAR_PROBEFUNC: 3082 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3083 return (dtrace_dif_varstr( 3084 (uintptr_t)mstate->dtms_probe->dtpr_func, 3085 state, mstate)); 3086 3087 case DIF_VAR_PROBENAME: 3088 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3089 return (dtrace_dif_varstr( 3090 (uintptr_t)mstate->dtms_probe->dtpr_name, 3091 state, mstate)); 3092 3093 case DIF_VAR_PID: 3094 if (!dtrace_priv_proc(state, mstate)) 3095 return (0); 3096 3097 /* 3098 * Note that we are assuming that an unanchored probe is 3099 * always due to a high-level interrupt. (And we're assuming 3100 * that there is only a single high level interrupt.) 3101 */ 3102 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3103 return (pid0.pid_id); 3104 3105 /* 3106 * It is always safe to dereference one's own t_procp pointer: 3107 * it always points to a valid, allocated proc structure. 
3108 * Further, it is always safe to dereference the p_pidp member 3109 * of one's own proc structure. (These are truisms because 3110 * threads and processes don't clean up their own state -- 3111 * they leave that task to whoever reaps them.) 3112 */ 3113 return ((uint64_t)curthread->t_procp->p_pidp->pid_id); 3114 3115 case DIF_VAR_PPID: 3116 if (!dtrace_priv_proc(state, mstate)) 3117 return (0); 3118 3119 /* 3120 * See comment in DIF_VAR_PID. 3121 */ 3122 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3123 return (pid0.pid_id); 3124 3125 /* 3126 * It is always safe to dereference one's own t_procp pointer: 3127 * it always points to a valid, allocated proc structure. 3128 * (This is true because threads don't clean up their own 3129 * state -- they leave that task to whoever reaps them.) 3130 */ 3131 return ((uint64_t)curthread->t_procp->p_ppid); 3132 3133 case DIF_VAR_TID: 3134 /* 3135 * See comment in DIF_VAR_PID. 3136 */ 3137 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3138 return (0); 3139 3140 return ((uint64_t)curthread->t_tid); 3141 3142 case DIF_VAR_EXECNAME: 3143 if (!dtrace_priv_proc(state, mstate)) 3144 return (0); 3145 3146 /* 3147 * See comment in DIF_VAR_PID. 3148 */ 3149 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3150 return ((uint64_t)(uintptr_t)p0.p_user.u_comm); 3151 3152 /* 3153 * It is always safe to dereference one's own t_procp pointer: 3154 * it always points to a valid, allocated proc structure. 3155 * (This is true because threads don't clean up their own 3156 * state -- they leave that task to whoever reaps them.) 3157 */ 3158 return (dtrace_dif_varstr( 3159 (uintptr_t)curthread->t_procp->p_user.u_comm, 3160 state, mstate)); 3161 3162 case DIF_VAR_ZONENAME: 3163 if (!dtrace_priv_proc(state, mstate)) 3164 return (0); 3165 3166 /* 3167 * See comment in DIF_VAR_PID. 3168 */ 3169 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3170 return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); 3171 3172 /* 3173 * It is always safe to dereference one's own t_procp pointer: 3174 * it always points to a valid, allocated proc structure. 3175 * (This is true because threads don't clean up their own 3176 * state -- they leave that task to whoever reaps them.) 3177 */ 3178 return (dtrace_dif_varstr( 3179 (uintptr_t)curthread->t_procp->p_zone->zone_name, 3180 state, mstate)); 3181 3182 case DIF_VAR_UID: 3183 if (!dtrace_priv_proc(state, mstate)) 3184 return (0); 3185 3186 /* 3187 * See comment in DIF_VAR_PID. 3188 */ 3189 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3190 return ((uint64_t)p0.p_cred->cr_uid); 3191 3192 /* 3193 * It is always safe to dereference one's own t_procp pointer: 3194 * it always points to a valid, allocated proc structure. 3195 * (This is true because threads don't clean up their own 3196 * state -- they leave that task to whoever reaps them.) 3197 * 3198 * Additionally, it is safe to dereference one's own process 3199 * credential, since this is never NULL after process birth. 3200 */ 3201 return ((uint64_t)curthread->t_procp->p_cred->cr_uid); 3202 3203 case DIF_VAR_GID: 3204 if (!dtrace_priv_proc(state, mstate)) 3205 return (0); 3206 3207 /* 3208 * See comment in DIF_VAR_PID. 3209 */ 3210 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3211 return ((uint64_t)p0.p_cred->cr_gid); 3212 3213 /* 3214 * It is always safe to dereference one's own t_procp pointer: 3215 * it always points to a valid, allocated proc structure.
3216 * (This is true because threads don't clean up their own 3217 * state -- they leave that task to whoever reaps them.) 3218 * 3219 * Additionally, it is safe to dereference one's own process 3220 * credential, since this is never NULL after process birth. 3221 */ 3222 return ((uint64_t)curthread->t_procp->p_cred->cr_gid); 3223 3224 case DIF_VAR_ERRNO: { 3225 klwp_t *lwp; 3226 if (!dtrace_priv_proc(state, mstate)) 3227 return (0); 3228 3229 /* 3230 * See comment in DIF_VAR_PID. 3231 */ 3232 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3233 return (0); 3234 3235 /* 3236 * It is always safe to dereference one's own t_lwp pointer in 3237 * the event that this pointer is non-NULL. (This is true 3238 * because threads and lwps don't clean up their own state -- 3239 * they leave that task to whoever reaps them.) 3240 */ 3241 if ((lwp = curthread->t_lwp) == NULL) 3242 return (0); 3243 3244 return ((uint64_t)lwp->lwp_errno); 3245 } 3246 default: 3247 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 3248 return (0); 3249 } 3250 } 3251 3252 /* 3253 * Emulate the execution of DTrace ID subroutines invoked by the call opcode. 3254 * Notice that we don't bother validating the proper number of arguments or 3255 * their types in the tuple stack. This isn't needed because all argument 3256 * interpretation is safe because of our load safety -- the worst that can 3257 * happen is that a bogus program can obtain bogus results. 3258 */ 3259 static void 3260 dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, 3261 dtrace_key_t *tupregs, int nargs, 3262 dtrace_mstate_t *mstate, dtrace_state_t *state) 3263 { 3264 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 3265 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 3266 dtrace_vstate_t *vstate = &state->dts_vstate; 3267 3268 union { 3269 mutex_impl_t mi; 3270 uint64_t mx; 3271 } m; 3272 3273 union { 3274 krwlock_t ri; 3275 uintptr_t rw; 3276 } r; 3277 3278 switch (subr) { 3279 case DIF_SUBR_RAND: 3280 regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875; 3281 break; 3282 3283 case DIF_SUBR_MUTEX_OWNED: 3284 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 3285 mstate, vstate)) { 3286 regs[rd] = NULL; 3287 break; 3288 } 3289 3290 m.mx = dtrace_load64(tupregs[0].dttk_value); 3291 if (MUTEX_TYPE_ADAPTIVE(&m.mi)) 3292 regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER; 3293 else 3294 regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock); 3295 break; 3296 3297 case DIF_SUBR_MUTEX_OWNER: 3298 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 3299 mstate, vstate)) { 3300 regs[rd] = NULL; 3301 break; 3302 } 3303 3304 m.mx = dtrace_load64(tupregs[0].dttk_value); 3305 if (MUTEX_TYPE_ADAPTIVE(&m.mi) && 3306 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) 3307 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi); 3308 else 3309 regs[rd] = 0; 3310 break; 3311 3312 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: 3313 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 3314 mstate, vstate)) { 3315 regs[rd] = NULL; 3316 break; 3317 } 3318 3319 m.mx = dtrace_load64(tupregs[0].dttk_value); 3320 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); 3321 break; 3322 3323 case DIF_SUBR_MUTEX_TYPE_SPIN: 3324 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 3325 mstate, vstate)) { 3326 regs[rd] = NULL; 3327 break; 3328 } 3329 3330 m.mx = dtrace_load64(tupregs[0].dttk_value); 3331 regs[rd] = MUTEX_TYPE_SPIN(&m.mi); 3332 break; 3333 3334 case DIF_SUBR_RW_READ_HELD: { 3335 uintptr_t tmp; 3336 3337 if (!dtrace_canload(tupregs[0].dttk_value,
sizeof (uintptr_t), 3338 mstate, vstate)) { 3339 regs[rd] = NULL; 3340 break; 3341 } 3342 3343 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 3344 regs[rd] = _RW_READ_HELD(&r.ri, tmp); 3345 break; 3346 } 3347 3348 case DIF_SUBR_RW_WRITE_HELD: 3349 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), 3350 mstate, vstate)) { 3351 regs[rd] = NULL; 3352 break; 3353 } 3354 3355 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 3356 regs[rd] = _RW_WRITE_HELD(&r.ri); 3357 break; 3358 3359 case DIF_SUBR_RW_ISWRITER: 3360 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), 3361 mstate, vstate)) { 3362 regs[rd] = NULL; 3363 break; 3364 } 3365 3366 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 3367 regs[rd] = _RW_ISWRITER(&r.ri); 3368 break; 3369 3370 case DIF_SUBR_BCOPY: { 3371 /* 3372 * We need to be sure that the destination is in the scratch 3373 * region -- no other region is allowed. 3374 */ 3375 uintptr_t src = tupregs[0].dttk_value; 3376 uintptr_t dest = tupregs[1].dttk_value; 3377 size_t size = tupregs[2].dttk_value; 3378 3379 if (!dtrace_inscratch(dest, size, mstate)) { 3380 *flags |= CPU_DTRACE_BADADDR; 3381 *illval = regs[rd]; 3382 break; 3383 } 3384 3385 if (!dtrace_canload(src, size, mstate, vstate)) { 3386 regs[rd] = NULL; 3387 break; 3388 } 3389 3390 dtrace_bcopy((void *)src, (void *)dest, size); 3391 break; 3392 } 3393 3394 case DIF_SUBR_ALLOCA: 3395 case DIF_SUBR_COPYIN: { 3396 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 3397 uint64_t size = 3398 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; 3399 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; 3400 3401 /* 3402 * This action doesn't require any credential checks since 3403 * probes will not activate in user contexts to which the 3404 * enabling user does not have permissions. 3405 */ 3406 3407 /* 3408 * Rounding up the user allocation size could have overflowed 3409 * a large, bogus allocation (like -1ULL) to 0. 3410 */ 3411 if (scratch_size < size || 3412 !DTRACE_INSCRATCH(mstate, scratch_size)) { 3413 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3414 regs[rd] = NULL; 3415 break; 3416 } 3417 3418 if (subr == DIF_SUBR_COPYIN) { 3419 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3420 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); 3421 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3422 } 3423 3424 mstate->dtms_scratch_ptr += scratch_size; 3425 regs[rd] = dest; 3426 break; 3427 } 3428 3429 case DIF_SUBR_COPYINTO: { 3430 uint64_t size = tupregs[1].dttk_value; 3431 uintptr_t dest = tupregs[2].dttk_value; 3432 3433 /* 3434 * This action doesn't require any credential checks since 3435 * probes will not activate in user contexts to which the 3436 * enabling user does not have permissions. 3437 */ 3438 if (!dtrace_inscratch(dest, size, mstate)) { 3439 *flags |= CPU_DTRACE_BADADDR; 3440 *illval = regs[rd]; 3441 break; 3442 } 3443 3444 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3445 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); 3446 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3447 break; 3448 } 3449 3450 case DIF_SUBR_COPYINSTR: { 3451 uintptr_t dest = mstate->dtms_scratch_ptr; 3452 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3453 3454 if (nargs > 1 && tupregs[1].dttk_value < size) 3455 size = tupregs[1].dttk_value + 1; 3456 3457 /* 3458 * This action doesn't require any credential checks since 3459 * probes will not activate in user contexts to which the 3460 * enabling user does not have permissions. 
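 * A representative use is fetching a string argument from the current process at a system call probe, e.g. copyinstr(arg0) in a syscall::open:entry enabling; the optional second argument bounds the number of bytes copied, and the copied string below is always forced to be NUL-terminated within the scratch region.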
3461 */ 3462 if (!DTRACE_INSCRATCH(mstate, size)) { 3463 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3464 regs[rd] = NULL; 3465 break; 3466 } 3467 3468 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3469 dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); 3470 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3471 3472 ((char *)dest)[size - 1] = '\0'; 3473 mstate->dtms_scratch_ptr += size; 3474 regs[rd] = dest; 3475 break; 3476 } 3477 3478 case DIF_SUBR_MSGSIZE: 3479 case DIF_SUBR_MSGDSIZE: { 3480 uintptr_t baddr = tupregs[0].dttk_value, daddr; 3481 uintptr_t wptr, rptr; 3482 size_t count = 0; 3483 int cont = 0; 3484 3485 while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 3486 3487 if (!dtrace_canload(baddr, sizeof (mblk_t), mstate, 3488 vstate)) { 3489 regs[rd] = NULL; 3490 break; 3491 } 3492 3493 wptr = dtrace_loadptr(baddr + 3494 offsetof(mblk_t, b_wptr)); 3495 3496 rptr = dtrace_loadptr(baddr + 3497 offsetof(mblk_t, b_rptr)); 3498 3499 if (wptr < rptr) { 3500 *flags |= CPU_DTRACE_BADADDR; 3501 *illval = tupregs[0].dttk_value; 3502 break; 3503 } 3504 3505 daddr = dtrace_loadptr(baddr + 3506 offsetof(mblk_t, b_datap)); 3507 3508 baddr = dtrace_loadptr(baddr + 3509 offsetof(mblk_t, b_cont)); 3510 3511 /* 3512 * We want to protect against denial-of-service here, 3513 * so we're only going to search the list for 3514 * dtrace_msgdsize_max mblks. 3515 */ 3516 if (cont++ > dtrace_msgdsize_max) { 3517 *flags |= CPU_DTRACE_ILLOP; 3518 break; 3519 } 3520 3521 if (subr == DIF_SUBR_MSGDSIZE) { 3522 if (dtrace_load8(daddr + 3523 offsetof(dblk_t, db_type)) != M_DATA) 3524 continue; 3525 } 3526 3527 count += wptr - rptr; 3528 } 3529 3530 if (!(*flags & CPU_DTRACE_FAULT)) 3531 regs[rd] = count; 3532 3533 break; 3534 } 3535 3536 case DIF_SUBR_PROGENYOF: { 3537 pid_t pid = tupregs[0].dttk_value; 3538 proc_t *p; 3539 int rval = 0; 3540 3541 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3542 3543 for (p = curthread->t_procp; p != NULL; p = p->p_parent) { 3544 if (p->p_pidp->pid_id == pid) { 3545 rval = 1; 3546 break; 3547 } 3548 } 3549 3550 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3551 3552 regs[rd] = rval; 3553 break; 3554 } 3555 3556 case DIF_SUBR_SPECULATION: 3557 regs[rd] = dtrace_speculation(state); 3558 break; 3559 3560 case DIF_SUBR_COPYOUT: { 3561 uintptr_t kaddr = tupregs[0].dttk_value; 3562 uintptr_t uaddr = tupregs[1].dttk_value; 3563 uint64_t size = tupregs[2].dttk_value; 3564 3565 if (!dtrace_destructive_disallow && 3566 dtrace_priv_proc_control(state, mstate) && 3567 !dtrace_istoxic(kaddr, size)) { 3568 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3569 dtrace_copyout(kaddr, uaddr, size, flags); 3570 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3571 } 3572 break; 3573 } 3574 3575 case DIF_SUBR_COPYOUTSTR: { 3576 uintptr_t kaddr = tupregs[0].dttk_value; 3577 uintptr_t uaddr = tupregs[1].dttk_value; 3578 uint64_t size = tupregs[2].dttk_value; 3579 3580 if (!dtrace_destructive_disallow && 3581 dtrace_priv_proc_control(state, mstate) && 3582 !dtrace_istoxic(kaddr, size)) { 3583 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3584 dtrace_copyoutstr(kaddr, uaddr, size, flags); 3585 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3586 } 3587 break; 3588 } 3589 3590 case DIF_SUBR_STRLEN: { 3591 size_t sz; 3592 uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; 3593 sz = dtrace_strlen((char *)addr, 3594 state->dts_options[DTRACEOPT_STRSIZE]); 3595 3596 if (!dtrace_canload(addr, sz + 1, mstate, vstate)) { 3597 regs[rd] = NULL; 3598 break; 3599 } 3600 3601 regs[rd] = sz; 3602 3603 break; 3604 } 3605 3606 case DIF_SUBR_STRCHR: 3607
case DIF_SUBR_STRRCHR: { 3608 /* 3609 * We're going to iterate over the string looking for the 3610 * specified character. We will iterate until we have reached 3611 * the string length or we have found the character. If this 3612 * is DIF_SUBR_STRRCHR, we will look for the last occurrence 3613 * of the specified character instead of the first. 3614 */ 3615 uintptr_t saddr = tupregs[0].dttk_value; 3616 uintptr_t addr = tupregs[0].dttk_value; 3617 uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE]; 3618 char c, target = (char)tupregs[1].dttk_value; 3619 3620 for (regs[rd] = NULL; addr < limit; addr++) { 3621 if ((c = dtrace_load8(addr)) == target) { 3622 regs[rd] = addr; 3623 3624 if (subr == DIF_SUBR_STRCHR) 3625 break; 3626 } 3627 3628 if (c == '\0') 3629 break; 3630 } 3631 3632 if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) { 3633 regs[rd] = NULL; 3634 break; 3635 } 3636 3637 break; 3638 } 3639 3640 case DIF_SUBR_STRSTR: 3641 case DIF_SUBR_INDEX: 3642 case DIF_SUBR_RINDEX: { 3643 /* 3644 * We're going to iterate over the string looking for the 3645 * specified string. We will iterate until we have reached 3646 * the string length or we have found the string. (Yes, this 3647 * is done in the most naive way possible -- but considering 3648 * that the string we're searching for is likely to be 3649 * relatively short, the complexity of Rabin-Karp or similar 3650 * hardly seems merited.) 3651 */ 3652 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value; 3653 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value; 3654 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3655 size_t len = dtrace_strlen(addr, size); 3656 size_t sublen = dtrace_strlen(substr, size); 3657 char *limit = addr + len, *orig = addr; 3658 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1; 3659 int inc = 1; 3660 3661 regs[rd] = notfound; 3662 3663 if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { 3664 regs[rd] = NULL; 3665 break; 3666 } 3667 3668 if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, 3669 vstate)) { 3670 regs[rd] = NULL; 3671 break; 3672 } 3673 3674 /* 3675 * strstr() and index()/rindex() have similar semantics if 3676 * both strings are the empty string: strstr() returns a 3677 * pointer to the (empty) string, and index() and rindex() 3678 * both return index 0 (regardless of any position argument). 3679 */ 3680 if (sublen == 0 && len == 0) { 3681 if (subr == DIF_SUBR_STRSTR) 3682 regs[rd] = (uintptr_t)addr; 3683 else 3684 regs[rd] = 0; 3685 break; 3686 } 3687 3688 if (subr != DIF_SUBR_STRSTR) { 3689 if (subr == DIF_SUBR_RINDEX) { 3690 limit = orig - 1; 3691 addr += len; 3692 inc = -1; 3693 } 3694 3695 /* 3696 * Both index() and rindex() take an optional position 3697 * argument that denotes the starting position. 3698 */ 3699 if (nargs == 3) { 3700 int64_t pos = (int64_t)tupregs[2].dttk_value; 3701 3702 /* 3703 * If the position argument to index() is 3704 * negative, Perl implicitly clamps it at 3705 * zero. This semantic is a little surprising 3706 * given the special meaning of negative 3707 * positions to similar Perl functions like 3708 * substr(), but it appears to reflect a 3709 * notion that index() can start from a 3710 * negative index and increment its way up to 3711 * the string. Given this notion, Perl's 3712 * rindex() is at least self-consistent in 3713 * that it implicitly clamps positions greater 3714 * than the string length to be the string 3715 * length. 
Where Perl completely loses 3716 * coherence, however, is when the specified 3717 * substring is the empty string (""). In 3718 * this case, even if the position is 3719 * negative, rindex() returns 0 -- and even if 3720 * the position is greater than the length, 3721 * index() returns the string length. These 3722 * semantics violate the notion that index() 3723 * should never return a value less than the 3724 * specified position and that rindex() should 3725 * never return a value greater than the 3726 * specified position. (One assumes that 3727 * these semantics are artifacts of Perl's 3728 * implementation and not the results of 3729 * deliberate design -- it beggars belief that 3730 * even Larry Wall could desire such oddness.) 3731 * While in the abstract one would wish for 3732 * consistent position semantics across 3733 * substr(), index() and rindex() -- or at the 3734 * very least self-consistent position 3735 * semantics for index() and rindex() -- we 3736 * instead opt to keep with the extant Perl 3737 * semantics, in all their broken glory. (Do 3738 * we have more desire to maintain Perl's 3739 * semantics than Perl does? Probably.) 3740 */ 3741 if (subr == DIF_SUBR_RINDEX) { 3742 if (pos < 0) { 3743 if (sublen == 0) 3744 regs[rd] = 0; 3745 break; 3746 } 3747 3748 if (pos > len) 3749 pos = len; 3750 } else { 3751 if (pos < 0) 3752 pos = 0; 3753 3754 if (pos >= len) { 3755 if (sublen == 0) 3756 regs[rd] = len; 3757 break; 3758 } 3759 } 3760 3761 addr = orig + pos; 3762 } 3763 } 3764 3765 for (regs[rd] = notfound; addr != limit; addr += inc) { 3766 if (dtrace_strncmp(addr, substr, sublen) == 0) { 3767 if (subr != DIF_SUBR_STRSTR) { 3768 /* 3769 * As D index() and rindex() are 3770 * modeled on Perl (and not on awk), 3771 * we return a zero-based (and not a 3772 * one-based) index. (For you Perl 3773 * weenies: no, we're not going to add 3774 * $[ -- and shouldn't you be at a con 3775 * or something?) 3776 */ 3777 regs[rd] = (uintptr_t)(addr - orig); 3778 break; 3779 } 3780 3781 ASSERT(subr == DIF_SUBR_STRSTR); 3782 regs[rd] = (uintptr_t)addr; 3783 break; 3784 } 3785 } 3786 3787 break; 3788 } 3789 3790 case DIF_SUBR_STRTOK: { 3791 uintptr_t addr = tupregs[0].dttk_value; 3792 uintptr_t tokaddr = tupregs[1].dttk_value; 3793 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3794 uintptr_t limit, toklimit = tokaddr + size; 3795 uint8_t c, tokmap[32]; /* 256 / 8 */ 3796 char *dest = (char *)mstate->dtms_scratch_ptr; 3797 int i; 3798 3799 /* 3800 * Check both the token buffer and (later) the input buffer, 3801 * since both could be non-scratch addresses. 3802 */ 3803 if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) { 3804 regs[rd] = NULL; 3805 break; 3806 } 3807 3808 if (!DTRACE_INSCRATCH(mstate, size)) { 3809 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3810 regs[rd] = NULL; 3811 break; 3812 } 3813 3814 if (addr == NULL) { 3815 /* 3816 * If the address specified is NULL, we use our saved 3817 * strtok pointer from the mstate. Note that this 3818 * means that the saved strtok pointer is _only_ 3819 * valid within multiple enablings of the same probe -- 3820 * it behaves like an implicit clause-local variable. 3821 */ 3822 addr = mstate->dtms_strtok; 3823 } else { 3824 /* 3825 * If the user-specified address is non-NULL we must 3826 * access check it. 
This is the only time we have
3827 * a chance to do so, since this address may reside
3828 * in the string table of this clause -- future calls
3829 * (when we fetch addr from mstate->dtms_strtok)
3830 * would fail this access check.
3831 */
3832 if (!dtrace_strcanload(addr, size, mstate, vstate)) {
3833 regs[rd] = NULL;
3834 break;
3835 }
3836 }
3837
3838 /*
3839 * First, zero the token map, and then process the token
3840 * string -- setting a bit in the map for every character
3841 * found in the token string.
3842 */
3843 for (i = 0; i < sizeof (tokmap); i++)
3844 tokmap[i] = 0;
3845
3846 for (; tokaddr < toklimit; tokaddr++) {
3847 if ((c = dtrace_load8(tokaddr)) == '\0')
3848 break;
3849
3850 ASSERT((c >> 3) < sizeof (tokmap));
3851 tokmap[c >> 3] |= (1 << (c & 0x7));
3852 }
3853
3854 for (limit = addr + size; addr < limit; addr++) {
3855 /*
3856 * We're looking for a character that is _not_ contained
3857 * in the token string.
3858 */
3859 if ((c = dtrace_load8(addr)) == '\0')
3860 break;
3861
3862 if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
3863 break;
3864 }
3865
3866 if (c == '\0') {
3867 /*
3868 * We reached the end of the string without finding
3869 * any character that was not in the token string.
3870 * We return NULL in this case, and we set the saved
3871 * address to NULL as well.
3872 */
3873 regs[rd] = NULL;
3874 mstate->dtms_strtok = NULL;
3875 break;
3876 }
3877
3878 /*
3879 * From here on, we're copying into the destination string.
3880 */
3881 for (i = 0; addr < limit && i < size - 1; addr++) {
3882 if ((c = dtrace_load8(addr)) == '\0')
3883 break;
3884
3885 if (tokmap[c >> 3] & (1 << (c & 0x7)))
3886 break;
3887
3888 ASSERT(i < size);
3889 dest[i++] = c;
3890 }
3891
3892 ASSERT(i < size);
3893 dest[i] = '\0';
3894 regs[rd] = (uintptr_t)dest;
3895 mstate->dtms_scratch_ptr += size;
3896 mstate->dtms_strtok = addr;
3897 break;
3898 }
3899
3900 case DIF_SUBR_SUBSTR: {
3901 uintptr_t s = tupregs[0].dttk_value;
3902 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3903 char *d = (char *)mstate->dtms_scratch_ptr;
3904 int64_t index = (int64_t)tupregs[1].dttk_value;
3905 int64_t remaining = (int64_t)tupregs[2].dttk_value;
3906 size_t len = dtrace_strlen((char *)s, size);
3907 int64_t i;
3908
3909 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
3910 regs[rd] = NULL;
3911 break;
3912 }
3913
3914 if (!DTRACE_INSCRATCH(mstate, size)) {
3915 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3916 regs[rd] = NULL;
3917 break;
3918 }
3919
3920 if (nargs <= 2)
3921 remaining = (int64_t)size;
3922
3923 if (index < 0) {
3924 index += len;
3925
3926 if (index < 0 && index + remaining > 0) {
3927 remaining += index;
3928 index = 0;
3929 }
3930 }
3931
3932 if (index >= len || index < 0) {
3933 remaining = 0;
3934 } else if (remaining < 0) {
3935 remaining += len - index;
3936 } else if (index + remaining > size) {
3937 remaining = size - index;
3938 }
3939
3940 for (i = 0; i < remaining; i++) {
3941 if ((d[i] = dtrace_load8(s + index + i)) == '\0')
3942 break;
3943 }
3944
3945 d[i] = '\0';
3946
3947 mstate->dtms_scratch_ptr += size;
3948 regs[rd] = (uintptr_t)d;
3949 break;
3950 }
3951
3952 case DIF_SUBR_TOUPPER:
3953 case DIF_SUBR_TOLOWER: {
3954 uintptr_t s = tupregs[0].dttk_value;
3955 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3956 char *dest = (char *)mstate->dtms_scratch_ptr, c;
3957 size_t len = dtrace_strlen((char *)s, size);
3958 char lower, upper, convert;
3959 int64_t i;
3960
3961 if (subr == DIF_SUBR_TOUPPER) {
3962 lower = 'a';
3963 upper = 'z';
3964 convert =
'A'; 3965 } else { 3966 lower = 'A'; 3967 upper = 'Z'; 3968 convert = 'a'; 3969 } 3970 3971 if (!dtrace_canload(s, len + 1, mstate, vstate)) { 3972 regs[rd] = NULL; 3973 break; 3974 } 3975 3976 if (!DTRACE_INSCRATCH(mstate, size)) { 3977 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3978 regs[rd] = NULL; 3979 break; 3980 } 3981 3982 for (i = 0; i < size - 1; i++) { 3983 if ((c = dtrace_load8(s + i)) == '\0') 3984 break; 3985 3986 if (c >= lower && c <= upper) 3987 c = convert + (c - lower); 3988 3989 dest[i] = c; 3990 } 3991 3992 ASSERT(i < size); 3993 dest[i] = '\0'; 3994 regs[rd] = (uintptr_t)dest; 3995 mstate->dtms_scratch_ptr += size; 3996 break; 3997 } 3998 3999 case DIF_SUBR_GETMAJOR: 4000 #ifdef _LP64 4001 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; 4002 #else 4003 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; 4004 #endif 4005 break; 4006 4007 case DIF_SUBR_GETMINOR: 4008 #ifdef _LP64 4009 regs[rd] = tupregs[0].dttk_value & MAXMIN64; 4010 #else 4011 regs[rd] = tupregs[0].dttk_value & MAXMIN; 4012 #endif 4013 break; 4014 4015 case DIF_SUBR_DDI_PATHNAME: { 4016 /* 4017 * This one is a galactic mess. We are going to roughly 4018 * emulate ddi_pathname(), but it's made more complicated 4019 * by the fact that we (a) want to include the minor name and 4020 * (b) must proceed iteratively instead of recursively. 4021 */ 4022 uintptr_t dest = mstate->dtms_scratch_ptr; 4023 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4024 char *start = (char *)dest, *end = start + size - 1; 4025 uintptr_t daddr = tupregs[0].dttk_value; 4026 int64_t minor = (int64_t)tupregs[1].dttk_value; 4027 char *s; 4028 int i, len, depth = 0; 4029 4030 /* 4031 * Due to all the pointer jumping we do and context we must 4032 * rely upon, we just mandate that the user must have kernel 4033 * read privileges to use this routine. 4034 */ 4035 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) { 4036 *flags |= CPU_DTRACE_KPRIV; 4037 *illval = daddr; 4038 regs[rd] = NULL; 4039 } 4040 4041 if (!DTRACE_INSCRATCH(mstate, size)) { 4042 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4043 regs[rd] = NULL; 4044 break; 4045 } 4046 4047 *end = '\0'; 4048 4049 /* 4050 * We want to have a name for the minor. In order to do this, 4051 * we need to walk the minor list from the devinfo. We want 4052 * to be sure that we don't infinitely walk a circular list, 4053 * so we check for circularity by sending a scout pointer 4054 * ahead two elements for every element that we iterate over; 4055 * if the list is circular, these will ultimately point to the 4056 * same element. You may recognize this little trick as the 4057 * answer to a stupid interview question -- one that always 4058 * seems to be asked by those who had to have it laboriously 4059 * explained to them, and who can't even concisely describe 4060 * the conditions under which one would be forced to resort to 4061 * this technique. Needless to say, those conditions are 4062 * found here -- and probably only here. Is this the only use 4063 * of this infamous trick in shipping, production code? If it 4064 * isn't, it probably should be... 
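* (To make the invariant explicit: maddr advances one element
* per iteration while the scout advances two, so on a circular
* list the scout must eventually land on maddr -- at which
* point we flag CPU_DTRACE_ILLOP -- while on a properly
* NULL-terminated list the scout simply falls off the end
* first.)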
4065 */ 4066 if (minor != -1) { 4067 uintptr_t maddr = dtrace_loadptr(daddr + 4068 offsetof(struct dev_info, devi_minor)); 4069 4070 uintptr_t next = offsetof(struct ddi_minor_data, next); 4071 uintptr_t name = offsetof(struct ddi_minor_data, 4072 d_minor) + offsetof(struct ddi_minor, name); 4073 uintptr_t dev = offsetof(struct ddi_minor_data, 4074 d_minor) + offsetof(struct ddi_minor, dev); 4075 uintptr_t scout; 4076 4077 if (maddr != NULL) 4078 scout = dtrace_loadptr(maddr + next); 4079 4080 while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 4081 uint64_t m; 4082 #ifdef _LP64 4083 m = dtrace_load64(maddr + dev) & MAXMIN64; 4084 #else 4085 m = dtrace_load32(maddr + dev) & MAXMIN; 4086 #endif 4087 if (m != minor) { 4088 maddr = dtrace_loadptr(maddr + next); 4089 4090 if (scout == NULL) 4091 continue; 4092 4093 scout = dtrace_loadptr(scout + next); 4094 4095 if (scout == NULL) 4096 continue; 4097 4098 scout = dtrace_loadptr(scout + next); 4099 4100 if (scout == NULL) 4101 continue; 4102 4103 if (scout == maddr) { 4104 *flags |= CPU_DTRACE_ILLOP; 4105 break; 4106 } 4107 4108 continue; 4109 } 4110 4111 /* 4112 * We have the minor data. Now we need to 4113 * copy the minor's name into the end of the 4114 * pathname. 4115 */ 4116 s = (char *)dtrace_loadptr(maddr + name); 4117 len = dtrace_strlen(s, size); 4118 4119 if (*flags & CPU_DTRACE_FAULT) 4120 break; 4121 4122 if (len != 0) { 4123 if ((end -= (len + 1)) < start) 4124 break; 4125 4126 *end = ':'; 4127 } 4128 4129 for (i = 1; i <= len; i++) 4130 end[i] = dtrace_load8((uintptr_t)s++); 4131 break; 4132 } 4133 } 4134 4135 while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 4136 ddi_node_state_t devi_state; 4137 4138 devi_state = dtrace_load32(daddr + 4139 offsetof(struct dev_info, devi_node_state)); 4140 4141 if (*flags & CPU_DTRACE_FAULT) 4142 break; 4143 4144 if (devi_state >= DS_INITIALIZED) { 4145 s = (char *)dtrace_loadptr(daddr + 4146 offsetof(struct dev_info, devi_addr)); 4147 len = dtrace_strlen(s, size); 4148 4149 if (*flags & CPU_DTRACE_FAULT) 4150 break; 4151 4152 if (len != 0) { 4153 if ((end -= (len + 1)) < start) 4154 break; 4155 4156 *end = '@'; 4157 } 4158 4159 for (i = 1; i <= len; i++) 4160 end[i] = dtrace_load8((uintptr_t)s++); 4161 } 4162 4163 /* 4164 * Now for the node name... 4165 */ 4166 s = (char *)dtrace_loadptr(daddr + 4167 offsetof(struct dev_info, devi_node_name)); 4168 4169 daddr = dtrace_loadptr(daddr + 4170 offsetof(struct dev_info, devi_parent)); 4171 4172 /* 4173 * If our parent is NULL (that is, if we're the root 4174 * node), we're going to use the special path 4175 * "devices". 
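* (Purely illustrative: a fully assembled result might look
* something like "/devices/pci@0,0/pci-ide@1f,1/ide@0/sd@0,0:a",
* with the minor name following the ':'.)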
4176 */ 4177 if (daddr == NULL) 4178 s = "devices"; 4179 4180 len = dtrace_strlen(s, size); 4181 if (*flags & CPU_DTRACE_FAULT) 4182 break; 4183 4184 if ((end -= (len + 1)) < start) 4185 break; 4186 4187 for (i = 1; i <= len; i++) 4188 end[i] = dtrace_load8((uintptr_t)s++); 4189 *end = '/'; 4190 4191 if (depth++ > dtrace_devdepth_max) { 4192 *flags |= CPU_DTRACE_ILLOP; 4193 break; 4194 } 4195 } 4196 4197 if (end < start) 4198 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4199 4200 if (daddr == NULL) { 4201 regs[rd] = (uintptr_t)end; 4202 mstate->dtms_scratch_ptr += size; 4203 } 4204 4205 break; 4206 } 4207 4208 case DIF_SUBR_STRJOIN: { 4209 char *d = (char *)mstate->dtms_scratch_ptr; 4210 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4211 uintptr_t s1 = tupregs[0].dttk_value; 4212 uintptr_t s2 = tupregs[1].dttk_value; 4213 int i = 0; 4214 4215 if (!dtrace_strcanload(s1, size, mstate, vstate) || 4216 !dtrace_strcanload(s2, size, mstate, vstate)) { 4217 regs[rd] = NULL; 4218 break; 4219 } 4220 4221 if (!DTRACE_INSCRATCH(mstate, size)) { 4222 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4223 regs[rd] = NULL; 4224 break; 4225 } 4226 4227 for (;;) { 4228 if (i >= size) { 4229 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4230 regs[rd] = NULL; 4231 break; 4232 } 4233 4234 if ((d[i++] = dtrace_load8(s1++)) == '\0') { 4235 i--; 4236 break; 4237 } 4238 } 4239 4240 for (;;) { 4241 if (i >= size) { 4242 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4243 regs[rd] = NULL; 4244 break; 4245 } 4246 4247 if ((d[i++] = dtrace_load8(s2++)) == '\0') 4248 break; 4249 } 4250 4251 if (i < size) { 4252 mstate->dtms_scratch_ptr += i; 4253 regs[rd] = (uintptr_t)d; 4254 } 4255 4256 break; 4257 } 4258 4259 case DIF_SUBR_LLTOSTR: { 4260 int64_t i = (int64_t)tupregs[0].dttk_value; 4261 uint64_t val, digit; 4262 uint64_t size = 65; /* enough room for 2^64 in binary */ 4263 char *end = (char *)mstate->dtms_scratch_ptr + size - 1; 4264 int base = 10; 4265 4266 if (nargs > 1) { 4267 if ((base = tupregs[1].dttk_value) <= 1 || 4268 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { 4269 *flags |= CPU_DTRACE_ILLOP; 4270 break; 4271 } 4272 } 4273 4274 val = (base == 10 && i < 0) ? 
i * -1 : i;
4275
4276 if (!DTRACE_INSCRATCH(mstate, size)) {
4277 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4278 regs[rd] = NULL;
4279 break;
4280 }
4281
4282 for (*end-- = '\0'; val; val /= base) {
4283 if ((digit = val % base) <= '9' - '0') {
4284 *end-- = '0' + digit;
4285 } else {
4286 *end-- = 'a' + (digit - ('9' - '0') - 1);
4287 }
4288 }
4289
4290 if (i == 0 && base == 16)
4291 *end-- = '0';
4292
4293 if (base == 16)
4294 *end-- = 'x';
4295
4296 if (i == 0 || base == 8 || base == 16)
4297 *end-- = '0';
4298
4299 if (i < 0 && base == 10)
4300 *end-- = '-';
4301
4302 regs[rd] = (uintptr_t)end + 1;
4303 mstate->dtms_scratch_ptr += size;
4304 break;
4305 }
4306
4307 case DIF_SUBR_HTONS:
4308 case DIF_SUBR_NTOHS:
4309 #ifdef _BIG_ENDIAN
4310 regs[rd] = (uint16_t)tupregs[0].dttk_value;
4311 #else
4312 regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
4313 #endif
4314 break;
4315
4316
4317 case DIF_SUBR_HTONL:
4318 case DIF_SUBR_NTOHL:
4319 #ifdef _BIG_ENDIAN
4320 regs[rd] = (uint32_t)tupregs[0].dttk_value;
4321 #else
4322 regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
4323 #endif
4324 break;
4325
4326
4327 case DIF_SUBR_HTONLL:
4328 case DIF_SUBR_NTOHLL:
4329 #ifdef _BIG_ENDIAN
4330 regs[rd] = (uint64_t)tupregs[0].dttk_value;
4331 #else
4332 regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
4333 #endif
4334 break;
4335
4336
4337 case DIF_SUBR_DIRNAME:
4338 case DIF_SUBR_BASENAME: {
4339 char *dest = (char *)mstate->dtms_scratch_ptr;
4340 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4341 uintptr_t src = tupregs[0].dttk_value;
4342 int i, j, len = dtrace_strlen((char *)src, size);
4343 int lastbase = -1, firstbase = -1, lastdir = -1;
4344 int start, end;
4345
4346 if (!dtrace_canload(src, len + 1, mstate, vstate)) {
4347 regs[rd] = NULL;
4348 break;
4349 }
4350
4351 if (!DTRACE_INSCRATCH(mstate, size)) {
4352 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4353 regs[rd] = NULL;
4354 break;
4355 }
4356
4357 /*
4358 * The basename and dirname of a zero-length string are
4359 * defined to be "."
4360 */
4361 if (len == 0) {
4362 len = 1;
4363 src = (uintptr_t)".";
4364 }
4365
4366 /*
4367 * Start from the back of the string, moving back toward the
4368 * front until we see a character that isn't a slash. That
4369 * character is the last character in the basename.
4370 */
4371 for (i = len - 1; i >= 0; i--) {
4372 if (dtrace_load8(src + i) != '/')
4373 break;
4374 }
4375
4376 if (i >= 0)
4377 lastbase = i;
4378
4379 /*
4380 * Starting from the last character in the basename, move
4381 * towards the front until we find a slash. The character
4382 * that we processed immediately before that is the first
4383 * character in the basename.
4384 */
4385 for (; i >= 0; i--) {
4386 if (dtrace_load8(src + i) == '/')
4387 break;
4388 }
4389
4390 if (i >= 0)
4391 firstbase = i + 1;
4392
4393 /*
4394 * Now keep going until we find a non-slash character. That
4395 * character is the last character in the dirname.
4396 */
4397 for (; i >= 0; i--) {
4398 if (dtrace_load8(src + i) != '/')
4399 break;
4400 }
4401
4402 if (i >= 0)
4403 lastdir = i;
4404
4405 ASSERT(!(lastbase == -1 && firstbase != -1));
4406 ASSERT(!(firstbase == -1 && lastdir != -1));
4407
4408 if (lastbase == -1) {
4409 /*
4410 * We didn't find a non-slash character. We know that
4411 * the length is non-zero, so the whole string must be
4412 * slashes. In either the dirname or the basename
4413 * case, we return '/'.
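* (For example, both dirname("///") and basename("///")
* evaluate to "/".)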
4414 */ 4415 ASSERT(firstbase == -1); 4416 firstbase = lastbase = lastdir = 0; 4417 } 4418 4419 if (firstbase == -1) { 4420 /* 4421 * The entire string consists only of a basename 4422 * component. If we're looking for dirname, we need 4423 * to change our string to be just "."; if we're 4424 * looking for a basename, we'll just set the first 4425 * character of the basename to be 0. 4426 */ 4427 if (subr == DIF_SUBR_DIRNAME) { 4428 ASSERT(lastdir == -1); 4429 src = (uintptr_t)"."; 4430 lastdir = 0; 4431 } else { 4432 firstbase = 0; 4433 } 4434 } 4435 4436 if (subr == DIF_SUBR_DIRNAME) { 4437 if (lastdir == -1) { 4438 /* 4439 * We know that we have a slash in the name -- 4440 * or lastdir would be set to 0, above. And 4441 * because lastdir is -1, we know that this 4442 * slash must be the first character. (That 4443 * is, the full string must be of the form 4444 * "/basename".) In this case, the last 4445 * character of the directory name is 0. 4446 */ 4447 lastdir = 0; 4448 } 4449 4450 start = 0; 4451 end = lastdir; 4452 } else { 4453 ASSERT(subr == DIF_SUBR_BASENAME); 4454 ASSERT(firstbase != -1 && lastbase != -1); 4455 start = firstbase; 4456 end = lastbase; 4457 } 4458 4459 for (i = start, j = 0; i <= end && j < size - 1; i++, j++) 4460 dest[j] = dtrace_load8(src + i); 4461 4462 dest[j] = '\0'; 4463 regs[rd] = (uintptr_t)dest; 4464 mstate->dtms_scratch_ptr += size; 4465 break; 4466 } 4467 4468 case DIF_SUBR_CLEANPATH: { 4469 char *dest = (char *)mstate->dtms_scratch_ptr, c; 4470 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4471 uintptr_t src = tupregs[0].dttk_value; 4472 int i = 0, j = 0; 4473 4474 if (!dtrace_strcanload(src, size, mstate, vstate)) { 4475 regs[rd] = NULL; 4476 break; 4477 } 4478 4479 if (!DTRACE_INSCRATCH(mstate, size)) { 4480 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4481 regs[rd] = NULL; 4482 break; 4483 } 4484 4485 /* 4486 * Move forward, loading each character. 4487 */ 4488 do { 4489 c = dtrace_load8(src + i++); 4490 next: 4491 if (j + 5 >= size) /* 5 = strlen("/..c\0") */ 4492 break; 4493 4494 if (c != '/') { 4495 dest[j++] = c; 4496 continue; 4497 } 4498 4499 c = dtrace_load8(src + i++); 4500 4501 if (c == '/') { 4502 /* 4503 * We have two slashes -- we can just advance 4504 * to the next character. 4505 */ 4506 goto next; 4507 } 4508 4509 if (c != '.') { 4510 /* 4511 * This is not "." and it's not ".." -- we can 4512 * just store the "/" and this character and 4513 * drive on. 4514 */ 4515 dest[j++] = '/'; 4516 dest[j++] = c; 4517 continue; 4518 } 4519 4520 c = dtrace_load8(src + i++); 4521 4522 if (c == '/') { 4523 /* 4524 * This is a "/./" component. We're not going 4525 * to store anything in the destination buffer; 4526 * we're just going to go to the next component. 4527 */ 4528 goto next; 4529 } 4530 4531 if (c != '.') { 4532 /* 4533 * This is not ".." -- we can just store the 4534 * "/." and this character and continue 4535 * processing. 4536 */ 4537 dest[j++] = '/'; 4538 dest[j++] = '.'; 4539 dest[j++] = c; 4540 continue; 4541 } 4542 4543 c = dtrace_load8(src + i++); 4544 4545 if (c != '/' && c != '\0') { 4546 /* 4547 * This is not ".." -- it's "..[mumble]". 4548 * We'll store the "/.." and this character 4549 * and continue processing. 4550 */ 4551 dest[j++] = '/'; 4552 dest[j++] = '.'; 4553 dest[j++] = '.'; 4554 dest[j++] = c; 4555 continue; 4556 } 4557 4558 /* 4559 * This is "/../" or "/..\0". We need to back up 4560 * our destination pointer until we find a "/". 
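* (Illustrative examples: cleanpath("/foo/bar/../baz") yields
* "/foo/baz", and cleanpath("/foo/..") collapses to "/".)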
4561 */
4562 i--;
4563 while (j != 0 && dest[--j] != '/')
4564 continue;
4565
4566 if (c == '\0')
4567 dest[++j] = '/';
4568 } while (c != '\0');
4569
4570 dest[j] = '\0';
4571 regs[rd] = (uintptr_t)dest;
4572 mstate->dtms_scratch_ptr += size;
4573 break;
4574 }
4575
4576 case DIF_SUBR_INET_NTOA:
4577 case DIF_SUBR_INET_NTOA6:
4578 case DIF_SUBR_INET_NTOP: {
4579 size_t size;
4580 int af, argi, i;
4581 char *base, *end;
4582
4583 if (subr == DIF_SUBR_INET_NTOP) {
4584 af = (int)tupregs[0].dttk_value;
4585 argi = 1;
4586 } else {
4587 af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
4588 argi = 0;
4589 }
4590
4591 if (af == AF_INET) {
4592 ipaddr_t ip4;
4593 uint8_t *ptr8, val;
4594
4595 /*
4596 * Safely load the IPv4 address.
4597 */
4598 ip4 = dtrace_load32(tupregs[argi].dttk_value);
4599
4600 /*
4601 * Check that an IPv4 string will fit in scratch.
4602 */
4603 size = INET_ADDRSTRLEN;
4604 if (!DTRACE_INSCRATCH(mstate, size)) {
4605 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4606 regs[rd] = NULL;
4607 break;
4608 }
4609 base = (char *)mstate->dtms_scratch_ptr;
4610 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4611
4612 /*
4613 * Stringify as a dotted decimal quad.
4614 */
4615 *end-- = '\0';
4616 ptr8 = (uint8_t *)&ip4;
4617 for (i = 3; i >= 0; i--) {
4618 val = ptr8[i];
4619
4620 if (val == 0) {
4621 *end-- = '0';
4622 } else {
4623 for (; val; val /= 10) {
4624 *end-- = '0' + (val % 10);
4625 }
4626 }
4627
4628 if (i > 0)
4629 *end-- = '.';
4630 }
4631 ASSERT(end + 1 >= base);
4632
4633 } else if (af == AF_INET6) {
4634 struct in6_addr ip6;
4635 int firstzero, tryzero, numzero, v6end;
4636 uint16_t val;
4637 const char digits[] = "0123456789abcdef";
4638
4639 /*
4640 * Stringify using RFC 1884 convention 2 -- 16-bit
4641 * hexadecimal values with a zero-run compression.
4642 * Lower-case hexadecimal digits are used,
4643 * e.g., fe80::214:4fff:fe0b:76c8.
4644 * The IPv4-embedded form is returned for inet_ntop;
4645 * just the IPv4 string is returned for inet_ntoa6.
4646 */
4647
4648 /*
4649 * Safely load the IPv6 address.
4650 */
4651 dtrace_bcopy(
4652 (void *)(uintptr_t)tupregs[argi].dttk_value,
4653 (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));
4654
4655 /*
4656 * Check that an IPv6 string will fit in scratch.
4657 */
4658 size = INET6_ADDRSTRLEN;
4659 if (!DTRACE_INSCRATCH(mstate, size)) {
4660 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4661 regs[rd] = NULL;
4662 break;
4663 }
4664 base = (char *)mstate->dtms_scratch_ptr;
4665 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4666 *end-- = '\0';
4667
4668 /*
4669 * Find the longest run of 16-bit zero values
4670 * for the single allowed zero compression -- "::".
4671 */
4672 firstzero = -1;
4673 tryzero = -1;
4674 numzero = 1;
4675 for (i = 0; i < sizeof (struct in6_addr); i++) {
4676 if (ip6._S6_un._S6_u8[i] == 0 &&
4677 tryzero == -1 && i % 2 == 0) {
4678 tryzero = i;
4679 continue;
4680 }
4681
4682 if (tryzero != -1 &&
4683 (ip6._S6_un._S6_u8[i] != 0 ||
4684 i == sizeof (struct in6_addr) - 1)) {
4685
4686 if (i - tryzero <= numzero) {
4687 tryzero = -1;
4688 continue;
4689 }
4690
4691 firstzero = tryzero;
4692 numzero = i - i % 2 - tryzero;
4693 tryzero = -1;
4694
4695 if (ip6._S6_un._S6_u8[i] == 0 &&
4696 i == sizeof (struct in6_addr) - 1)
4697 numzero += 2;
4698 }
4699 }
4700 ASSERT(firstzero + numzero <= sizeof (struct in6_addr));
4701
4702 /*
4703 * Check for an IPv4 embedded address.
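* (Such addresses are rendered in the usual mixed notation;
* a hypothetical v4-mapped example would be ::ffff:10.1.2.3.)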
4704 */
4705 v6end = sizeof (struct in6_addr) - 2;
4706 if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
4707 IN6_IS_ADDR_V4COMPAT(&ip6)) {
4708 for (i = sizeof (struct in6_addr) - 1;
4709 i >= DTRACE_V4MAPPED_OFFSET; i--) {
4710 ASSERT(end >= base);
4711
4712 val = ip6._S6_un._S6_u8[i];
4713
4714 if (val == 0) {
4715 *end-- = '0';
4716 } else {
4717 for (; val; val /= 10) {
4718 *end-- = '0' + val % 10;
4719 }
4720 }
4721
4722 if (i > DTRACE_V4MAPPED_OFFSET)
4723 *end-- = '.';
4724 }
4725
4726 if (subr == DIF_SUBR_INET_NTOA6)
4727 goto inetout;
4728
4729 /*
4730 * Set v6end to skip the IPv4 address that
4731 * we have already stringified.
4732 */
4733 v6end = 10;
4734 }
4735
4736 /*
4737 * Build the IPv6 string by working through the
4738 * address in reverse.
4739 */
4740 for (i = v6end; i >= 0; i -= 2) {
4741 ASSERT(end >= base);
4742
4743 if (i == firstzero + numzero - 2) {
4744 *end-- = ':';
4745 *end-- = ':';
4746 i -= numzero - 2;
4747 continue;
4748 }
4749
4750 if (i < 14 && i != firstzero - 2)
4751 *end-- = ':';
4752
4753 val = (ip6._S6_un._S6_u8[i] << 8) +
4754 ip6._S6_un._S6_u8[i + 1];
4755
4756 if (val == 0) {
4757 *end-- = '0';
4758 } else {
4759 for (; val; val /= 16) {
4760 *end-- = digits[val % 16];
4761 }
4762 }
4763 }
4764 ASSERT(end + 1 >= base);
4765
4766 } else {
4767 /*
4768 * The user didn't use AF_INET or AF_INET6.
4769 */
4770 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
4771 regs[rd] = NULL;
4772 break;
4773 }
4774
4775 inetout: regs[rd] = (uintptr_t)end + 1;
4776 mstate->dtms_scratch_ptr += size;
4777 break;
4778 }
4779
4780 }
4781 }
4782
4783 /*
4784 * Emulate the execution of DTrace IR instructions specified by the given
4785 * DIF object. This function is deliberately void of assertions as all of
4786 * the necessary checks are handled by a call to dtrace_difo_validate().
4787 */
4788 static uint64_t
4789 dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
4790 dtrace_vstate_t *vstate, dtrace_state_t *state)
4791 {
4792 const dif_instr_t *text = difo->dtdo_buf;
4793 const uint_t textlen = difo->dtdo_len;
4794 const char *strtab = difo->dtdo_strtab;
4795 const uint64_t *inttab = difo->dtdo_inttab;
4796
4797 uint64_t rval = 0;
4798 dtrace_statvar_t *svar;
4799 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
4800 dtrace_difv_t *v;
4801 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
4802 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
4803
4804 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
4805 uint64_t regs[DIF_DIR_NREGS];
4806 uint64_t *tmp;
4807
4808 uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
4809 int64_t cc_r;
4810 uint_t pc = 0, id, opc;
4811 uint8_t ttop = 0;
4812 dif_instr_t instr;
4813 uint_t r1, r2, rd;
4814
4815 /*
4816 * We stash the current DIF object into the machine state: we need it
4817 * for subsequent access checking.
4818 */ 4819 mstate->dtms_difo = difo; 4820 4821 regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ 4822 4823 while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { 4824 opc = pc; 4825 4826 instr = text[pc++]; 4827 r1 = DIF_INSTR_R1(instr); 4828 r2 = DIF_INSTR_R2(instr); 4829 rd = DIF_INSTR_RD(instr); 4830 4831 switch (DIF_INSTR_OP(instr)) { 4832 case DIF_OP_OR: 4833 regs[rd] = regs[r1] | regs[r2]; 4834 break; 4835 case DIF_OP_XOR: 4836 regs[rd] = regs[r1] ^ regs[r2]; 4837 break; 4838 case DIF_OP_AND: 4839 regs[rd] = regs[r1] & regs[r2]; 4840 break; 4841 case DIF_OP_SLL: 4842 regs[rd] = regs[r1] << regs[r2]; 4843 break; 4844 case DIF_OP_SRL: 4845 regs[rd] = regs[r1] >> regs[r2]; 4846 break; 4847 case DIF_OP_SUB: 4848 regs[rd] = regs[r1] - regs[r2]; 4849 break; 4850 case DIF_OP_ADD: 4851 regs[rd] = regs[r1] + regs[r2]; 4852 break; 4853 case DIF_OP_MUL: 4854 regs[rd] = regs[r1] * regs[r2]; 4855 break; 4856 case DIF_OP_SDIV: 4857 if (regs[r2] == 0) { 4858 regs[rd] = 0; 4859 *flags |= CPU_DTRACE_DIVZERO; 4860 } else { 4861 regs[rd] = (int64_t)regs[r1] / 4862 (int64_t)regs[r2]; 4863 } 4864 break; 4865 4866 case DIF_OP_UDIV: 4867 if (regs[r2] == 0) { 4868 regs[rd] = 0; 4869 *flags |= CPU_DTRACE_DIVZERO; 4870 } else { 4871 regs[rd] = regs[r1] / regs[r2]; 4872 } 4873 break; 4874 4875 case DIF_OP_SREM: 4876 if (regs[r2] == 0) { 4877 regs[rd] = 0; 4878 *flags |= CPU_DTRACE_DIVZERO; 4879 } else { 4880 regs[rd] = (int64_t)regs[r1] % 4881 (int64_t)regs[r2]; 4882 } 4883 break; 4884 4885 case DIF_OP_UREM: 4886 if (regs[r2] == 0) { 4887 regs[rd] = 0; 4888 *flags |= CPU_DTRACE_DIVZERO; 4889 } else { 4890 regs[rd] = regs[r1] % regs[r2]; 4891 } 4892 break; 4893 4894 case DIF_OP_NOT: 4895 regs[rd] = ~regs[r1]; 4896 break; 4897 case DIF_OP_MOV: 4898 regs[rd] = regs[r1]; 4899 break; 4900 case DIF_OP_CMP: 4901 cc_r = regs[r1] - regs[r2]; 4902 cc_n = cc_r < 0; 4903 cc_z = cc_r == 0; 4904 cc_v = 0; 4905 cc_c = regs[r1] < regs[r2]; 4906 break; 4907 case DIF_OP_TST: 4908 cc_n = cc_v = cc_c = 0; 4909 cc_z = regs[r1] == 0; 4910 break; 4911 case DIF_OP_BA: 4912 pc = DIF_INSTR_LABEL(instr); 4913 break; 4914 case DIF_OP_BE: 4915 if (cc_z) 4916 pc = DIF_INSTR_LABEL(instr); 4917 break; 4918 case DIF_OP_BNE: 4919 if (cc_z == 0) 4920 pc = DIF_INSTR_LABEL(instr); 4921 break; 4922 case DIF_OP_BG: 4923 if ((cc_z | (cc_n ^ cc_v)) == 0) 4924 pc = DIF_INSTR_LABEL(instr); 4925 break; 4926 case DIF_OP_BGU: 4927 if ((cc_c | cc_z) == 0) 4928 pc = DIF_INSTR_LABEL(instr); 4929 break; 4930 case DIF_OP_BGE: 4931 if ((cc_n ^ cc_v) == 0) 4932 pc = DIF_INSTR_LABEL(instr); 4933 break; 4934 case DIF_OP_BGEU: 4935 if (cc_c == 0) 4936 pc = DIF_INSTR_LABEL(instr); 4937 break; 4938 case DIF_OP_BL: 4939 if (cc_n ^ cc_v) 4940 pc = DIF_INSTR_LABEL(instr); 4941 break; 4942 case DIF_OP_BLU: 4943 if (cc_c) 4944 pc = DIF_INSTR_LABEL(instr); 4945 break; 4946 case DIF_OP_BLE: 4947 if (cc_z | (cc_n ^ cc_v)) 4948 pc = DIF_INSTR_LABEL(instr); 4949 break; 4950 case DIF_OP_BLEU: 4951 if (cc_c | cc_z) 4952 pc = DIF_INSTR_LABEL(instr); 4953 break; 4954 case DIF_OP_RLDSB: 4955 if (!dtrace_canload(regs[r1], 1, mstate, vstate)) 4956 break; 4957 /*FALLTHROUGH*/ 4958 case DIF_OP_LDSB: 4959 regs[rd] = (int8_t)dtrace_load8(regs[r1]); 4960 break; 4961 case DIF_OP_RLDSH: 4962 if (!dtrace_canload(regs[r1], 2, mstate, vstate)) 4963 break; 4964 /*FALLTHROUGH*/ 4965 case DIF_OP_LDSH: 4966 regs[rd] = (int16_t)dtrace_load16(regs[r1]); 4967 break; 4968 case DIF_OP_RLDSW: 4969 if (!dtrace_canload(regs[r1], 4, mstate, vstate)) 4970 break; 4971 /*FALLTHROUGH*/ 4972 case DIF_OP_LDSW: 
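/*
* The signed loads sign-extend into the 64-bit destination
* register; the unsigned LDU* variants below zero-extend.
*/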
4973 regs[rd] = (int32_t)dtrace_load32(regs[r1]); 4974 break; 4975 case DIF_OP_RLDUB: 4976 if (!dtrace_canload(regs[r1], 1, mstate, vstate)) 4977 break; 4978 /*FALLTHROUGH*/ 4979 case DIF_OP_LDUB: 4980 regs[rd] = dtrace_load8(regs[r1]); 4981 break; 4982 case DIF_OP_RLDUH: 4983 if (!dtrace_canload(regs[r1], 2, mstate, vstate)) 4984 break; 4985 /*FALLTHROUGH*/ 4986 case DIF_OP_LDUH: 4987 regs[rd] = dtrace_load16(regs[r1]); 4988 break; 4989 case DIF_OP_RLDUW: 4990 if (!dtrace_canload(regs[r1], 4, mstate, vstate)) 4991 break; 4992 /*FALLTHROUGH*/ 4993 case DIF_OP_LDUW: 4994 regs[rd] = dtrace_load32(regs[r1]); 4995 break; 4996 case DIF_OP_RLDX: 4997 if (!dtrace_canload(regs[r1], 8, mstate, vstate)) 4998 break; 4999 /*FALLTHROUGH*/ 5000 case DIF_OP_LDX: 5001 regs[rd] = dtrace_load64(regs[r1]); 5002 break; 5003 case DIF_OP_ULDSB: 5004 regs[rd] = (int8_t) 5005 dtrace_fuword8((void *)(uintptr_t)regs[r1]); 5006 break; 5007 case DIF_OP_ULDSH: 5008 regs[rd] = (int16_t) 5009 dtrace_fuword16((void *)(uintptr_t)regs[r1]); 5010 break; 5011 case DIF_OP_ULDSW: 5012 regs[rd] = (int32_t) 5013 dtrace_fuword32((void *)(uintptr_t)regs[r1]); 5014 break; 5015 case DIF_OP_ULDUB: 5016 regs[rd] = 5017 dtrace_fuword8((void *)(uintptr_t)regs[r1]); 5018 break; 5019 case DIF_OP_ULDUH: 5020 regs[rd] = 5021 dtrace_fuword16((void *)(uintptr_t)regs[r1]); 5022 break; 5023 case DIF_OP_ULDUW: 5024 regs[rd] = 5025 dtrace_fuword32((void *)(uintptr_t)regs[r1]); 5026 break; 5027 case DIF_OP_ULDX: 5028 regs[rd] = 5029 dtrace_fuword64((void *)(uintptr_t)regs[r1]); 5030 break; 5031 case DIF_OP_RET: 5032 rval = regs[rd]; 5033 pc = textlen; 5034 break; 5035 case DIF_OP_NOP: 5036 break; 5037 case DIF_OP_SETX: 5038 regs[rd] = inttab[DIF_INSTR_INTEGER(instr)]; 5039 break; 5040 case DIF_OP_SETS: 5041 regs[rd] = (uint64_t)(uintptr_t) 5042 (strtab + DIF_INSTR_STRING(instr)); 5043 break; 5044 case DIF_OP_SCMP: { 5045 size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; 5046 uintptr_t s1 = regs[r1]; 5047 uintptr_t s2 = regs[r2]; 5048 5049 if (s1 != NULL && 5050 !dtrace_strcanload(s1, sz, mstate, vstate)) 5051 break; 5052 if (s2 != NULL && 5053 !dtrace_strcanload(s2, sz, mstate, vstate)) 5054 break; 5055 5056 cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz); 5057 5058 cc_n = cc_r < 0; 5059 cc_z = cc_r == 0; 5060 cc_v = cc_c = 0; 5061 break; 5062 } 5063 case DIF_OP_LDGA: 5064 regs[rd] = dtrace_dif_variable(mstate, state, 5065 r1, regs[r2]); 5066 break; 5067 case DIF_OP_LDGS: 5068 id = DIF_INSTR_VAR(instr); 5069 5070 if (id >= DIF_VAR_OTHER_UBASE) { 5071 uintptr_t a; 5072 5073 id -= DIF_VAR_OTHER_UBASE; 5074 svar = vstate->dtvs_globals[id]; 5075 ASSERT(svar != NULL); 5076 v = &svar->dtsv_var; 5077 5078 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) { 5079 regs[rd] = svar->dtsv_data; 5080 break; 5081 } 5082 5083 a = (uintptr_t)svar->dtsv_data; 5084 5085 if (*(uint8_t *)a == UINT8_MAX) { 5086 /* 5087 * If the 0th byte is set to UINT8_MAX 5088 * then this is to be treated as a 5089 * reference to a NULL variable. 
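* (Layout sketch: a by-ref global is stored as a uint64_t
* guard word followed by the data itself, which is why a
* non-NULL load returns a + sizeof (uint64_t).)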
5090 */ 5091 regs[rd] = NULL; 5092 } else { 5093 regs[rd] = a + sizeof (uint64_t); 5094 } 5095 5096 break; 5097 } 5098 5099 regs[rd] = dtrace_dif_variable(mstate, state, id, 0); 5100 break; 5101 5102 case DIF_OP_STGS: 5103 id = DIF_INSTR_VAR(instr); 5104 5105 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5106 id -= DIF_VAR_OTHER_UBASE; 5107 5108 svar = vstate->dtvs_globals[id]; 5109 ASSERT(svar != NULL); 5110 v = &svar->dtsv_var; 5111 5112 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5113 uintptr_t a = (uintptr_t)svar->dtsv_data; 5114 5115 ASSERT(a != NULL); 5116 ASSERT(svar->dtsv_size != 0); 5117 5118 if (regs[rd] == NULL) { 5119 *(uint8_t *)a = UINT8_MAX; 5120 break; 5121 } else { 5122 *(uint8_t *)a = 0; 5123 a += sizeof (uint64_t); 5124 } 5125 if (!dtrace_vcanload( 5126 (void *)(uintptr_t)regs[rd], &v->dtdv_type, 5127 mstate, vstate)) 5128 break; 5129 5130 dtrace_vcopy((void *)(uintptr_t)regs[rd], 5131 (void *)a, &v->dtdv_type); 5132 break; 5133 } 5134 5135 svar->dtsv_data = regs[rd]; 5136 break; 5137 5138 case DIF_OP_LDTA: 5139 /* 5140 * There are no DTrace built-in thread-local arrays at 5141 * present. This opcode is saved for future work. 5142 */ 5143 *flags |= CPU_DTRACE_ILLOP; 5144 regs[rd] = 0; 5145 break; 5146 5147 case DIF_OP_LDLS: 5148 id = DIF_INSTR_VAR(instr); 5149 5150 if (id < DIF_VAR_OTHER_UBASE) { 5151 /* 5152 * For now, this has no meaning. 5153 */ 5154 regs[rd] = 0; 5155 break; 5156 } 5157 5158 id -= DIF_VAR_OTHER_UBASE; 5159 5160 ASSERT(id < vstate->dtvs_nlocals); 5161 ASSERT(vstate->dtvs_locals != NULL); 5162 5163 svar = vstate->dtvs_locals[id]; 5164 ASSERT(svar != NULL); 5165 v = &svar->dtsv_var; 5166 5167 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5168 uintptr_t a = (uintptr_t)svar->dtsv_data; 5169 size_t sz = v->dtdv_type.dtdt_size; 5170 5171 sz += sizeof (uint64_t); 5172 ASSERT(svar->dtsv_size == NCPU * sz); 5173 a += CPU->cpu_id * sz; 5174 5175 if (*(uint8_t *)a == UINT8_MAX) { 5176 /* 5177 * If the 0th byte is set to UINT8_MAX 5178 * then this is to be treated as a 5179 * reference to a NULL variable. 
5180 */ 5181 regs[rd] = NULL; 5182 } else { 5183 regs[rd] = a + sizeof (uint64_t); 5184 } 5185 5186 break; 5187 } 5188 5189 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); 5190 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; 5191 regs[rd] = tmp[CPU->cpu_id]; 5192 break; 5193 5194 case DIF_OP_STLS: 5195 id = DIF_INSTR_VAR(instr); 5196 5197 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5198 id -= DIF_VAR_OTHER_UBASE; 5199 ASSERT(id < vstate->dtvs_nlocals); 5200 5201 ASSERT(vstate->dtvs_locals != NULL); 5202 svar = vstate->dtvs_locals[id]; 5203 ASSERT(svar != NULL); 5204 v = &svar->dtsv_var; 5205 5206 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5207 uintptr_t a = (uintptr_t)svar->dtsv_data; 5208 size_t sz = v->dtdv_type.dtdt_size; 5209 5210 sz += sizeof (uint64_t); 5211 ASSERT(svar->dtsv_size == NCPU * sz); 5212 a += CPU->cpu_id * sz; 5213 5214 if (regs[rd] == NULL) { 5215 *(uint8_t *)a = UINT8_MAX; 5216 break; 5217 } else { 5218 *(uint8_t *)a = 0; 5219 a += sizeof (uint64_t); 5220 } 5221 5222 if (!dtrace_vcanload( 5223 (void *)(uintptr_t)regs[rd], &v->dtdv_type, 5224 mstate, vstate)) 5225 break; 5226 5227 dtrace_vcopy((void *)(uintptr_t)regs[rd], 5228 (void *)a, &v->dtdv_type); 5229 break; 5230 } 5231 5232 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); 5233 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; 5234 tmp[CPU->cpu_id] = regs[rd]; 5235 break; 5236 5237 case DIF_OP_LDTS: { 5238 dtrace_dynvar_t *dvar; 5239 dtrace_key_t *key; 5240 5241 id = DIF_INSTR_VAR(instr); 5242 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5243 id -= DIF_VAR_OTHER_UBASE; 5244 v = &vstate->dtvs_tlocals[id]; 5245 5246 key = &tupregs[DIF_DTR_NREGS]; 5247 key[0].dttk_value = (uint64_t)id; 5248 key[0].dttk_size = 0; 5249 DTRACE_TLS_THRKEY(key[1].dttk_value); 5250 key[1].dttk_size = 0; 5251 5252 dvar = dtrace_dynvar(dstate, 2, key, 5253 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, 5254 mstate, vstate); 5255 5256 if (dvar == NULL) { 5257 regs[rd] = 0; 5258 break; 5259 } 5260 5261 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5262 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; 5263 } else { 5264 regs[rd] = *((uint64_t *)dvar->dtdv_data); 5265 } 5266 5267 break; 5268 } 5269 5270 case DIF_OP_STTS: { 5271 dtrace_dynvar_t *dvar; 5272 dtrace_key_t *key; 5273 5274 id = DIF_INSTR_VAR(instr); 5275 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5276 id -= DIF_VAR_OTHER_UBASE; 5277 5278 key = &tupregs[DIF_DTR_NREGS]; 5279 key[0].dttk_value = (uint64_t)id; 5280 key[0].dttk_size = 0; 5281 DTRACE_TLS_THRKEY(key[1].dttk_value); 5282 key[1].dttk_size = 0; 5283 v = &vstate->dtvs_tlocals[id]; 5284 5285 dvar = dtrace_dynvar(dstate, 2, key, 5286 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 5287 v->dtdv_type.dtdt_size : sizeof (uint64_t), 5288 regs[rd] ? DTRACE_DYNVAR_ALLOC : 5289 DTRACE_DYNVAR_DEALLOC, mstate, vstate); 5290 5291 /* 5292 * Given that we're storing to thread-local data, 5293 * we need to flush our predicate cache. 
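* (The cache records that a given predicate has evaluated to
* false for this thread; a store to thread-local data could
* change that outcome, so any cached result may now be stale.)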
5294 */ 5295 curthread->t_predcache = NULL; 5296 5297 if (dvar == NULL) 5298 break; 5299 5300 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5301 if (!dtrace_vcanload( 5302 (void *)(uintptr_t)regs[rd], 5303 &v->dtdv_type, mstate, vstate)) 5304 break; 5305 5306 dtrace_vcopy((void *)(uintptr_t)regs[rd], 5307 dvar->dtdv_data, &v->dtdv_type); 5308 } else { 5309 *((uint64_t *)dvar->dtdv_data) = regs[rd]; 5310 } 5311 5312 break; 5313 } 5314 5315 case DIF_OP_SRA: 5316 regs[rd] = (int64_t)regs[r1] >> regs[r2]; 5317 break; 5318 5319 case DIF_OP_CALL: 5320 dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd, 5321 regs, tupregs, ttop, mstate, state); 5322 break; 5323 5324 case DIF_OP_PUSHTR: 5325 if (ttop == DIF_DTR_NREGS) { 5326 *flags |= CPU_DTRACE_TUPOFLOW; 5327 break; 5328 } 5329 5330 if (r1 == DIF_TYPE_STRING) { 5331 /* 5332 * If this is a string type and the size is 0, 5333 * we'll use the system-wide default string 5334 * size. Note that we are _not_ looking at 5335 * the value of the DTRACEOPT_STRSIZE option; 5336 * had this been set, we would expect to have 5337 * a non-zero size value in the "pushtr". 5338 */ 5339 tupregs[ttop].dttk_size = 5340 dtrace_strlen((char *)(uintptr_t)regs[rd], 5341 regs[r2] ? regs[r2] : 5342 dtrace_strsize_default) + 1; 5343 } else { 5344 tupregs[ttop].dttk_size = regs[r2]; 5345 } 5346 5347 tupregs[ttop++].dttk_value = regs[rd]; 5348 break; 5349 5350 case DIF_OP_PUSHTV: 5351 if (ttop == DIF_DTR_NREGS) { 5352 *flags |= CPU_DTRACE_TUPOFLOW; 5353 break; 5354 } 5355 5356 tupregs[ttop].dttk_value = regs[rd]; 5357 tupregs[ttop++].dttk_size = 0; 5358 break; 5359 5360 case DIF_OP_POPTS: 5361 if (ttop != 0) 5362 ttop--; 5363 break; 5364 5365 case DIF_OP_FLUSHTS: 5366 ttop = 0; 5367 break; 5368 5369 case DIF_OP_LDGAA: 5370 case DIF_OP_LDTAA: { 5371 dtrace_dynvar_t *dvar; 5372 dtrace_key_t *key = tupregs; 5373 uint_t nkeys = ttop; 5374 5375 id = DIF_INSTR_VAR(instr); 5376 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5377 id -= DIF_VAR_OTHER_UBASE; 5378 5379 key[nkeys].dttk_value = (uint64_t)id; 5380 key[nkeys++].dttk_size = 0; 5381 5382 if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { 5383 DTRACE_TLS_THRKEY(key[nkeys].dttk_value); 5384 key[nkeys++].dttk_size = 0; 5385 v = &vstate->dtvs_tlocals[id]; 5386 } else { 5387 v = &vstate->dtvs_globals[id]->dtsv_var; 5388 } 5389 5390 dvar = dtrace_dynvar(dstate, nkeys, key, 5391 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 5392 v->dtdv_type.dtdt_size : sizeof (uint64_t), 5393 DTRACE_DYNVAR_NOALLOC, mstate, vstate); 5394 5395 if (dvar == NULL) { 5396 regs[rd] = 0; 5397 break; 5398 } 5399 5400 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5401 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; 5402 } else { 5403 regs[rd] = *((uint64_t *)dvar->dtdv_data); 5404 } 5405 5406 break; 5407 } 5408 5409 case DIF_OP_STGAA: 5410 case DIF_OP_STTAA: { 5411 dtrace_dynvar_t *dvar; 5412 dtrace_key_t *key = tupregs; 5413 uint_t nkeys = ttop; 5414 5415 id = DIF_INSTR_VAR(instr); 5416 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5417 id -= DIF_VAR_OTHER_UBASE; 5418 5419 key[nkeys].dttk_value = (uint64_t)id; 5420 key[nkeys++].dttk_size = 0; 5421 5422 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { 5423 DTRACE_TLS_THRKEY(key[nkeys].dttk_value); 5424 key[nkeys++].dttk_size = 0; 5425 v = &vstate->dtvs_tlocals[id]; 5426 } else { 5427 v = &vstate->dtvs_globals[id]->dtsv_var; 5428 } 5429 5430 dvar = dtrace_dynvar(dstate, nkeys, key, 5431 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 5432 v->dtdv_type.dtdt_size : sizeof (uint64_t), 5433 regs[rd] ? 
DTRACE_DYNVAR_ALLOC : 5434 DTRACE_DYNVAR_DEALLOC, mstate, vstate); 5435 5436 if (dvar == NULL) 5437 break; 5438 5439 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5440 if (!dtrace_vcanload( 5441 (void *)(uintptr_t)regs[rd], &v->dtdv_type, 5442 mstate, vstate)) 5443 break; 5444 5445 dtrace_vcopy((void *)(uintptr_t)regs[rd], 5446 dvar->dtdv_data, &v->dtdv_type); 5447 } else { 5448 *((uint64_t *)dvar->dtdv_data) = regs[rd]; 5449 } 5450 5451 break; 5452 } 5453 5454 case DIF_OP_ALLOCS: { 5455 uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 5456 size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1]; 5457 5458 /* 5459 * Rounding up the user allocation size could have 5460 * overflowed large, bogus allocations (like -1ULL) to 5461 * 0. 5462 */ 5463 if (size < regs[r1] || 5464 !DTRACE_INSCRATCH(mstate, size)) { 5465 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5466 regs[rd] = NULL; 5467 break; 5468 } 5469 5470 dtrace_bzero((void *) mstate->dtms_scratch_ptr, size); 5471 mstate->dtms_scratch_ptr += size; 5472 regs[rd] = ptr; 5473 break; 5474 } 5475 5476 case DIF_OP_COPYS: 5477 if (!dtrace_canstore(regs[rd], regs[r2], 5478 mstate, vstate)) { 5479 *flags |= CPU_DTRACE_BADADDR; 5480 *illval = regs[rd]; 5481 break; 5482 } 5483 5484 if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate)) 5485 break; 5486 5487 dtrace_bcopy((void *)(uintptr_t)regs[r1], 5488 (void *)(uintptr_t)regs[rd], (size_t)regs[r2]); 5489 break; 5490 5491 case DIF_OP_STB: 5492 if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) { 5493 *flags |= CPU_DTRACE_BADADDR; 5494 *illval = regs[rd]; 5495 break; 5496 } 5497 *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1]; 5498 break; 5499 5500 case DIF_OP_STH: 5501 if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) { 5502 *flags |= CPU_DTRACE_BADADDR; 5503 *illval = regs[rd]; 5504 break; 5505 } 5506 if (regs[rd] & 1) { 5507 *flags |= CPU_DTRACE_BADALIGN; 5508 *illval = regs[rd]; 5509 break; 5510 } 5511 *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1]; 5512 break; 5513 5514 case DIF_OP_STW: 5515 if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) { 5516 *flags |= CPU_DTRACE_BADADDR; 5517 *illval = regs[rd]; 5518 break; 5519 } 5520 if (regs[rd] & 3) { 5521 *flags |= CPU_DTRACE_BADALIGN; 5522 *illval = regs[rd]; 5523 break; 5524 } 5525 *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1]; 5526 break; 5527 5528 case DIF_OP_STX: 5529 if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) { 5530 *flags |= CPU_DTRACE_BADADDR; 5531 *illval = regs[rd]; 5532 break; 5533 } 5534 if (regs[rd] & 7) { 5535 *flags |= CPU_DTRACE_BADALIGN; 5536 *illval = regs[rd]; 5537 break; 5538 } 5539 *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1]; 5540 break; 5541 } 5542 } 5543 5544 if (!(*flags & CPU_DTRACE_FAULT)) 5545 return (rval); 5546 5547 mstate->dtms_fltoffs = opc * sizeof (dif_instr_t); 5548 mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS; 5549 5550 return (0); 5551 } 5552 5553 static void 5554 dtrace_action_breakpoint(dtrace_ecb_t *ecb) 5555 { 5556 dtrace_probe_t *probe = ecb->dte_probe; 5557 dtrace_provider_t *prov = probe->dtpr_provider; 5558 char c[DTRACE_FULLNAMELEN + 80], *str; 5559 char *msg = "dtrace: breakpoint action at probe "; 5560 char *ecbmsg = " (ecb "; 5561 uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4)); 5562 uintptr_t val = (uintptr_t)ecb; 5563 int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0; 5564 5565 if (dtrace_destructive_disallow) 5566 return; 5567 5568 /* 5569 * It's impossible to be taking action on the NULL probe. 
5570 */ 5571 ASSERT(probe != NULL); 5572 5573 /* 5574 * This is a poor man's (destitute man's?) sprintf(): we want to 5575 * print the provider name, module name, function name and name of 5576 * the probe, along with the hex address of the ECB with the breakpoint 5577 * action -- all of which we must place in the character buffer by 5578 * hand. 5579 */ 5580 while (*msg != '\0') 5581 c[i++] = *msg++; 5582 5583 for (str = prov->dtpv_name; *str != '\0'; str++) 5584 c[i++] = *str; 5585 c[i++] = ':'; 5586 5587 for (str = probe->dtpr_mod; *str != '\0'; str++) 5588 c[i++] = *str; 5589 c[i++] = ':'; 5590 5591 for (str = probe->dtpr_func; *str != '\0'; str++) 5592 c[i++] = *str; 5593 c[i++] = ':'; 5594 5595 for (str = probe->dtpr_name; *str != '\0'; str++) 5596 c[i++] = *str; 5597 5598 while (*ecbmsg != '\0') 5599 c[i++] = *ecbmsg++; 5600 5601 while (shift >= 0) { 5602 mask = (uintptr_t)0xf << shift; 5603 5604 if (val >= ((uintptr_t)1 << shift)) 5605 c[i++] = "0123456789abcdef"[(val & mask) >> shift]; 5606 shift -= 4; 5607 } 5608 5609 c[i++] = ')'; 5610 c[i] = '\0'; 5611 5612 debug_enter(c); 5613 } 5614 5615 static void 5616 dtrace_action_panic(dtrace_ecb_t *ecb) 5617 { 5618 dtrace_probe_t *probe = ecb->dte_probe; 5619 5620 /* 5621 * It's impossible to be taking action on the NULL probe. 5622 */ 5623 ASSERT(probe != NULL); 5624 5625 if (dtrace_destructive_disallow) 5626 return; 5627 5628 if (dtrace_panicked != NULL) 5629 return; 5630 5631 if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL) 5632 return; 5633 5634 /* 5635 * We won the right to panic. (We want to be sure that only one 5636 * thread calls panic() from dtrace_probe(), and that panic() is 5637 * called exactly once.) 5638 */ 5639 dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", 5640 probe->dtpr_provider->dtpv_name, probe->dtpr_mod, 5641 probe->dtpr_func, probe->dtpr_name, (void *)ecb); 5642 } 5643 5644 static void 5645 dtrace_action_raise(uint64_t sig) 5646 { 5647 if (dtrace_destructive_disallow) 5648 return; 5649 5650 if (sig >= NSIG) { 5651 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 5652 return; 5653 } 5654 5655 /* 5656 * raise() has a queue depth of 1 -- we ignore all subsequent 5657 * invocations of the raise() action. 5658 */ 5659 if (curthread->t_dtrace_sig == 0) 5660 curthread->t_dtrace_sig = (uint8_t)sig; 5661 5662 curthread->t_sig_check = 1; 5663 aston(curthread); 5664 } 5665 5666 static void 5667 dtrace_action_stop(void) 5668 { 5669 if (dtrace_destructive_disallow) 5670 return; 5671 5672 if (!curthread->t_dtrace_stop) { 5673 curthread->t_dtrace_stop = 1; 5674 curthread->t_sig_check = 1; 5675 aston(curthread); 5676 } 5677 } 5678 5679 static void 5680 dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) 5681 { 5682 hrtime_t now; 5683 volatile uint16_t *flags; 5684 cpu_t *cpu = CPU; 5685 5686 if (dtrace_destructive_disallow) 5687 return; 5688 5689 flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags; 5690 5691 now = dtrace_gethrtime(); 5692 5693 if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) { 5694 /* 5695 * We need to advance the mark to the current time. 5696 */ 5697 cpu->cpu_dtrace_chillmark = now; 5698 cpu->cpu_dtrace_chilled = 0; 5699 } 5700 5701 /* 5702 * Now check to see if the requested chill time would take us over 5703 * the maximum amount of time allowed in the chill interval. (Or 5704 * worse, if the calculation itself induces overflow.) 
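* (Illustrative numbers: with dtrace_chill_max at its default
* of 500 milliseconds, a request to chill for 400ms on a CPU
* that has already chilled for 200ms in this interval is
* refused with CPU_DTRACE_ILLOP.)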
5705 */ 5706 if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max || 5707 cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) { 5708 *flags |= CPU_DTRACE_ILLOP; 5709 return; 5710 } 5711 5712 while (dtrace_gethrtime() - now < val) 5713 continue; 5714 5715 /* 5716 * Normally, we assure that the value of the variable "timestamp" does 5717 * not change within an ECB. The presence of chill() represents an 5718 * exception to this rule, however. 5719 */ 5720 mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP; 5721 cpu->cpu_dtrace_chilled += val; 5722 } 5723 5724 static void 5725 dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, 5726 uint64_t *buf, uint64_t arg) 5727 { 5728 int nframes = DTRACE_USTACK_NFRAMES(arg); 5729 int strsize = DTRACE_USTACK_STRSIZE(arg); 5730 uint64_t *pcs = &buf[1], *fps; 5731 char *str = (char *)&pcs[nframes]; 5732 int size, offs = 0, i, j; 5733 uintptr_t old = mstate->dtms_scratch_ptr, saved; 5734 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 5735 char *sym; 5736 5737 /* 5738 * Should be taking a faster path if string space has not been 5739 * allocated. 5740 */ 5741 ASSERT(strsize != 0); 5742 5743 /* 5744 * We will first allocate some temporary space for the frame pointers. 5745 */ 5746 fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 5747 size = (uintptr_t)fps - mstate->dtms_scratch_ptr + 5748 (nframes * sizeof (uint64_t)); 5749 5750 if (!DTRACE_INSCRATCH(mstate, size)) { 5751 /* 5752 * Not enough room for our frame pointers -- need to indicate 5753 * that we ran out of scratch space. 5754 */ 5755 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5756 return; 5757 } 5758 5759 mstate->dtms_scratch_ptr += size; 5760 saved = mstate->dtms_scratch_ptr; 5761 5762 /* 5763 * Now get a stack with both program counters and frame pointers. 5764 */ 5765 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 5766 dtrace_getufpstack(buf, fps, nframes + 1); 5767 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 5768 5769 /* 5770 * If that faulted, we're cooked. 5771 */ 5772 if (*flags & CPU_DTRACE_FAULT) 5773 goto out; 5774 5775 /* 5776 * Now we want to walk up the stack, calling the USTACK helper. For 5777 * each iteration, we restore the scratch pointer. 5778 */ 5779 for (i = 0; i < nframes; i++) { 5780 mstate->dtms_scratch_ptr = saved; 5781 5782 if (offs >= strsize) 5783 break; 5784 5785 sym = (char *)(uintptr_t)dtrace_helper( 5786 DTRACE_HELPER_ACTION_USTACK, 5787 mstate, state, pcs[i], fps[i]); 5788 5789 /* 5790 * If we faulted while running the helper, we're going to 5791 * clear the fault and null out the corresponding string. 5792 */ 5793 if (*flags & CPU_DTRACE_FAULT) { 5794 *flags &= ~CPU_DTRACE_FAULT; 5795 str[offs++] = '\0'; 5796 continue; 5797 } 5798 5799 if (sym == NULL) { 5800 str[offs++] = '\0'; 5801 continue; 5802 } 5803 5804 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 5805 5806 /* 5807 * Now copy in the string that the helper returned to us. 5808 */ 5809 for (j = 0; offs + j < strsize; j++) { 5810 if ((str[offs + j] = sym[j]) == '\0') 5811 break; 5812 } 5813 5814 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 5815 5816 offs += j + 1; 5817 } 5818 5819 if (offs >= strsize) { 5820 /* 5821 * If we didn't have room for all of the strings, we don't 5822 * abort processing -- this needn't be a fatal error -- but we 5823 * still want to increment a counter (dts_stkstroverflows) to 5824 * allow this condition to be warned about. (If this is from 5825 * a jstack() action, it is easily tuned via jstackstrsize.) 
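* (For example, a consumer might specify
* "#pragma D option jstackstrsize=1024" -- an illustrative
* value -- in its D program.)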
5826 */ 5827 dtrace_error(&state->dts_stkstroverflows); 5828 } 5829 5830 while (offs < strsize) 5831 str[offs++] = '\0'; 5832 5833 out: 5834 mstate->dtms_scratch_ptr = old; 5835 } 5836 5837 /* 5838 * If you're looking for the epicenter of DTrace, you just found it. This 5839 * is the function called by the provider to fire a probe -- from which all 5840 * subsequent probe-context DTrace activity emanates. 5841 */ 5842 void 5843 dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, 5844 uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) 5845 { 5846 processorid_t cpuid; 5847 dtrace_icookie_t cookie; 5848 dtrace_probe_t *probe; 5849 dtrace_mstate_t mstate; 5850 dtrace_ecb_t *ecb; 5851 dtrace_action_t *act; 5852 intptr_t offs; 5853 size_t size; 5854 int vtime, onintr; 5855 volatile uint16_t *flags; 5856 hrtime_t now; 5857 5858 /* 5859 * Kick out immediately if this CPU is still being born (in which case 5860 * curthread will be set to -1) or the current thread can't allow 5861 * probes in its current context. 5862 */ 5863 if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE)) 5864 return; 5865 5866 cookie = dtrace_interrupt_disable(); 5867 probe = dtrace_probes[id - 1]; 5868 cpuid = CPU->cpu_id; 5869 onintr = CPU_ON_INTR(CPU); 5870 5871 if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && 5872 probe->dtpr_predcache == curthread->t_predcache) { 5873 /* 5874 * We have hit in the predicate cache; we know that 5875 * this predicate would evaluate to be false. 5876 */ 5877 dtrace_interrupt_enable(cookie); 5878 return; 5879 } 5880 5881 if (panic_quiesce) { 5882 /* 5883 * We don't trace anything if we're panicking. 5884 */ 5885 dtrace_interrupt_enable(cookie); 5886 return; 5887 } 5888 5889 now = dtrace_gethrtime(); 5890 vtime = dtrace_vtime_references != 0; 5891 5892 if (vtime && curthread->t_dtrace_start) 5893 curthread->t_dtrace_vtime += now - curthread->t_dtrace_start; 5894 5895 mstate.dtms_difo = NULL; 5896 mstate.dtms_probe = probe; 5897 mstate.dtms_strtok = NULL; 5898 mstate.dtms_arg[0] = arg0; 5899 mstate.dtms_arg[1] = arg1; 5900 mstate.dtms_arg[2] = arg2; 5901 mstate.dtms_arg[3] = arg3; 5902 mstate.dtms_arg[4] = arg4; 5903 5904 flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags; 5905 5906 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { 5907 dtrace_predicate_t *pred = ecb->dte_predicate; 5908 dtrace_state_t *state = ecb->dte_state; 5909 dtrace_buffer_t *buf = &state->dts_buffer[cpuid]; 5910 dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; 5911 dtrace_vstate_t *vstate = &state->dts_vstate; 5912 dtrace_provider_t *prov = probe->dtpr_provider; 5913 uint64_t tracememsize = 0; 5914 int committed = 0; 5915 caddr_t tomax; 5916 5917 /* 5918 * A little subtlety with the following (seemingly innocuous) 5919 * declaration of the automatic 'val': by looking at the 5920 * code, you might think that it could be declared in the 5921 * action processing loop, below. (That is, it's only used in 5922 * the action processing loop.) However, it must be declared 5923 * out of that scope because in the case of DIF expression 5924 * arguments to aggregating actions, one iteration of the 5925 * action loop will use the last iteration's value. 
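* (An illustrative case: for an enabling containing
* @avg = avg(timestamp - self->ts), the argument expression is
* evaluated by one pass through the action loop and the
* aggregating action, by virtue of dtag_hasarg, consumes that
* value on the next.)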
5926 */
5927 #ifdef lint
5928 uint64_t val = 0;
5929 #else
5930 uint64_t val;
5931 #endif
5932
5933 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
5934 mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
5935 *flags &= ~CPU_DTRACE_ERROR;
5936
5937 if (prov == dtrace_provider) {
5938 /*
5939 * If dtrace itself is the provider of this probe,
5940 * we're only going to continue processing the ECB if
5941 * arg0 (the dtrace_state_t) is equal to the ECB's
5942 * creating state. (This prevents disjoint consumers
5943 * from seeing one another's metaprobes.)
5944 */
5945 if (arg0 != (uint64_t)(uintptr_t)state)
5946 continue;
5947 }
5948
5949 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
5950 /*
5951 * We're not currently active. If our provider isn't
5952 * the dtrace pseudo provider, we're not interested.
5953 */
5954 if (prov != dtrace_provider)
5955 continue;
5956
5957 /*
5958 * Now we must further check if we are in the BEGIN
5959 * probe. If we are, we will only continue processing
5960 * if we're still in WARMUP -- if one BEGIN enabling
5961 * has invoked the exit() action, we don't want to
5962 * evaluate subsequent BEGIN enablings.
5963 */
5964 if (probe->dtpr_id == dtrace_probeid_begin &&
5965 state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
5966 ASSERT(state->dts_activity ==
5967 DTRACE_ACTIVITY_DRAINING);
5968 continue;
5969 }
5970 }
5971
5972 if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb))
5973 continue;
5974
5975 if (now - state->dts_alive > dtrace_deadman_timeout) {
5976 /*
5977 * We seem to be dead. Unless we (a) have kernel
5978 * destructive permissions, (b) have explicitly enabled
5979 * destructive actions, and (c) destructive actions have
5980 * not been disabled, we're going to transition into
5981 * the KILLED state, from which no further processing
5982 * on this state will be performed.
5983 */
5984 if (!dtrace_priv_kernel_destructive(state) ||
5985 !state->dts_cred.dcr_destructive ||
5986 dtrace_destructive_disallow) {
5987 void *activity = &state->dts_activity;
5988 dtrace_activity_t current;
5989
5990 do {
5991 current = state->dts_activity;
5992 } while (dtrace_cas32(activity, current,
5993 DTRACE_ACTIVITY_KILLED) != current);
5994
5995 continue;
5996 }
5997 }
5998
5999 if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
6000 ecb->dte_alignment, state, &mstate)) < 0)
6001 continue;
6002
6003 tomax = buf->dtb_tomax;
6004 ASSERT(tomax != NULL);
6005
6006 if (ecb->dte_size != 0)
6007 DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid);
6008
6009 mstate.dtms_epid = ecb->dte_epid;
6010 mstate.dtms_present |= DTRACE_MSTATE_EPID;
6011
6012 if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
6013 mstate.dtms_access |= DTRACE_ACCESS_KERNEL;
6014
6015 if (pred != NULL) {
6016 dtrace_difo_t *dp = pred->dtp_difo;
6017 int rval;
6018
6019 rval = dtrace_dif_emulate(dp, &mstate, vstate, state);
6020
6021 if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
6022 dtrace_cacheid_t cid = probe->dtpr_predcache;
6023
6024 if (cid != DTRACE_CACHEIDNONE && !onintr) {
6025 /*
6026 * Update the predicate cache...
6027 */ 6028 ASSERT(cid == pred->dtp_cacheid); 6029 curthread->t_predcache = cid; 6030 } 6031 6032 continue; 6033 } 6034 } 6035 6036 for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) && 6037 act != NULL; act = act->dta_next) { 6038 size_t valoffs; 6039 dtrace_difo_t *dp; 6040 dtrace_recdesc_t *rec = &act->dta_rec; 6041 6042 size = rec->dtrd_size; 6043 valoffs = offs + rec->dtrd_offset; 6044 6045 if (DTRACEACT_ISAGG(act->dta_kind)) { 6046 uint64_t v = 0xbad; 6047 dtrace_aggregation_t *agg; 6048 6049 agg = (dtrace_aggregation_t *)act; 6050 6051 if ((dp = act->dta_difo) != NULL) 6052 v = dtrace_dif_emulate(dp, 6053 &mstate, vstate, state); 6054 6055 if (*flags & CPU_DTRACE_ERROR) 6056 continue; 6057 6058 /* 6059 * Note that we always pass the expression 6060 * value from the previous iteration of the 6061 * action loop. This value will only be used 6062 * if there is an expression argument to the 6063 * aggregating action, denoted by the 6064 * dtag_hasarg field. 6065 */ 6066 dtrace_aggregate(agg, buf, 6067 offs, aggbuf, v, val); 6068 continue; 6069 } 6070 6071 switch (act->dta_kind) { 6072 case DTRACEACT_STOP: 6073 if (dtrace_priv_proc_destructive(state, 6074 &mstate)) 6075 dtrace_action_stop(); 6076 continue; 6077 6078 case DTRACEACT_BREAKPOINT: 6079 if (dtrace_priv_kernel_destructive(state)) 6080 dtrace_action_breakpoint(ecb); 6081 continue; 6082 6083 case DTRACEACT_PANIC: 6084 if (dtrace_priv_kernel_destructive(state)) 6085 dtrace_action_panic(ecb); 6086 continue; 6087 6088 case DTRACEACT_STACK: 6089 if (!dtrace_priv_kernel(state)) 6090 continue; 6091 6092 dtrace_getpcstack((pc_t *)(tomax + valoffs), 6093 size / sizeof (pc_t), probe->dtpr_aframes, 6094 DTRACE_ANCHORED(probe) ? NULL : 6095 (uint32_t *)arg0); 6096 6097 continue; 6098 6099 case DTRACEACT_JSTACK: 6100 case DTRACEACT_USTACK: 6101 if (!dtrace_priv_proc(state, &mstate)) 6102 continue; 6103 6104 /* 6105 * See comment in DIF_VAR_PID. 6106 */ 6107 if (DTRACE_ANCHORED(mstate.dtms_probe) && 6108 CPU_ON_INTR(CPU)) { 6109 int depth = DTRACE_USTACK_NFRAMES( 6110 rec->dtrd_arg) + 1; 6111 6112 dtrace_bzero((void *)(tomax + valoffs), 6113 DTRACE_USTACK_STRSIZE(rec->dtrd_arg) 6114 + depth * sizeof (uint64_t)); 6115 6116 continue; 6117 } 6118 6119 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 && 6120 curproc->p_dtrace_helpers != NULL) { 6121 /* 6122 * This is the slow path -- we have 6123 * allocated string space, and we're 6124 * getting the stack of a process that 6125 * has helpers. Call into a separate 6126 * routine to perform this processing. 6127 */ 6128 dtrace_action_ustack(&mstate, state, 6129 (uint64_t *)(tomax + valoffs), 6130 rec->dtrd_arg); 6131 continue; 6132 } 6133 6134 /* 6135 * Clear the string space, since there's no 6136 * helper to do it for us. 
6137 */ 6138 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) { 6139 int depth = DTRACE_USTACK_NFRAMES( 6140 rec->dtrd_arg); 6141 size_t strsize = DTRACE_USTACK_STRSIZE( 6142 rec->dtrd_arg); 6143 uint64_t *buf = (uint64_t *)(tomax + 6144 valoffs); 6145 void *strspace = &buf[depth + 1]; 6146 6147 dtrace_bzero(strspace, 6148 MIN(depth, strsize)); 6149 } 6150 6151 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 6152 dtrace_getupcstack((uint64_t *) 6153 (tomax + valoffs), 6154 DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1); 6155 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 6156 continue; 6157 6158 default: 6159 break; 6160 } 6161 6162 dp = act->dta_difo; 6163 ASSERT(dp != NULL); 6164 6165 val = dtrace_dif_emulate(dp, &mstate, vstate, state); 6166 6167 if (*flags & CPU_DTRACE_ERROR) 6168 continue; 6169 6170 switch (act->dta_kind) { 6171 case DTRACEACT_SPECULATE: 6172 ASSERT(buf == &state->dts_buffer[cpuid]); 6173 buf = dtrace_speculation_buffer(state, 6174 cpuid, val); 6175 6176 if (buf == NULL) { 6177 *flags |= CPU_DTRACE_DROP; 6178 continue; 6179 } 6180 6181 offs = dtrace_buffer_reserve(buf, 6182 ecb->dte_needed, ecb->dte_alignment, 6183 state, NULL); 6184 6185 if (offs < 0) { 6186 *flags |= CPU_DTRACE_DROP; 6187 continue; 6188 } 6189 6190 tomax = buf->dtb_tomax; 6191 ASSERT(tomax != NULL); 6192 6193 if (ecb->dte_size != 0) 6194 DTRACE_STORE(uint32_t, tomax, offs, 6195 ecb->dte_epid); 6196 continue; 6197 6198 case DTRACEACT_CHILL: 6199 if (dtrace_priv_kernel_destructive(state)) 6200 dtrace_action_chill(&mstate, val); 6201 continue; 6202 6203 case DTRACEACT_RAISE: 6204 if (dtrace_priv_proc_destructive(state, 6205 &mstate)) 6206 dtrace_action_raise(val); 6207 continue; 6208 6209 case DTRACEACT_COMMIT: 6210 ASSERT(!committed); 6211 6212 /* 6213 * We need to commit our buffer state. 6214 */ 6215 if (ecb->dte_size) 6216 buf->dtb_offset = offs + ecb->dte_size; 6217 buf = &state->dts_buffer[cpuid]; 6218 dtrace_speculation_commit(state, cpuid, val); 6219 committed = 1; 6220 continue; 6221 6222 case DTRACEACT_DISCARD: 6223 dtrace_speculation_discard(state, cpuid, val); 6224 continue; 6225 6226 case DTRACEACT_DIFEXPR: 6227 case DTRACEACT_LIBACT: 6228 case DTRACEACT_PRINTF: 6229 case DTRACEACT_PRINTA: 6230 case DTRACEACT_SYSTEM: 6231 case DTRACEACT_FREOPEN: 6232 case DTRACEACT_TRACEMEM: 6233 break; 6234 6235 case DTRACEACT_TRACEMEM_DYNSIZE: 6236 tracememsize = val; 6237 break; 6238 6239 case DTRACEACT_SYM: 6240 case DTRACEACT_MOD: 6241 if (!dtrace_priv_kernel(state)) 6242 continue; 6243 break; 6244 6245 case DTRACEACT_USYM: 6246 case DTRACEACT_UMOD: 6247 case DTRACEACT_UADDR: { 6248 struct pid *pid = curthread->t_procp->p_pidp; 6249 6250 if (!dtrace_priv_proc(state, &mstate)) 6251 continue; 6252 6253 DTRACE_STORE(uint64_t, tomax, 6254 valoffs, (uint64_t)pid->pid_id); 6255 DTRACE_STORE(uint64_t, tomax, 6256 valoffs + sizeof (uint64_t), val); 6257 6258 continue; 6259 } 6260 6261 case DTRACEACT_EXIT: { 6262 /* 6263 * For the exit action, we are going to attempt 6264 * to atomically set our activity to be 6265 * draining. If this fails (either because 6266 * another CPU has beat us to the exit action, 6267 * or because our current activity is something 6268 * other than ACTIVE or WARMUP), we will 6269 * continue. This assures that the exit action 6270 * can be successfully recorded at most once 6271 * when we're in the ACTIVE state. If we're 6272 * encountering the exit() action while in 6273 * COOLDOWN, however, we want to honor the new 6274 * status code. 
(We know that we're the only 6275 * thread in COOLDOWN, so there is no race.) 6276 */ 6277 void *activity = &state->dts_activity; 6278 dtrace_activity_t current = state->dts_activity; 6279 6280 if (current == DTRACE_ACTIVITY_COOLDOWN) 6281 break; 6282 6283 if (current != DTRACE_ACTIVITY_WARMUP) 6284 current = DTRACE_ACTIVITY_ACTIVE; 6285 6286 if (dtrace_cas32(activity, current, 6287 DTRACE_ACTIVITY_DRAINING) != current) { 6288 *flags |= CPU_DTRACE_DROP; 6289 continue; 6290 } 6291 6292 break; 6293 } 6294 6295 default: 6296 ASSERT(0); 6297 } 6298 6299 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { 6300 uintptr_t end = valoffs + size; 6301 6302 if (tracememsize != 0 && 6303 valoffs + tracememsize < end) { 6304 end = valoffs + tracememsize; 6305 tracememsize = 0; 6306 } 6307 6308 if (!dtrace_vcanload((void *)(uintptr_t)val, 6309 &dp->dtdo_rtype, &mstate, vstate)) 6310 continue; 6311 6312 /* 6313 * If this is a string, we're going to only 6314 * load until we find the zero byte -- after 6315 * which we'll store zero bytes. 6316 */ 6317 if (dp->dtdo_rtype.dtdt_kind == 6318 DIF_TYPE_STRING) { 6319 char c = '\0' + 1; 6320 int intuple = act->dta_intuple; 6321 size_t s; 6322 6323 for (s = 0; s < size; s++) { 6324 if (c != '\0') 6325 c = dtrace_load8(val++); 6326 6327 DTRACE_STORE(uint8_t, tomax, 6328 valoffs++, c); 6329 6330 if (c == '\0' && intuple) 6331 break; 6332 } 6333 6334 continue; 6335 } 6336 6337 while (valoffs < end) { 6338 DTRACE_STORE(uint8_t, tomax, valoffs++, 6339 dtrace_load8(val++)); 6340 } 6341 6342 continue; 6343 } 6344 6345 switch (size) { 6346 case 0: 6347 break; 6348 6349 case sizeof (uint8_t): 6350 DTRACE_STORE(uint8_t, tomax, valoffs, val); 6351 break; 6352 case sizeof (uint16_t): 6353 DTRACE_STORE(uint16_t, tomax, valoffs, val); 6354 break; 6355 case sizeof (uint32_t): 6356 DTRACE_STORE(uint32_t, tomax, valoffs, val); 6357 break; 6358 case sizeof (uint64_t): 6359 DTRACE_STORE(uint64_t, tomax, valoffs, val); 6360 break; 6361 default: 6362 /* 6363 * Any other size should have been returned by 6364 * reference, not by value. 6365 */ 6366 ASSERT(0); 6367 break; 6368 } 6369 } 6370 6371 if (*flags & CPU_DTRACE_DROP) 6372 continue; 6373 6374 if (*flags & CPU_DTRACE_FAULT) { 6375 int ndx; 6376 dtrace_action_t *err; 6377 6378 buf->dtb_errors++; 6379 6380 if (probe->dtpr_id == dtrace_probeid_error) { 6381 /* 6382 * There's nothing we can do -- we had an 6383 * error on the error probe. We bump an 6384 * error counter to at least indicate that 6385 * this condition happened. 6386 */ 6387 dtrace_error(&state->dts_dblerrors); 6388 continue; 6389 } 6390 6391 if (vtime) { 6392 /* 6393 * Before recursing on dtrace_probe(), we 6394 * need to explicitly clear out our start 6395 * time to prevent it from being accumulated 6396 * into t_dtrace_vtime. 6397 */ 6398 curthread->t_dtrace_start = 0; 6399 } 6400 6401 /* 6402 * Iterate over the actions to figure out which action 6403 * we were processing when we experienced the error. 6404 * Note that act points _past_ the faulting action; if 6405 * act is ecb->dte_action, the fault was in the 6406 * predicate, if it's ecb->dte_action->dta_next it's 6407 * in action #1, and so on. 6408 */ 6409 for (err = ecb->dte_action, ndx = 0; 6410 err != act; err = err->dta_next, ndx++) 6411 continue; 6412 6413 dtrace_probe_error(state, ecb->dte_epid, ndx, 6414 (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ? 
6415 mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags), 6416 cpu_core[cpuid].cpuc_dtrace_illval); 6417 6418 continue; 6419 } 6420 6421 if (!committed) 6422 buf->dtb_offset = offs + ecb->dte_size; 6423 } 6424 6425 if (vtime) 6426 curthread->t_dtrace_start = dtrace_gethrtime(); 6427 6428 dtrace_interrupt_enable(cookie); 6429 } 6430 6431 /* 6432 * DTrace Probe Hashing Functions 6433 * 6434 * The functions in this section (and indeed, the functions in remaining 6435 * sections) are not _called_ from probe context. (Any exceptions to this are 6436 * marked with a "Note:".) Rather, they are called from elsewhere in the 6437 * DTrace framework to look-up probes in, add probes to and remove probes from 6438 * the DTrace probe hashes. (Each probe is hashed by each element of the 6439 * probe tuple -- allowing for fast lookups, regardless of what was 6440 * specified.) 6441 */ 6442 static uint_t 6443 dtrace_hash_str(char *p) 6444 { 6445 unsigned int g; 6446 uint_t hval = 0; 6447 6448 while (*p) { 6449 hval = (hval << 4) + *p++; 6450 if ((g = (hval & 0xf0000000)) != 0) 6451 hval ^= g >> 24; 6452 hval &= ~g; 6453 } 6454 return (hval); 6455 } 6456 6457 static dtrace_hash_t * 6458 dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) 6459 { 6460 dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP); 6461 6462 hash->dth_stroffs = stroffs; 6463 hash->dth_nextoffs = nextoffs; 6464 hash->dth_prevoffs = prevoffs; 6465 6466 hash->dth_size = 1; 6467 hash->dth_mask = hash->dth_size - 1; 6468 6469 hash->dth_tab = kmem_zalloc(hash->dth_size * 6470 sizeof (dtrace_hashbucket_t *), KM_SLEEP); 6471 6472 return (hash); 6473 } 6474 6475 static void 6476 dtrace_hash_destroy(dtrace_hash_t *hash) 6477 { 6478 #ifdef DEBUG 6479 int i; 6480 6481 for (i = 0; i < hash->dth_size; i++) 6482 ASSERT(hash->dth_tab[i] == NULL); 6483 #endif 6484 6485 kmem_free(hash->dth_tab, 6486 hash->dth_size * sizeof (dtrace_hashbucket_t *)); 6487 kmem_free(hash, sizeof (dtrace_hash_t)); 6488 } 6489 6490 static void 6491 dtrace_hash_resize(dtrace_hash_t *hash) 6492 { 6493 int size = hash->dth_size, i, ndx; 6494 int new_size = hash->dth_size << 1; 6495 int new_mask = new_size - 1; 6496 dtrace_hashbucket_t **new_tab, *bucket, *next; 6497 6498 ASSERT((new_size & new_mask) == 0); 6499 6500 new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP); 6501 6502 for (i = 0; i < size; i++) { 6503 for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) { 6504 dtrace_probe_t *probe = bucket->dthb_chain; 6505 6506 ASSERT(probe != NULL); 6507 ndx = DTRACE_HASHSTR(hash, probe) & new_mask; 6508 6509 next = bucket->dthb_next; 6510 bucket->dthb_next = new_tab[ndx]; 6511 new_tab[ndx] = bucket; 6512 } 6513 } 6514 6515 kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *)); 6516 hash->dth_tab = new_tab; 6517 hash->dth_size = new_size; 6518 hash->dth_mask = new_mask; 6519 } 6520 6521 static void 6522 dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new) 6523 { 6524 int hashval = DTRACE_HASHSTR(hash, new); 6525 int ndx = hashval & hash->dth_mask; 6526 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; 6527 dtrace_probe_t **nextp, **prevp; 6528 6529 for (; bucket != NULL; bucket = bucket->dthb_next) { 6530 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new)) 6531 goto add; 6532 } 6533 6534 if ((hash->dth_nbuckets >> 1) > hash->dth_size) { 6535 dtrace_hash_resize(hash); 6536 dtrace_hash_add(hash, new); 6537 return; 6538 } 6539 6540 bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP); 6541 bucket->dthb_next = 
static void
dtrace_hash_destroy(dtrace_hash_t *hash)
{
#ifdef DEBUG
	int i;

	for (i = 0; i < hash->dth_size; i++)
		ASSERT(hash->dth_tab[i] == NULL);
#endif

	kmem_free(hash->dth_tab,
	    hash->dth_size * sizeof (dtrace_hashbucket_t *));
	kmem_free(hash, sizeof (dtrace_hash_t));
}

static void
dtrace_hash_resize(dtrace_hash_t *hash)
{
	int size = hash->dth_size, i, ndx;
	int new_size = hash->dth_size << 1;
	int new_mask = new_size - 1;
	dtrace_hashbucket_t **new_tab, *bucket, *next;

	ASSERT((new_size & new_mask) == 0);

	new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);

	for (i = 0; i < size; i++) {
		for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
			dtrace_probe_t *probe = bucket->dthb_chain;

			ASSERT(probe != NULL);
			ndx = DTRACE_HASHSTR(hash, probe) & new_mask;

			next = bucket->dthb_next;
			bucket->dthb_next = new_tab[ndx];
			new_tab[ndx] = bucket;
		}
	}

	kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
	hash->dth_tab = new_tab;
	hash->dth_size = new_size;
	hash->dth_mask = new_mask;
}

static void
dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
{
	int hashval = DTRACE_HASHSTR(hash, new);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
	dtrace_probe_t **nextp, **prevp;

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
			goto add;
	}

	if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
		dtrace_hash_resize(hash);
		dtrace_hash_add(hash, new);
		return;
	}

	bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
	bucket->dthb_next = hash->dth_tab[ndx];
	hash->dth_tab[ndx] = bucket;
	hash->dth_nbuckets++;

add:
	nextp = DTRACE_HASHNEXT(hash, new);
	ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
	*nextp = bucket->dthb_chain;

	if (bucket->dthb_chain != NULL) {
		prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
		ASSERT(*prevp == NULL);
		*prevp = new;
	}

	bucket->dthb_chain = new;
	bucket->dthb_len++;
}

static dtrace_probe_t *
dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_chain);
	}

	return (NULL);
}

static int
dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_len);
	}

	return (0);
}

static void
dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
{
	int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
	dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);

	/*
	 * Find the bucket that we're removing this probe from.
	 */
	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
			break;
	}

	ASSERT(bucket != NULL);

	if (*prevp == NULL) {
		if (*nextp == NULL) {
			/*
			 * The removed probe was the only probe on this
			 * bucket; we need to remove the bucket.
			 */
			dtrace_hashbucket_t *b = hash->dth_tab[ndx];

			ASSERT(bucket->dthb_chain == probe);
			ASSERT(b != NULL);

			if (b == bucket) {
				hash->dth_tab[ndx] = bucket->dthb_next;
			} else {
				while (b->dthb_next != bucket)
					b = b->dthb_next;
				b->dthb_next = bucket->dthb_next;
			}

			ASSERT(hash->dth_nbuckets > 0);
			hash->dth_nbuckets--;
			kmem_free(bucket, sizeof (dtrace_hashbucket_t));
			return;
		}

		bucket->dthb_chain = *nextp;
	} else {
		*(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
	}

	if (*nextp != NULL)
		*(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
}

/*
 * DTrace Utility Functions
 *
 * These are random utility functions that are _not_ called from probe context.
 */
static int
dtrace_badattr(const dtrace_attribute_t *a)
{
	return (a->dtat_name > DTRACE_STABILITY_MAX ||
	    a->dtat_data > DTRACE_STABILITY_MAX ||
	    a->dtat_class > DTRACE_CLASS_MAX);
}

/*
 * Return a duplicate copy of a string.  If the specified string is NULL,
 * this function returns a zero-length string.
 */
static char *
dtrace_strdup(const char *str)
{
	char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);

	if (str != NULL)
		(void) strcpy(new, str);

	return (new);
}

#define	DTRACE_ISALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

static int
dtrace_badname(const char *s)
{
	char c;

	if (s == NULL || (c = *s++) == '\0')
		return (0);

	if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
		return (1);

	while ((c = *s++) != '\0') {
		if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.' && c != '`')
			return (1);
	}

	return (0);
}

static void
dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
{
	uint32_t priv;

	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		/*
		 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
		 */
		priv = DTRACE_PRIV_ALL;
	} else {
		*uidp = crgetuid(cr);
		*zoneidp = crgetzoneid(cr);

		priv = 0;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
			priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
		else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
			priv |= DTRACE_PRIV_USER;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
			priv |= DTRACE_PRIV_PROC;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
			priv |= DTRACE_PRIV_OWNER;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
			priv |= DTRACE_PRIV_ZONEOWNER;
	}

	*privp = priv;
}

#ifdef DTRACE_ERRDEBUG
static void
dtrace_errdebug(const char *str)
{
	int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
	int occupied = 0;

	mutex_enter(&dtrace_errlock);
	dtrace_errlast = str;
	dtrace_errthread = curthread;

	while (occupied++ < DTRACE_ERRHASHSZ) {
		if (dtrace_errhash[hval].dter_msg == str) {
			dtrace_errhash[hval].dter_count++;
			goto out;
		}

		if (dtrace_errhash[hval].dter_msg != NULL) {
			hval = (hval + 1) % DTRACE_ERRHASHSZ;
			continue;
		}

		dtrace_errhash[hval].dter_msg = str;
		dtrace_errhash[hval].dter_count = 1;
		goto out;
	}

	panic("dtrace: undersized error hash");
out:
	mutex_exit(&dtrace_errlock);
}
#endif
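/*
 * For reference, dtrace_badname() above accepts names that begin with an
 * alphabetic character (or '-', '_' or '.') and thereafter consist of
 * alphanumerics, '-', '_', '.' or '`'.  Some illustrative results:
 *
 *	dtrace_badname("fbt")		== 0	(valid)
 *	dtrace_badname("my_provider")	== 0	(valid)
 *	dtrace_badname("2fast")		== 1	(leading digit)
 *	dtrace_badname("bad name")	== 1	(embedded space)
 */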
/*
 * DTrace Matching Functions
 *
 * These functions are used to match groups of probes, given some elements of
 * a probe tuple, or some globbed expressions for elements of a probe tuple.
 */
static int
dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
    zoneid_t zoneid)
{
	if (priv != DTRACE_PRIV_ALL) {
		uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
		uint32_t match = priv & ppriv;

		/*
		 * No PRIV_DTRACE_* privileges...
		 */
		if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
		    DTRACE_PRIV_KERNEL)) == 0)
			return (0);

		/*
		 * No matching bits, but there were bits to match...
		 */
		if (match == 0 && ppriv != 0)
			return (0);

		/*
		 * Need to have permissions to the process, but don't...
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
		    uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
			return (0);
		}

		/*
		 * Need to be in the same zone unless we possess the
		 * privilege to examine all zones.
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
		    zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
			return (0);
		}
	}

	return (1);
}

/*
 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
 * consists of input pattern strings and an ops-vector to evaluate them.
 * This function returns >0 for match, 0 for no match, and <0 for error.
 */
static int
dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
    uint32_t priv, uid_t uid, zoneid_t zoneid)
{
	dtrace_provider_t *pvp = prp->dtpr_provider;
	int rv;

	if (pvp->dtpv_defunct)
		return (0);

	if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
		return (rv);

	if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
		return (0);

	return (rv);
}

/*
 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
 * interface for matching a glob pattern 'p' to an input string 's'.  Unlike
 * libc's version, the kernel version only applies to 8-bit ASCII strings.
 * In addition, all of the recursion cases except for '*' matching have been
 * unwound.  For '*', we still implement recursive evaluation, but a depth
 * counter is maintained and matching is aborted if we recurse too deep.
 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
 */
static int
dtrace_match_glob(const char *s, const char *p, int depth)
{
	const char *olds;
	char s1, c;
	int gs;

	if (depth > DTRACE_PROBEKEY_MAXDEPTH)
		return (-1);

	if (s == NULL)
		s = "";	/* treat NULL as empty string */

top:
	olds = s;
	s1 = *s++;

	if (p == NULL)
		return (0);

	if ((c = *p++) == '\0')
		return (s1 == '\0');

	switch (c) {
	case '[': {
		int ok = 0, notflag = 0;
		char lc = '\0';

		if (s1 == '\0')
			return (0);

		if (*p == '!') {
			notflag = 1;
			p++;
		}

		if ((c = *p++) == '\0')
			return (0);

		do {
			if (c == '-' && lc != '\0' && *p != ']') {
				if ((c = *p++) == '\0')
					return (0);
				if (c == '\\' && (c = *p++) == '\0')
					return (0);

				if (notflag) {
					if (s1 < lc || s1 > c)
						ok++;
					else
						return (0);
				} else if (lc <= s1 && s1 <= c)
					ok++;

			} else if (c == '\\' && (c = *p++) == '\0')
				return (0);

			lc = c;	/* save left-hand 'c' for next iteration */

			if (notflag) {
				if (s1 != c)
					ok++;
				else
					return (0);
			} else if (s1 == c)
				ok++;

			if ((c = *p++) == '\0')
				return (0);

		} while (c != ']');

		if (ok)
			goto top;

		return (0);
	}

	case '\\':
		if ((c = *p++) == '\0')
			return (0);
		/*FALLTHRU*/

	default:
		if (c != s1)
			return (0);
		/*FALLTHRU*/

	case '?':
		if (s1 != '\0')
			goto top;
		return (0);

	case '*':
		while (*p == '*')
			p++;	/* consecutive *'s are identical to a single one */

		if (*p == '\0')
			return (1);

		for (s = olds; *s != '\0'; s++) {
			if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
				return (gs);
		}

		return (0);
	}
}

/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
	return (s != NULL && strcmp(s, p) == 0);
}

/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
	return (1);	/* always match the empty pattern */
}

/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
	return (s != NULL && s[0] != '\0');
}
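/*
 * A few illustrative results for the matchers above:
 *
 *	dtrace_match_glob("read", "read", 0)	 > 0	(literal match)
 *	dtrace_match_glob("read", "r*d", 0)	 > 0	('*' spans "ea")
 *	dtrace_match_glob("read", "r[a-f]ad", 0) > 0	('e' is in [a-f])
 *	dtrace_match_glob("read", "w*", 0)	 == 0	(no match)
 *	dtrace_match_nul(NULL, NULL, 0)		 == 1	(empty pattern)
 *	dtrace_match_nonzero("", NULL, 0)	 == 0	(empty string)
 */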
static int
dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
    zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
{
	dtrace_probe_t template, *probe;
	dtrace_hash_t *hash = NULL;
	int len, rc, best = INT_MAX, nmatched = 0;
	dtrace_id_t i;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	/*
	 * If the probe ID is specified in the key, just lookup by ID and
	 * invoke the match callback once if a matching probe is found.
	 */
	if (pkp->dtpk_id != DTRACE_IDNONE) {
		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
			if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
				return (DTRACE_MATCH_FAIL);
			nmatched++;
		}
		return (nmatched);
	}

	template.dtpr_mod = (char *)pkp->dtpk_mod;
	template.dtpr_func = (char *)pkp->dtpk_func;
	template.dtpr_name = (char *)pkp->dtpk_name;

	/*
	 * We want to find the most distinct of the module name, function
	 * name, and name.  So for each one that is not a glob pattern or
	 * empty string, we perform a lookup in the corresponding hash and
	 * use the hash table with the fewest collisions to do our search.
	 */
	if (pkp->dtpk_mmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
		best = len;
		hash = dtrace_bymod;
	}

	if (pkp->dtpk_fmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
		best = len;
		hash = dtrace_byfunc;
	}

	if (pkp->dtpk_nmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
		best = len;
		hash = dtrace_byname;
	}

	/*
	 * If we did not select a hash table, iterate over every probe and
	 * invoke our callback for each one that matches our input probe key.
	 */
	if (hash == NULL) {
		for (i = 0; i < dtrace_nprobes; i++) {
			if ((probe = dtrace_probes[i]) == NULL ||
			    dtrace_match_probe(probe, pkp, priv, uid,
			    zoneid) <= 0)
				continue;

			nmatched++;

			if ((rc = (*matched)(probe, arg)) !=
			    DTRACE_MATCH_NEXT) {
				if (rc == DTRACE_MATCH_FAIL)
					return (DTRACE_MATCH_FAIL);
				break;
			}
		}

		return (nmatched);
	}

	/*
	 * If we selected a hash table, iterate over each probe of the same key
	 * name and invoke the callback for every probe that matches the other
	 * attributes of our input probe key.
	 */
	for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
	    probe = *(DTRACE_HASHNEXT(hash, probe))) {

		if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
			continue;

		nmatched++;

		if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
			if (rc == DTRACE_MATCH_FAIL)
				return (DTRACE_MATCH_FAIL);
			break;
		}
	}

	return (nmatched);
}

/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string.  For NULL or empty patterns, we select
 * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
{
	char c;

	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);
	}

	return (&dtrace_match_string);
}

/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description.  By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
 */
static void
dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
{
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
}
/*
 * DTrace Provider-to-Framework API Functions
 *
 * These functions implement much of the Provider-to-Framework API, as
 * described in <sys/dtrace.h>.  The parts of the API not in this section are
 * the functions in the API for probe management (found below), and
 * dtrace_probe() itself (found above).
 */

/*
 * Register the calling provider with the DTrace framework.  This should
 * generally be called by DTrace providers in their attach(9E) entry point.
 */
int
dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
    cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
{
	dtrace_provider_t *provider;

	if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "arguments", name ? name : "<NULL>");
		return (EINVAL);
	}

	if (name[0] == '\0' || dtrace_badname(name)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider name", name);
		return (EINVAL);
	}

	if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
	    pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
	    pops->dtps_destroy == NULL ||
	    ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider ops", name);
		return (EINVAL);
	}

	if (dtrace_badattr(&pap->dtpa_provider) ||
	    dtrace_badattr(&pap->dtpa_mod) ||
	    dtrace_badattr(&pap->dtpa_func) ||
	    dtrace_badattr(&pap->dtpa_name) ||
	    dtrace_badattr(&pap->dtpa_args)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider attributes", name);
		return (EINVAL);
	}

	if (priv & ~DTRACE_PRIV_ALL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "privilege attributes", name);
		return (EINVAL);
	}

	if ((priv & DTRACE_PRIV_KERNEL) &&
	    (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
	    pops->dtps_mode == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': need "
		    "dtps_mode() op for given privilege attributes", name);
		return (EINVAL);
	}

	provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
	provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(provider->dtpv_name, name);

	provider->dtpv_attr = *pap;
	provider->dtpv_priv.dtpp_flags = priv;
	if (cr != NULL) {
		provider->dtpv_priv.dtpp_uid = crgetuid(cr);
		provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
	}
	provider->dtpv_pops = *pops;

	if (pops->dtps_provide == NULL) {
		ASSERT(pops->dtps_provide_module != NULL);
		provider->dtpv_pops.dtps_provide =
		    (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
	}

	if (pops->dtps_provide_module == NULL) {
		ASSERT(pops->dtps_provide != NULL);
		provider->dtpv_pops.dtps_provide_module =
		    (void (*)(void *, struct modctl *))dtrace_nullop;
	}

	if (pops->dtps_suspend == NULL) {
		ASSERT(pops->dtps_resume == NULL);
		provider->dtpv_pops.dtps_suspend =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
		provider->dtpv_pops.dtps_resume =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
	}

	provider->dtpv_arg = arg;
	*idp = (dtrace_provider_id_t)provider;

	if (pops == &dtrace_provider_ops) {
		ASSERT(MUTEX_HELD(&dtrace_provider_lock));
		ASSERT(MUTEX_HELD(&dtrace_lock));
		ASSERT(dtrace_anon.dta_enabling == NULL);

		/*
		 * We make sure that the DTrace provider is at the head of
		 * the provider chain.
		 */
		provider->dtpv_next = dtrace_provider;
		dtrace_provider = provider;
		return (0);
	}

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * If there is at least one provider registered, we'll add this
	 * provider after the first provider.
	 */
	if (dtrace_provider != NULL) {
		provider->dtpv_next = dtrace_provider->dtpv_next;
		dtrace_provider->dtpv_next = provider;
	} else {
		dtrace_provider = provider;
	}

	if (dtrace_retained != NULL) {
		dtrace_enabling_provide(provider);

		/*
		 * Now we need to call dtrace_enabling_matchall() -- which
		 * will acquire cpu_lock and dtrace_lock.  We therefore need
		 * to drop all of our locks before calling into it...
		 */
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_provider_lock);
		dtrace_enabling_matchall();

		return (0);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	return (0);
}
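/*
 * To illustrate the registration interface above: a provider's attach(9E)
 * entry point typically fills in a dtrace_pops_t with its dtps_provide,
 * dtps_enable, dtps_disable and dtps_destroy operations (plus the optional
 * ones), and then calls dtrace_register().  A hypothetical sketch, in which
 * the "example" names are illustrative and not part of DTrace:
 *
 *	if (dtrace_register("example", &example_attr, DTRACE_PRIV_KERNEL,
 *	    NULL, &example_pops, NULL, &example_id) != 0) {
 *		cmn_err(CE_NOTE, "example: dtrace_register failed");
 *		return (DDI_FAILURE);
 *	}
 *
 * On success, *idp holds the provider ID used for all subsequent calls into
 * the framework (dtrace_probe_create(), dtrace_unregister(), etc.).  Note
 * that dtps_provide and dtps_provide_module may not both be NULL, and that
 * dtps_suspend and dtps_resume must be specified together, per the checks
 * above.
 */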
/*
 * Unregister the specified provider from the DTrace framework.  This should
 * generally be called by DTrace providers in their detach(9E) entry point.
 */
int
dtrace_unregister(dtrace_provider_id_t id)
{
	dtrace_provider_t *old = (dtrace_provider_t *)id;
	dtrace_provider_t *prev = NULL;
	int i, self = 0, noreap = 0;
	dtrace_probe_t *probe, *first = NULL;

	if (old->dtpv_pops.dtps_enable ==
	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
		/*
		 * If DTrace itself is the provider, we're called with locks
		 * already held.
		 */
		ASSERT(old == dtrace_provider);
		ASSERT(dtrace_devi != NULL);
		ASSERT(MUTEX_HELD(&dtrace_provider_lock));
		ASSERT(MUTEX_HELD(&dtrace_lock));
		self = 1;

		if (dtrace_provider->dtpv_next != NULL) {
			/*
			 * There's another provider here; return failure.
			 */
			return (EBUSY);
		}
	} else {
		mutex_enter(&dtrace_provider_lock);
		mutex_enter(&mod_lock);
		mutex_enter(&dtrace_lock);
	}

	/*
	 * If anyone has /dev/dtrace open, or if there are anonymous enabled
	 * probes, we refuse to let providers slither away, unless this
	 * provider has already been explicitly invalidated.
	 */
	if (!old->dtpv_defunct &&
	    (dtrace_opens || (dtrace_anon.dta_state != NULL &&
	    dtrace_anon.dta_state->dts_necbs > 0))) {
		if (!self) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
		}
		return (EBUSY);
	}

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		if (probe->dtpr_ecb == NULL)
			continue;

		/*
		 * If we are trying to unregister a defunct provider, and the
		 * provider was made defunct within the interval dictated by
		 * dtrace_unregister_defunct_reap, we'll (asynchronously)
		 * attempt to reap our enablings.  To denote that the provider
		 * should reattempt to unregister itself at some point in the
		 * future, we will return a differentiable error code (EAGAIN
		 * instead of EBUSY) in this case.
		 */
		if (dtrace_gethrtime() - old->dtpv_defunct >
		    dtrace_unregister_defunct_reap)
			noreap = 1;

		if (!self) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
		}

		if (noreap)
			return (EBUSY);

		(void) taskq_dispatch(dtrace_taskq,
		    (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);

		return (EAGAIN);
	}

	/*
	 * All of the probes for this provider are disabled; we can safely
	 * remove all of them from their hash chains and from the probe array.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * The provider's probes have been removed from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;

		old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
		kmem_free(probe, sizeof (dtrace_probe_t));
	}

	if ((prev = dtrace_provider) == old) {
		ASSERT(self || dtrace_devi == NULL);
		ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
		dtrace_provider = old->dtpv_next;
	} else {
		while (prev != NULL && prev->dtpv_next != old)
			prev = prev->dtpv_next;

		if (prev == NULL) {
			panic("attempt to unregister non-existent "
			    "dtrace provider %p\n", (void *)id);
		}

		prev->dtpv_next = old->dtpv_next;
	}

	if (!self) {
		mutex_exit(&dtrace_lock);
		mutex_exit(&mod_lock);
		mutex_exit(&dtrace_provider_lock);
	}

	kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
	kmem_free(old, sizeof (dtrace_provider_t));

	return (0);
}

/*
 * Invalidate the specified provider.  All subsequent probe lookups for the
 * specified provider will fail, but its probes will not be removed.
 */
void
dtrace_invalidate(dtrace_provider_id_t id)
{
	dtrace_provider_t *pvp = (dtrace_provider_t *)id;

	ASSERT(pvp->dtpv_pops.dtps_enable !=
	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	pvp->dtpv_defunct = dtrace_gethrtime();

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);
}

/*
 * Indicate whether or not DTrace has attached.
 */
int
dtrace_attached(void)
{
	/*
	 * dtrace_provider will be non-NULL iff the DTrace driver has
	 * attached.  (It's non-NULL because DTrace is always itself a
	 * provider.)
	 */
	return (dtrace_provider != NULL);
}

/*
 * Remove all the unenabled probes for the given provider.  This function is
 * not unlike dtrace_unregister(), except that it doesn't remove the provider
 * -- just as many of its associated probes as it can.
 */
int
dtrace_condense(dtrace_provider_id_t id)
{
	dtrace_provider_t *prov = (dtrace_provider_t *)id;
	int i;
	dtrace_probe_t *probe;

	/*
	 * Make sure this isn't the dtrace provider itself.
	 */
	ASSERT(prov->dtpv_pops.dtps_enable !=
	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != prov)
			continue;

		if (probe->dtpr_ecb != NULL)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		kmem_free(probe, sizeof (dtrace_probe_t));
		vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	return (0);
}

/*
 * DTrace Probe Management Functions
 *
 * The functions in this section perform the DTrace probe management,
 * including functions to create probes, look up probes, and call into the
 * providers to request that probes be provided.  Some of these functions are
 * in the Provider-to-Framework API; these functions can be identified by the
 * fact that they are not declared "static".
 */

/*
 * Create a probe with the specified module name, function name, and name.
 */
dtrace_id_t
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
{
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;
	dtrace_id_t id;

	if (provider == dtrace_provider) {
		ASSERT(MUTEX_HELD(&dtrace_lock));
	} else {
		mutex_enter(&dtrace_lock);
	}

	id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
	probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);

	probe->dtpr_id = id;
	probe->dtpr_gen = dtrace_probegen++;
	probe->dtpr_mod = dtrace_strdup(mod);
	probe->dtpr_func = dtrace_strdup(func);
	probe->dtpr_name = dtrace_strdup(name);
	probe->dtpr_arg = arg;
	probe->dtpr_aframes = aframes;
	probe->dtpr_provider = provider;

	dtrace_hash_add(dtrace_bymod, probe);
	dtrace_hash_add(dtrace_byfunc, probe);
	dtrace_hash_add(dtrace_byname, probe);

	if (id - 1 >= dtrace_nprobes) {
		size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
		size_t nsize = osize << 1;

		if (nsize == 0) {
			ASSERT(osize == 0);
			ASSERT(dtrace_probes == NULL);
			nsize = sizeof (dtrace_probe_t *);
		}

		probes = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_probes == NULL) {
			ASSERT(osize == 0);
			dtrace_probes = probes;
			dtrace_nprobes = 1;
		} else {
			dtrace_probe_t **oprobes = dtrace_probes;

			bcopy(oprobes, probes, osize);
			dtrace_membar_producer();
			dtrace_probes = probes;

			dtrace_sync();

			/*
			 * All CPUs are now seeing the new probes array; we can
			 * safely free the old array.
			 */
			kmem_free(oprobes, osize);
			dtrace_nprobes <<= 1;
		}

		ASSERT(id - 1 < dtrace_nprobes);
	}

	ASSERT(dtrace_probes[id - 1] == NULL);
	dtrace_probes[id - 1] = probe;

	if (provider != dtrace_provider)
		mutex_exit(&dtrace_lock);

	return (id);
}
static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
{
	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (id == 0 || id > dtrace_nprobes)
		return (NULL);

	return (dtrace_probes[id - 1]);
}

static int
dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
{
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
}

/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.
 */
dtrace_id_t
dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
    const char *func, const char *name)
{
	dtrace_probekey_t pkey;
	dtrace_id_t id;
	int match;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	mutex_enter(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	mutex_exit(&dtrace_lock);

	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
}

/*
 * Returns the probe argument associated with the specified probe.
 */
void *
dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
{
	dtrace_probe_t *probe;
	void *rval = NULL;

	mutex_enter(&dtrace_lock);

	if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
	    probe->dtpr_provider == (dtrace_provider_t *)id)
		rval = probe->dtpr_arg;

	mutex_exit(&dtrace_lock);

	return (rval);
}

/*
 * Copy a probe into a probe description.
 */
static void
dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
{
	bzero(pdp, sizeof (dtrace_probedesc_t));
	pdp->dtpd_id = prp->dtpr_id;

	(void) strncpy(pdp->dtpd_provider,
	    prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1);

	(void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1);
	(void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1);
	(void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1);
}

/*
 * Called to indicate that a probe -- or probes -- should be provided by a
 * specified provider.  If the specified description is NULL, the provider
 * will be told to provide all of its probes.  (This is done whenever a new
 * consumer comes along, or whenever a retained enabling is to be matched.)  If
 * the specified description is non-NULL, the provider is given the
 * opportunity to dynamically provide the specified probe, allowing providers
 * to support the creation of probes on-the-fly.  (So-called _autocreated_
 * probes.)  If the provider is NULL, the operations will be applied to all
 * providers; if the provider is non-NULL the operations will only be applied
 * to the specified provider.  The dtrace_provider_lock must be held, and the
 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
 * will need to grab the dtrace_lock when it reenters the framework through
 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
 */
static void
dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
{
	struct modctl *ctl;
	int all = 0;

	ASSERT(MUTEX_HELD(&dtrace_provider_lock));

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		/*
		 * First, call the blanket provide operation.
		 */
		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);

		/*
		 * Now call the per-module provide operation.  We will grab
		 * mod_lock to prevent the list from being modified.  Note
		 * that this also prevents the mod_busy bits from changing.
		 * (mod_busy can only be changed with mod_lock held.)
		 */
		mutex_enter(&mod_lock);

		ctl = &modules;
		do {
			if (ctl->mod_busy || ctl->mod_mp == NULL)
				continue;

			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

		} while ((ctl = ctl->mod_next) != &modules);

		mutex_exit(&mod_lock);
	} while (all && (prv = prv->dtpv_next) != NULL);
}

/*
 * Iterate over each probe, and call the Framework-to-Provider API function
 * denoted by offs.
 */
static void
dtrace_probe_foreach(uintptr_t offs)
{
	dtrace_provider_t *prov;
	void (*func)(void *, dtrace_id_t, void *);
	dtrace_probe_t *probe;
	dtrace_icookie_t cookie;
	int i;

	/*
	 * We disable interrupts to walk through the probe array.  This is
	 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
	 * won't see stale data.
	 */
	cookie = dtrace_interrupt_disable();

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL) {
			/*
			 * This probe isn't enabled -- don't call the function.
			 */
			continue;
		}

		prov = probe->dtpr_provider;
		func = *((void(**)(void *, dtrace_id_t, void *))
		    ((uintptr_t)&prov->dtpv_pops + offs));

		func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
	}

	dtrace_interrupt_enable(cookie);
}
static int
dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
{
	dtrace_probekey_t pkey;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	dtrace_ecb_create_cache = NULL;

	if (desc == NULL) {
		/*
		 * If we're passed a NULL description, we're being asked to
		 * create an ECB with a NULL probe.
		 */
		(void) dtrace_ecb_create_enable(NULL, enab);
		return (0);
	}

	dtrace_probekey(desc, &pkey);
	dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
	    &priv, &uid, &zoneid);

	return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
	    enab));
}

/*
 * DTrace Helper Provider Functions
 */
static void
dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
{
	attr->dtat_name = DOF_ATTR_NAME(dofattr);
	attr->dtat_data = DOF_ATTR_DATA(dofattr);
	attr->dtat_class = DOF_ATTR_CLASS(dofattr);
}

static void
dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
    const dof_provider_t *dofprov, char *strtab)
{
	hprov->dthpv_provname = strtab + dofprov->dofpv_name;
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
	    dofprov->dofpv_provattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
	    dofprov->dofpv_modattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
	    dofprov->dofpv_funcattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
	    dofprov->dofpv_nameattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
	    dofprov->dofpv_argsattr);
}

static void
dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint32_t *off, *enoff;
	uint8_t *arg;
	char *strtab;
	uint_t i, nprobes;
	dtrace_helper_provdesc_t dhpv;
	dtrace_helper_probedesc_t dhpb;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;
	void *parg;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);
	prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_probes * dof->dofh_secsize);
	arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_prargs * dof->dofh_secsize);
	off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_proffs * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
	off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
	enoff = NULL;

	/*
	 * See dtrace_helper_provider_validate().
	 */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE) {
		enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
		    provider->dofpv_prenoffs * dof->dofh_secsize);
		enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
	}

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
		return;

	meta->dtm_count++;

	/*
	 * Create the probes.
	 */
	for (i = 0; i < nprobes; i++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + i * prb_sec->dofs_entsize);

		dhpb.dthpb_mod = dhp->dofhp_mod;
		dhpb.dthpb_func = strtab + probe->dofpr_func;
		dhpb.dthpb_name = strtab + probe->dofpr_name;
		dhpb.dthpb_base = probe->dofpr_addr;
		dhpb.dthpb_offs = off + probe->dofpr_offidx;
		dhpb.dthpb_noffs = probe->dofpr_noffs;
		if (enoff != NULL) {
			dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
			dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
		} else {
			dhpb.dthpb_enoffs = NULL;
			dhpb.dthpb_nenoffs = 0;
		}
		dhpb.dthpb_args = arg + probe->dofpr_argidx;
		dhpb.dthpb_nargc = probe->dofpr_nargc;
		dhpb.dthpb_xargc = probe->dofpr_xargc;
		dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
		dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;

		mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
	}
}

static void
dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_meta_lock));

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provide_one(dhp, sec, pid);
	}

	/*
	 * We may have just created probes, so we must now rematch against
	 * any retained enablings.  Note that this call will acquire both
	 * cpu_lock and dtrace_lock; the fact that we are holding
	 * dtrace_meta_lock now is what defines the ordering with respect to
	 * these three locks.
	 */
	dtrace_enabling_matchall();
}

static void
dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec;
	dof_provider_t *provider;
	char *strtab;
	dtrace_helper_provdesc_t dhpv;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	/*
	 * Remove the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);

	meta->dtm_count--;
}

static void
dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_meta_lock));

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provider_remove_one(dhp, sec, pid);
	}
}
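/*
 * The pattern above -- daddr + dofh_secoff + index * dofh_secsize -- is how
 * every section reference in a DOF image is resolved: the section headers
 * form a contiguous array, and fields like dofpv_strtab hold array indices
 * rather than pointers.  A hypothetical helper making the addressing
 * explicit:
 *
 *	static dof_sec_t *
 *	dof_sec_by_index(dof_hdr_t *dof, uint_t idx)
 *	{
 *		return ((dof_sec_t *)(uintptr_t)((uintptr_t)dof +
 *		    dof->dofh_secoff + idx * dof->dofh_secsize));
 *	}
 */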
/*
 * DTrace Meta Provider-to-Framework API Functions
 *
 * These functions implement the Meta Provider-to-Framework API, as described
 * in <sys/dtrace.h>.
 */
int
dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
    dtrace_meta_provider_id_t *idp)
{
	dtrace_meta_t *meta;
	dtrace_helpers_t *help, *next;
	int i;

	*idp = DTRACE_METAPROVNONE;

	/*
	 * We strictly don't need the name, but we hold onto it for
	 * debuggability.  All hail error queues!
	 */
	if (name == NULL) {
		cmn_err(CE_WARN, "failed to register meta-provider: "
		    "invalid name");
		return (EINVAL);
	}

	if (mops == NULL ||
	    mops->dtms_create_probe == NULL ||
	    mops->dtms_provide_pid == NULL ||
	    mops->dtms_remove_pid == NULL) {
		cmn_err(CE_WARN, "failed to register meta-provider %s: "
		    "invalid ops", name);
		return (EINVAL);
	}

	meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
	meta->dtm_mops = *mops;
	meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(meta->dtm_name, name);
	meta->dtm_arg = arg;

	mutex_enter(&dtrace_meta_lock);
	mutex_enter(&dtrace_lock);

	if (dtrace_meta_pid != NULL) {
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_meta_lock);
		cmn_err(CE_WARN, "failed to register meta-provider %s: "
		    "user-land meta-provider exists", name);
		kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
		kmem_free(meta, sizeof (dtrace_meta_t));
		return (EINVAL);
	}

	dtrace_meta_pid = meta;
	*idp = (dtrace_meta_provider_id_t)meta;

	/*
	 * If there are providers and probes ready to go, pass them
	 * off to the new meta provider now.
	 */

	help = dtrace_deferred_pid;
	dtrace_deferred_pid = NULL;

	mutex_exit(&dtrace_lock);

	while (help != NULL) {
		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    help->dthps_pid);
		}

		next = help->dthps_next;
		help->dthps_next = NULL;
		help->dthps_prev = NULL;
		help->dthps_deferred = 0;
		help = next;
	}

	mutex_exit(&dtrace_meta_lock);

	return (0);
}

int
dtrace_meta_unregister(dtrace_meta_provider_id_t id)
{
	dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;

	mutex_enter(&dtrace_meta_lock);
	mutex_enter(&dtrace_lock);

	if (old == dtrace_meta_pid) {
		pp = &dtrace_meta_pid;
	} else {
		panic("attempt to unregister non-existent "
		    "dtrace meta-provider %p\n", (void *)old);
	}

	if (old->dtm_count != 0) {
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_meta_lock);
		return (EBUSY);
	}

	*pp = NULL;

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_meta_lock);

	kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
	kmem_free(old, sizeof (dtrace_meta_t));

	return (0);
}


/*
 * DTrace DIF Object Functions
 */
static int
dtrace_difo_err(uint_t pc, const char *format, ...)
{
	if (dtrace_err_verbose) {
		va_list alist;

		(void) uprintf("dtrace DIF object error: [%u]: ", pc);
		va_start(alist, format);
		(void) vuprintf(format, alist);
		va_end(alist);
	}

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(format);
#endif
	return (1);
}
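/*
 * dtrace_difo_err() always returns 1, which lets its callers accumulate an
 * error count with the idiom used throughout the validation below:
 *
 *	err += efunc(pc, "invalid register %u\n", rd);
 *
 * A zero 'err' after the walk therefore means that no check fired.
 */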
= dtrace_difo_err; 8180 int kcheckload; 8181 uint_t pc; 8182 8183 kcheckload = cr == NULL || 8184 (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; 8185 8186 dp->dtdo_destructive = 0; 8187 8188 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { 8189 dif_instr_t instr = dp->dtdo_buf[pc]; 8190 8191 uint_t r1 = DIF_INSTR_R1(instr); 8192 uint_t r2 = DIF_INSTR_R2(instr); 8193 uint_t rd = DIF_INSTR_RD(instr); 8194 uint_t rs = DIF_INSTR_RS(instr); 8195 uint_t label = DIF_INSTR_LABEL(instr); 8196 uint_t v = DIF_INSTR_VAR(instr); 8197 uint_t subr = DIF_INSTR_SUBR(instr); 8198 uint_t type = DIF_INSTR_TYPE(instr); 8199 uint_t op = DIF_INSTR_OP(instr); 8200 8201 switch (op) { 8202 case DIF_OP_OR: 8203 case DIF_OP_XOR: 8204 case DIF_OP_AND: 8205 case DIF_OP_SLL: 8206 case DIF_OP_SRL: 8207 case DIF_OP_SRA: 8208 case DIF_OP_SUB: 8209 case DIF_OP_ADD: 8210 case DIF_OP_MUL: 8211 case DIF_OP_SDIV: 8212 case DIF_OP_UDIV: 8213 case DIF_OP_SREM: 8214 case DIF_OP_UREM: 8215 case DIF_OP_COPYS: 8216 if (r1 >= nregs) 8217 err += efunc(pc, "invalid register %u\n", r1); 8218 if (r2 >= nregs) 8219 err += efunc(pc, "invalid register %u\n", r2); 8220 if (rd >= nregs) 8221 err += efunc(pc, "invalid register %u\n", rd); 8222 if (rd == 0) 8223 err += efunc(pc, "cannot write to %r0\n"); 8224 break; 8225 case DIF_OP_NOT: 8226 case DIF_OP_MOV: 8227 case DIF_OP_ALLOCS: 8228 if (r1 >= nregs) 8229 err += efunc(pc, "invalid register %u\n", r1); 8230 if (r2 != 0) 8231 err += efunc(pc, "non-zero reserved bits\n"); 8232 if (rd >= nregs) 8233 err += efunc(pc, "invalid register %u\n", rd); 8234 if (rd == 0) 8235 err += efunc(pc, "cannot write to %r0\n"); 8236 break; 8237 case DIF_OP_LDSB: 8238 case DIF_OP_LDSH: 8239 case DIF_OP_LDSW: 8240 case DIF_OP_LDUB: 8241 case DIF_OP_LDUH: 8242 case DIF_OP_LDUW: 8243 case DIF_OP_LDX: 8244 if (r1 >= nregs) 8245 err += efunc(pc, "invalid register %u\n", r1); 8246 if (r2 != 0) 8247 err += efunc(pc, "non-zero reserved bits\n"); 8248 if (rd >= nregs) 8249 err += efunc(pc, "invalid register %u\n", rd); 8250 if (rd == 0) 8251 err += efunc(pc, "cannot write to %r0\n"); 8252 if (kcheckload) 8253 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + 8254 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); 8255 break; 8256 case DIF_OP_RLDSB: 8257 case DIF_OP_RLDSH: 8258 case DIF_OP_RLDSW: 8259 case DIF_OP_RLDUB: 8260 case DIF_OP_RLDUH: 8261 case DIF_OP_RLDUW: 8262 case DIF_OP_RLDX: 8263 if (r1 >= nregs) 8264 err += efunc(pc, "invalid register %u\n", r1); 8265 if (r2 != 0) 8266 err += efunc(pc, "non-zero reserved bits\n"); 8267 if (rd >= nregs) 8268 err += efunc(pc, "invalid register %u\n", rd); 8269 if (rd == 0) 8270 err += efunc(pc, "cannot write to %r0\n"); 8271 break; 8272 case DIF_OP_ULDSB: 8273 case DIF_OP_ULDSH: 8274 case DIF_OP_ULDSW: 8275 case DIF_OP_ULDUB: 8276 case DIF_OP_ULDUH: 8277 case DIF_OP_ULDUW: 8278 case DIF_OP_ULDX: 8279 if (r1 >= nregs) 8280 err += efunc(pc, "invalid register %u\n", r1); 8281 if (r2 != 0) 8282 err += efunc(pc, "non-zero reserved bits\n"); 8283 if (rd >= nregs) 8284 err += efunc(pc, "invalid register %u\n", rd); 8285 if (rd == 0) 8286 err += efunc(pc, "cannot write to %r0\n"); 8287 break; 8288 case DIF_OP_STB: 8289 case DIF_OP_STH: 8290 case DIF_OP_STW: 8291 case DIF_OP_STX: 8292 if (r1 >= nregs) 8293 err += efunc(pc, "invalid register %u\n", r1); 8294 if (r2 != 0) 8295 err += efunc(pc, "non-zero reserved bits\n"); 8296 if (rd >= nregs) 8297 err += efunc(pc, "invalid register %u\n", rd); 8298 if (rd == 0) 8299 err += efunc(pc, "cannot write to 0 address\n"); 8300 break; 8301 case 
DIF_OP_CMP: 8302 case DIF_OP_SCMP: 8303 if (r1 >= nregs) 8304 err += efunc(pc, "invalid register %u\n", r1); 8305 if (r2 >= nregs) 8306 err += efunc(pc, "invalid register %u\n", r2); 8307 if (rd != 0) 8308 err += efunc(pc, "non-zero reserved bits\n"); 8309 break; 8310 case DIF_OP_TST: 8311 if (r1 >= nregs) 8312 err += efunc(pc, "invalid register %u\n", r1); 8313 if (r2 != 0 || rd != 0) 8314 err += efunc(pc, "non-zero reserved bits\n"); 8315 break; 8316 case DIF_OP_BA: 8317 case DIF_OP_BE: 8318 case DIF_OP_BNE: 8319 case DIF_OP_BG: 8320 case DIF_OP_BGU: 8321 case DIF_OP_BGE: 8322 case DIF_OP_BGEU: 8323 case DIF_OP_BL: 8324 case DIF_OP_BLU: 8325 case DIF_OP_BLE: 8326 case DIF_OP_BLEU: 8327 if (label >= dp->dtdo_len) { 8328 err += efunc(pc, "invalid branch target %u\n", 8329 label); 8330 } 8331 if (label <= pc) { 8332 err += efunc(pc, "backward branch to %u\n", 8333 label); 8334 } 8335 break; 8336 case DIF_OP_RET: 8337 if (r1 != 0 || r2 != 0) 8338 err += efunc(pc, "non-zero reserved bits\n"); 8339 if (rd >= nregs) 8340 err += efunc(pc, "invalid register %u\n", rd); 8341 break; 8342 case DIF_OP_NOP: 8343 case DIF_OP_POPTS: 8344 case DIF_OP_FLUSHTS: 8345 if (r1 != 0 || r2 != 0 || rd != 0) 8346 err += efunc(pc, "non-zero reserved bits\n"); 8347 break; 8348 case DIF_OP_SETX: 8349 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) { 8350 err += efunc(pc, "invalid integer ref %u\n", 8351 DIF_INSTR_INTEGER(instr)); 8352 } 8353 if (rd >= nregs) 8354 err += efunc(pc, "invalid register %u\n", rd); 8355 if (rd == 0) 8356 err += efunc(pc, "cannot write to %r0\n"); 8357 break; 8358 case DIF_OP_SETS: 8359 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) { 8360 err += efunc(pc, "invalid string ref %u\n", 8361 DIF_INSTR_STRING(instr)); 8362 } 8363 if (rd >= nregs) 8364 err += efunc(pc, "invalid register %u\n", rd); 8365 if (rd == 0) 8366 err += efunc(pc, "cannot write to %r0\n"); 8367 break; 8368 case DIF_OP_LDGA: 8369 case DIF_OP_LDTA: 8370 if (r1 > DIF_VAR_ARRAY_MAX) 8371 err += efunc(pc, "invalid array %u\n", r1); 8372 if (r2 >= nregs) 8373 err += efunc(pc, "invalid register %u\n", r2); 8374 if (rd >= nregs) 8375 err += efunc(pc, "invalid register %u\n", rd); 8376 if (rd == 0) 8377 err += efunc(pc, "cannot write to %r0\n"); 8378 break; 8379 case DIF_OP_LDGS: 8380 case DIF_OP_LDTS: 8381 case DIF_OP_LDLS: 8382 case DIF_OP_LDGAA: 8383 case DIF_OP_LDTAA: 8384 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX) 8385 err += efunc(pc, "invalid variable %u\n", v); 8386 if (rd >= nregs) 8387 err += efunc(pc, "invalid register %u\n", rd); 8388 if (rd == 0) 8389 err += efunc(pc, "cannot write to %r0\n"); 8390 break; 8391 case DIF_OP_STGS: 8392 case DIF_OP_STTS: 8393 case DIF_OP_STLS: 8394 case DIF_OP_STGAA: 8395 case DIF_OP_STTAA: 8396 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX) 8397 err += efunc(pc, "invalid variable %u\n", v); 8398 if (rs >= nregs) 8399 err += efunc(pc, "invalid register %u\n", rs); 8400 break; 8401 case DIF_OP_CALL: 8402 if (subr > DIF_SUBR_MAX) 8403 err += efunc(pc, "invalid subr %u\n", subr); 8404 if (rd >= nregs) 8405 err += efunc(pc, "invalid register %u\n", rd); 8406 if (rd == 0) 8407 err += efunc(pc, "cannot write to %r0\n"); 8408 8409 if (subr == DIF_SUBR_COPYOUT || 8410 subr == DIF_SUBR_COPYOUTSTR) { 8411 dp->dtdo_destructive = 1; 8412 } 8413 break; 8414 case DIF_OP_PUSHTR: 8415 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF) 8416 err += efunc(pc, "invalid ref type %u\n", type); 8417 if (r2 >= nregs) 8418 err += efunc(pc, "invalid register %u\n", r2); 8419 if (rs >= nregs) 
8420 err += efunc(pc, "invalid register %u\n", rs); 8421 break; 8422 case DIF_OP_PUSHTV: 8423 if (type != DIF_TYPE_CTF) 8424 err += efunc(pc, "invalid val type %u\n", type); 8425 if (r2 >= nregs) 8426 err += efunc(pc, "invalid register %u\n", r2); 8427 if (rs >= nregs) 8428 err += efunc(pc, "invalid register %u\n", rs); 8429 break; 8430 default: 8431 err += efunc(pc, "invalid opcode %u\n", 8432 DIF_INSTR_OP(instr)); 8433 } 8434 } 8435 8436 if (dp->dtdo_len != 0 && 8437 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) { 8438 err += efunc(dp->dtdo_len - 1, 8439 "expected 'ret' as last DIF instruction\n"); 8440 } 8441 8442 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) { 8443 /* 8444 * If we're not returning by reference, the size must be either 8445 * 0 or the size of one of the base types. 8446 */ 8447 switch (dp->dtdo_rtype.dtdt_size) { 8448 case 0: 8449 case sizeof (uint8_t): 8450 case sizeof (uint16_t): 8451 case sizeof (uint32_t): 8452 case sizeof (uint64_t): 8453 break; 8454 8455 default: 8456 err += efunc(dp->dtdo_len - 1, "bad return size\n"); 8457 } 8458 } 8459 8460 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { 8461 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; 8462 dtrace_diftype_t *vt, *et; 8463 uint_t id, ndx; 8464 8465 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && 8466 v->dtdv_scope != DIFV_SCOPE_THREAD && 8467 v->dtdv_scope != DIFV_SCOPE_LOCAL) { 8468 err += efunc(i, "unrecognized variable scope %d\n", 8469 v->dtdv_scope); 8470 break; 8471 } 8472 8473 if (v->dtdv_kind != DIFV_KIND_ARRAY && 8474 v->dtdv_kind != DIFV_KIND_SCALAR) { 8475 err += efunc(i, "unrecognized variable type %d\n", 8476 v->dtdv_kind); 8477 break; 8478 } 8479 8480 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) { 8481 err += efunc(i, "%d exceeds variable id limit\n", id); 8482 break; 8483 } 8484 8485 if (id < DIF_VAR_OTHER_UBASE) 8486 continue; 8487 8488 /* 8489 * For user-defined variables, we need to check that this 8490 * definition is identical to any previous definition that we 8491 * encountered. 
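* (Illustrative note, editorial: if one D clause defines a global variable as an integer and a later clause redefines it as a string, the string definition carries DIF_TF_BYREF, so the second DIFO to be validated fails below with "changed variable type flags" rather than silently aliasing the same variable slot.)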
8492 */ 8493 ndx = id - DIF_VAR_OTHER_UBASE; 8494 8495 switch (v->dtdv_scope) { 8496 case DIFV_SCOPE_GLOBAL: 8497 if (ndx < vstate->dtvs_nglobals) { 8498 dtrace_statvar_t *svar; 8499 8500 if ((svar = vstate->dtvs_globals[ndx]) != NULL) 8501 existing = &svar->dtsv_var; 8502 } 8503 8504 break; 8505 8506 case DIFV_SCOPE_THREAD: 8507 if (ndx < vstate->dtvs_ntlocals) 8508 existing = &vstate->dtvs_tlocals[ndx]; 8509 break; 8510 8511 case DIFV_SCOPE_LOCAL: 8512 if (ndx < vstate->dtvs_nlocals) { 8513 dtrace_statvar_t *svar; 8514 8515 if ((svar = vstate->dtvs_locals[ndx]) != NULL) 8516 existing = &svar->dtsv_var; 8517 } 8518 8519 break; 8520 } 8521 8522 vt = &v->dtdv_type; 8523 8524 if (vt->dtdt_flags & DIF_TF_BYREF) { 8525 if (vt->dtdt_size == 0) { 8526 err += efunc(i, "zero-sized variable\n"); 8527 break; 8528 } 8529 8530 if (v->dtdv_scope == DIFV_SCOPE_GLOBAL && 8531 vt->dtdt_size > dtrace_global_maxsize) { 8532 err += efunc(i, "oversized by-ref global\n"); 8533 break; 8534 } 8535 } 8536 8537 if (existing == NULL || existing->dtdv_id == 0) 8538 continue; 8539 8540 ASSERT(existing->dtdv_id == v->dtdv_id); 8541 ASSERT(existing->dtdv_scope == v->dtdv_scope); 8542 8543 if (existing->dtdv_kind != v->dtdv_kind) 8544 err += efunc(i, "%d changed variable kind\n", id); 8545 8546 et = &existing->dtdv_type; 8547 8548 if (vt->dtdt_flags != et->dtdt_flags) { 8549 err += efunc(i, "%d changed variable type flags\n", id); 8550 break; 8551 } 8552 8553 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) { 8554 err += efunc(i, "%d changed variable type size\n", id); 8555 break; 8556 } 8557 } 8558 8559 return (err); 8560 } 8561 8562 /* 8563 * Validate a DTrace DIF object that is to be used as a helper. Helpers 8564 * are much more constrained than normal DIFOs. Specifically, they may 8565 * not: 8566 * 8567 * 1. Make calls to subroutines other than copyin(), copyinstr() or 8568 * miscellaneous string routines 8569 * 2. Access DTrace variables other than the args[] array, and the 8570 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables. 8571 * 3. Have thread-local variables. 8572 * 4. Have dynamic variables. 8573 */ 8574 static int 8575 dtrace_difo_validate_helper(dtrace_difo_t *dp) 8576 { 8577 int (*efunc)(uint_t pc, const char *, ...) 
= dtrace_difo_err; 8578 int err = 0; 8579 uint_t pc; 8580 8581 for (pc = 0; pc < dp->dtdo_len; pc++) { 8582 dif_instr_t instr = dp->dtdo_buf[pc]; 8583 8584 uint_t v = DIF_INSTR_VAR(instr); 8585 uint_t subr = DIF_INSTR_SUBR(instr); 8586 uint_t op = DIF_INSTR_OP(instr); 8587 8588 switch (op) { 8589 case DIF_OP_OR: 8590 case DIF_OP_XOR: 8591 case DIF_OP_AND: 8592 case DIF_OP_SLL: 8593 case DIF_OP_SRL: 8594 case DIF_OP_SRA: 8595 case DIF_OP_SUB: 8596 case DIF_OP_ADD: 8597 case DIF_OP_MUL: 8598 case DIF_OP_SDIV: 8599 case DIF_OP_UDIV: 8600 case DIF_OP_SREM: 8601 case DIF_OP_UREM: 8602 case DIF_OP_COPYS: 8603 case DIF_OP_NOT: 8604 case DIF_OP_MOV: 8605 case DIF_OP_RLDSB: 8606 case DIF_OP_RLDSH: 8607 case DIF_OP_RLDSW: 8608 case DIF_OP_RLDUB: 8609 case DIF_OP_RLDUH: 8610 case DIF_OP_RLDUW: 8611 case DIF_OP_RLDX: 8612 case DIF_OP_ULDSB: 8613 case DIF_OP_ULDSH: 8614 case DIF_OP_ULDSW: 8615 case DIF_OP_ULDUB: 8616 case DIF_OP_ULDUH: 8617 case DIF_OP_ULDUW: 8618 case DIF_OP_ULDX: 8619 case DIF_OP_STB: 8620 case DIF_OP_STH: 8621 case DIF_OP_STW: 8622 case DIF_OP_STX: 8623 case DIF_OP_ALLOCS: 8624 case DIF_OP_CMP: 8625 case DIF_OP_SCMP: 8626 case DIF_OP_TST: 8627 case DIF_OP_BA: 8628 case DIF_OP_BE: 8629 case DIF_OP_BNE: 8630 case DIF_OP_BG: 8631 case DIF_OP_BGU: 8632 case DIF_OP_BGE: 8633 case DIF_OP_BGEU: 8634 case DIF_OP_BL: 8635 case DIF_OP_BLU: 8636 case DIF_OP_BLE: 8637 case DIF_OP_BLEU: 8638 case DIF_OP_RET: 8639 case DIF_OP_NOP: 8640 case DIF_OP_POPTS: 8641 case DIF_OP_FLUSHTS: 8642 case DIF_OP_SETX: 8643 case DIF_OP_SETS: 8644 case DIF_OP_LDGA: 8645 case DIF_OP_LDLS: 8646 case DIF_OP_STGS: 8647 case DIF_OP_STLS: 8648 case DIF_OP_PUSHTR: 8649 case DIF_OP_PUSHTV: 8650 break; 8651 8652 case DIF_OP_LDGS: 8653 if (v >= DIF_VAR_OTHER_UBASE) 8654 break; 8655 8656 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) 8657 break; 8658 8659 if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID || 8660 v == DIF_VAR_PPID || v == DIF_VAR_TID || 8661 v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME || 8662 v == DIF_VAR_UID || v == DIF_VAR_GID) 8663 break; 8664 8665 err += efunc(pc, "illegal variable %u\n", v); 8666 break; 8667 8668 case DIF_OP_LDTA: 8669 case DIF_OP_LDTS: 8670 case DIF_OP_LDGAA: 8671 case DIF_OP_LDTAA: 8672 err += efunc(pc, "illegal dynamic variable load\n"); 8673 break; 8674 8675 case DIF_OP_STTS: 8676 case DIF_OP_STGAA: 8677 case DIF_OP_STTAA: 8678 err += efunc(pc, "illegal dynamic variable store\n"); 8679 break; 8680 8681 case DIF_OP_CALL: 8682 if (subr == DIF_SUBR_ALLOCA || 8683 subr == DIF_SUBR_BCOPY || 8684 subr == DIF_SUBR_COPYIN || 8685 subr == DIF_SUBR_COPYINTO || 8686 subr == DIF_SUBR_COPYINSTR || 8687 subr == DIF_SUBR_INDEX || 8688 subr == DIF_SUBR_INET_NTOA || 8689 subr == DIF_SUBR_INET_NTOA6 || 8690 subr == DIF_SUBR_INET_NTOP || 8691 subr == DIF_SUBR_LLTOSTR || 8692 subr == DIF_SUBR_RINDEX || 8693 subr == DIF_SUBR_STRCHR || 8694 subr == DIF_SUBR_STRJOIN || 8695 subr == DIF_SUBR_STRRCHR || 8696 subr == DIF_SUBR_STRSTR || 8697 subr == DIF_SUBR_HTONS || 8698 subr == DIF_SUBR_HTONL || 8699 subr == DIF_SUBR_HTONLL || 8700 subr == DIF_SUBR_NTOHS || 8701 subr == DIF_SUBR_NTOHL || 8702 subr == DIF_SUBR_NTOHLL) 8703 break; 8704 8705 err += efunc(pc, "invalid subr %u\n", subr); 8706 break; 8707 8708 default: 8709 err += efunc(pc, "invalid opcode %u\n", 8710 DIF_INSTR_OP(instr)); 8711 } 8712 } 8713 8714 return (err); 8715 } 8716 8717 /* 8718 * Returns 1 if the expression in the DIF object can be cached on a per-thread 8719 * basis; 0 if not. 
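* (Illustrative note, editorial: a predicate that depends only on thread-invariant variables such as execname or pid evaluates to the same value for a given thread and can be cached, while one that references timestamp, or that loads from memory, must be re-evaluated on every firing and is therefore not cacheable.)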
8720 */ 8721 static int 8722 dtrace_difo_cacheable(dtrace_difo_t *dp) 8723 { 8724 int i; 8725 8726 if (dp == NULL) 8727 return (0); 8728 8729 for (i = 0; i < dp->dtdo_varlen; i++) { 8730 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8731 8732 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL) 8733 continue; 8734 8735 switch (v->dtdv_id) { 8736 case DIF_VAR_CURTHREAD: 8737 case DIF_VAR_PID: 8738 case DIF_VAR_TID: 8739 case DIF_VAR_EXECNAME: 8740 case DIF_VAR_ZONENAME: 8741 break; 8742 8743 default: 8744 return (0); 8745 } 8746 } 8747 8748 /* 8749 * This DIF object may be cacheable. Now we need to look for any 8750 * array loading instructions, any memory loading instructions, or 8751 * any stores to thread-local variables. 8752 */ 8753 for (i = 0; i < dp->dtdo_len; i++) { 8754 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]); 8755 8756 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) || 8757 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) || 8758 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) || 8759 op == DIF_OP_LDGA || op == DIF_OP_STTS) 8760 return (0); 8761 } 8762 8763 return (1); 8764 } 8765 8766 static void 8767 dtrace_difo_hold(dtrace_difo_t *dp) 8768 { 8769 int i; 8770 8771 ASSERT(MUTEX_HELD(&dtrace_lock)); 8772 8773 dp->dtdo_refcnt++; 8774 ASSERT(dp->dtdo_refcnt != 0); 8775 8776 /* 8777 * We need to check this DIF object for references to the variable 8778 * DIF_VAR_VTIMESTAMP. 8779 */ 8780 for (i = 0; i < dp->dtdo_varlen; i++) { 8781 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8782 8783 if (v->dtdv_id != DIF_VAR_VTIMESTAMP) 8784 continue; 8785 8786 if (dtrace_vtime_references++ == 0) 8787 dtrace_vtime_enable(); 8788 } 8789 } 8790 8791 /* 8792 * This routine calculates the dynamic variable chunksize for a given DIF 8793 * object. The calculation is not fool-proof, and can probably be tricked by 8794 * malicious DIF -- but it works for all compiler-generated DIF. Because this 8795 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail 8796 * if a dynamic variable size exceeds the chunksize. 
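* (Worked example, editorial: for a thread-local assignment such as self->last = timestamp, the "stts" instruction carries two zero-sized keys, so the allocation computed below is sizeof (dtrace_dynvar_t) plus one additional dtrace_key_t plus the 8-byte stored datum, rounded up to a multiple of 8; the chunksize is then raised to that value if it is currently smaller.)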
8797 */ 8798 static void 8799 dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 8800 { 8801 uint64_t sval; 8802 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ 8803 const dif_instr_t *text = dp->dtdo_buf; 8804 uint_t pc, srd = 0; 8805 uint_t ttop = 0; 8806 size_t size, ksize; 8807 uint_t id, i; 8808 8809 for (pc = 0; pc < dp->dtdo_len; pc++) { 8810 dif_instr_t instr = text[pc]; 8811 uint_t op = DIF_INSTR_OP(instr); 8812 uint_t rd = DIF_INSTR_RD(instr); 8813 uint_t r1 = DIF_INSTR_R1(instr); 8814 uint_t nkeys = 0; 8815 uchar_t scope; 8816 8817 dtrace_key_t *key = tupregs; 8818 8819 switch (op) { 8820 case DIF_OP_SETX: 8821 sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)]; 8822 srd = rd; 8823 continue; 8824 8825 case DIF_OP_STTS: 8826 key = &tupregs[DIF_DTR_NREGS]; 8827 key[0].dttk_size = 0; 8828 key[1].dttk_size = 0; 8829 nkeys = 2; 8830 scope = DIFV_SCOPE_THREAD; 8831 break; 8832 8833 case DIF_OP_STGAA: 8834 case DIF_OP_STTAA: 8835 nkeys = ttop; 8836 8837 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) 8838 key[nkeys++].dttk_size = 0; 8839 8840 key[nkeys++].dttk_size = 0; 8841 8842 if (op == DIF_OP_STTAA) { 8843 scope = DIFV_SCOPE_THREAD; 8844 } else { 8845 scope = DIFV_SCOPE_GLOBAL; 8846 } 8847 8848 break; 8849 8850 case DIF_OP_PUSHTR: 8851 if (ttop == DIF_DTR_NREGS) 8852 return; 8853 8854 if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) { 8855 /* 8856 * If the register for the size of the "pushtr" 8857 * is %r0 (or the value is 0) and the type is 8858 * a string, we'll use the system-wide default 8859 * string size. 8860 */ 8861 tupregs[ttop++].dttk_size = 8862 dtrace_strsize_default; 8863 } else { 8864 if (srd == 0) 8865 return; 8866 8867 tupregs[ttop++].dttk_size = sval; 8868 } 8869 8870 break; 8871 8872 case DIF_OP_PUSHTV: 8873 if (ttop == DIF_DTR_NREGS) 8874 return; 8875 8876 tupregs[ttop++].dttk_size = 0; 8877 break; 8878 8879 case DIF_OP_FLUSHTS: 8880 ttop = 0; 8881 break; 8882 8883 case DIF_OP_POPTS: 8884 if (ttop != 0) 8885 ttop--; 8886 break; 8887 } 8888 8889 sval = 0; 8890 srd = 0; 8891 8892 if (nkeys == 0) 8893 continue; 8894 8895 /* 8896 * We have a dynamic variable allocation; calculate its size. 8897 */ 8898 for (ksize = 0, i = 0; i < nkeys; i++) 8899 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); 8900 8901 size = sizeof (dtrace_dynvar_t); 8902 size += sizeof (dtrace_key_t) * (nkeys - 1); 8903 size += ksize; 8904 8905 /* 8906 * Now we need to determine the size of the stored data. 8907 */ 8908 id = DIF_INSTR_VAR(instr); 8909 8910 for (i = 0; i < dp->dtdo_varlen; i++) { 8911 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8912 8913 if (v->dtdv_id == id && v->dtdv_scope == scope) { 8914 size += v->dtdv_type.dtdt_size; 8915 break; 8916 } 8917 } 8918 8919 if (i == dp->dtdo_varlen) 8920 return; 8921 8922 /* 8923 * We have the size. If this is larger than the chunk size 8924 * for our dynamic variable state, reset the chunk size. 
8925 */ 8926 size = P2ROUNDUP(size, sizeof (uint64_t)); 8927 8928 if (size > vstate->dtvs_dynvars.dtds_chunksize) 8929 vstate->dtvs_dynvars.dtds_chunksize = size; 8930 } 8931 } 8932 8933 static void 8934 dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 8935 { 8936 int i, oldsvars, osz, nsz, otlocals, ntlocals; 8937 uint_t id; 8938 8939 ASSERT(MUTEX_HELD(&dtrace_lock)); 8940 ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); 8941 8942 for (i = 0; i < dp->dtdo_varlen; i++) { 8943 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8944 dtrace_statvar_t *svar, ***svarp; 8945 size_t dsize = 0; 8946 uint8_t scope = v->dtdv_scope; 8947 int *np; 8948 8949 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) 8950 continue; 8951 8952 id -= DIF_VAR_OTHER_UBASE; 8953 8954 switch (scope) { 8955 case DIFV_SCOPE_THREAD: 8956 while (id >= (otlocals = vstate->dtvs_ntlocals)) { 8957 dtrace_difv_t *tlocals; 8958 8959 if ((ntlocals = (otlocals << 1)) == 0) 8960 ntlocals = 1; 8961 8962 osz = otlocals * sizeof (dtrace_difv_t); 8963 nsz = ntlocals * sizeof (dtrace_difv_t); 8964 8965 tlocals = kmem_zalloc(nsz, KM_SLEEP); 8966 8967 if (osz != 0) { 8968 bcopy(vstate->dtvs_tlocals, 8969 tlocals, osz); 8970 kmem_free(vstate->dtvs_tlocals, osz); 8971 } 8972 8973 vstate->dtvs_tlocals = tlocals; 8974 vstate->dtvs_ntlocals = ntlocals; 8975 } 8976 8977 vstate->dtvs_tlocals[id] = *v; 8978 continue; 8979 8980 case DIFV_SCOPE_LOCAL: 8981 np = &vstate->dtvs_nlocals; 8982 svarp = &vstate->dtvs_locals; 8983 8984 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) 8985 dsize = NCPU * (v->dtdv_type.dtdt_size + 8986 sizeof (uint64_t)); 8987 else 8988 dsize = NCPU * sizeof (uint64_t); 8989 8990 break; 8991 8992 case DIFV_SCOPE_GLOBAL: 8993 np = &vstate->dtvs_nglobals; 8994 svarp = &vstate->dtvs_globals; 8995 8996 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) 8997 dsize = v->dtdv_type.dtdt_size + 8998 sizeof (uint64_t); 8999 9000 break; 9001 9002 default: 9003 ASSERT(0); 9004 } 9005 9006 while (id >= (oldsvars = *np)) { 9007 dtrace_statvar_t **statics; 9008 int newsvars, oldsize, newsize; 9009 9010 if ((newsvars = (oldsvars << 1)) == 0) 9011 newsvars = 1; 9012 9013 oldsize = oldsvars * sizeof (dtrace_statvar_t *); 9014 newsize = newsvars * sizeof (dtrace_statvar_t *); 9015 9016 statics = kmem_zalloc(newsize, KM_SLEEP); 9017 9018 if (oldsize != 0) { 9019 bcopy(*svarp, statics, oldsize); 9020 kmem_free(*svarp, oldsize); 9021 } 9022 9023 *svarp = statics; 9024 *np = newsvars; 9025 } 9026 9027 if ((svar = (*svarp)[id]) == NULL) { 9028 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP); 9029 svar->dtsv_var = *v; 9030 9031 if ((svar->dtsv_size = dsize) != 0) { 9032 svar->dtsv_data = (uint64_t)(uintptr_t) 9033 kmem_zalloc(dsize, KM_SLEEP); 9034 } 9035 9036 (*svarp)[id] = svar; 9037 } 9038 9039 svar->dtsv_refcnt++; 9040 } 9041 9042 dtrace_difo_chunksize(dp, vstate); 9043 dtrace_difo_hold(dp); 9044 } 9045 9046 static dtrace_difo_t * 9047 dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 9048 { 9049 dtrace_difo_t *new; 9050 size_t sz; 9051 9052 ASSERT(dp->dtdo_buf != NULL); 9053 ASSERT(dp->dtdo_refcnt != 0); 9054 9055 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); 9056 9057 ASSERT(dp->dtdo_buf != NULL); 9058 sz = dp->dtdo_len * sizeof (dif_instr_t); 9059 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP); 9060 bcopy(dp->dtdo_buf, new->dtdo_buf, sz); 9061 new->dtdo_len = dp->dtdo_len; 9062 9063 if (dp->dtdo_strtab != NULL) { 9064 ASSERT(dp->dtdo_strlen != 0); 9065 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP); 9066 
bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen); 9067 new->dtdo_strlen = dp->dtdo_strlen; 9068 } 9069 9070 if (dp->dtdo_inttab != NULL) { 9071 ASSERT(dp->dtdo_intlen != 0); 9072 sz = dp->dtdo_intlen * sizeof (uint64_t); 9073 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP); 9074 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz); 9075 new->dtdo_intlen = dp->dtdo_intlen; 9076 } 9077 9078 if (dp->dtdo_vartab != NULL) { 9079 ASSERT(dp->dtdo_varlen != 0); 9080 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t); 9081 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP); 9082 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz); 9083 new->dtdo_varlen = dp->dtdo_varlen; 9084 } 9085 9086 dtrace_difo_init(new, vstate); 9087 return (new); 9088 } 9089 9090 static void 9091 dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 9092 { 9093 int i; 9094 9095 ASSERT(dp->dtdo_refcnt == 0); 9096 9097 for (i = 0; i < dp->dtdo_varlen; i++) { 9098 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 9099 dtrace_statvar_t *svar, **svarp; 9100 uint_t id; 9101 uint8_t scope = v->dtdv_scope; 9102 int *np; 9103 9104 switch (scope) { 9105 case DIFV_SCOPE_THREAD: 9106 continue; 9107 9108 case DIFV_SCOPE_LOCAL: 9109 np = &vstate->dtvs_nlocals; 9110 svarp = vstate->dtvs_locals; 9111 break; 9112 9113 case DIFV_SCOPE_GLOBAL: 9114 np = &vstate->dtvs_nglobals; 9115 svarp = vstate->dtvs_globals; 9116 break; 9117 9118 default: 9119 ASSERT(0); 9120 } 9121 9122 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) 9123 continue; 9124 9125 id -= DIF_VAR_OTHER_UBASE; 9126 ASSERT(id < *np); 9127 9128 svar = svarp[id]; 9129 ASSERT(svar != NULL); 9130 ASSERT(svar->dtsv_refcnt > 0); 9131 9132 if (--svar->dtsv_refcnt > 0) 9133 continue; 9134 9135 if (svar->dtsv_size != 0) { 9136 ASSERT(svar->dtsv_data != NULL); 9137 kmem_free((void *)(uintptr_t)svar->dtsv_data, 9138 svar->dtsv_size); 9139 } 9140 9141 kmem_free(svar, sizeof (dtrace_statvar_t)); 9142 svarp[id] = NULL; 9143 } 9144 9145 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); 9146 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); 9147 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); 9148 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); 9149 9150 kmem_free(dp, sizeof (dtrace_difo_t)); 9151 } 9152 9153 static void 9154 dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 9155 { 9156 int i; 9157 9158 ASSERT(MUTEX_HELD(&dtrace_lock)); 9159 ASSERT(dp->dtdo_refcnt != 0); 9160 9161 for (i = 0; i < dp->dtdo_varlen; i++) { 9162 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 9163 9164 if (v->dtdv_id != DIF_VAR_VTIMESTAMP) 9165 continue; 9166 9167 ASSERT(dtrace_vtime_references > 0); 9168 if (--dtrace_vtime_references == 0) 9169 dtrace_vtime_disable(); 9170 } 9171 9172 if (--dp->dtdo_refcnt == 0) 9173 dtrace_difo_destroy(dp, vstate); 9174 } 9175 9176 /* 9177 * DTrace Format Functions 9178 */ 9179 static uint16_t 9180 dtrace_format_add(dtrace_state_t *state, char *str) 9181 { 9182 char *fmt, **new; 9183 uint16_t ndx, len = strlen(str) + 1; 9184 9185 fmt = kmem_zalloc(len, KM_SLEEP); 9186 bcopy(str, fmt, len); 9187 9188 for (ndx = 0; ndx < state->dts_nformats; ndx++) { 9189 if (state->dts_formats[ndx] == NULL) { 9190 state->dts_formats[ndx] = fmt; 9191 return (ndx + 1); 9192 } 9193 } 9194 9195 if (state->dts_nformats == USHRT_MAX) { 9196 /* 9197 * This is only likely if a denial-of-service attack is being 9198 * attempted. As such, it's okay to fail silently here. 
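* (Editorial note: the indices returned from dtrace_format_add() are one-based; zero is reserved to mean "no format", which is why dtrace_format_remove() and the consumers of dtrd_format index dts_formats[format - 1].)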
9199 */ 9200 kmem_free(fmt, len); 9201 return (0); 9202 } 9203 9204 /* 9205 * For simplicity, we always resize the formats array to be exactly the 9206 * number of formats. 9207 */ 9208 ndx = state->dts_nformats++; 9209 new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP); 9210 9211 if (state->dts_formats != NULL) { 9212 ASSERT(ndx != 0); 9213 bcopy(state->dts_formats, new, ndx * sizeof (char *)); 9214 kmem_free(state->dts_formats, ndx * sizeof (char *)); 9215 } 9216 9217 state->dts_formats = new; 9218 state->dts_formats[ndx] = fmt; 9219 9220 return (ndx + 1); 9221 } 9222 9223 static void 9224 dtrace_format_remove(dtrace_state_t *state, uint16_t format) 9225 { 9226 char *fmt; 9227 9228 ASSERT(state->dts_formats != NULL); 9229 ASSERT(format <= state->dts_nformats); 9230 ASSERT(state->dts_formats[format - 1] != NULL); 9231 9232 fmt = state->dts_formats[format - 1]; 9233 kmem_free(fmt, strlen(fmt) + 1); 9234 state->dts_formats[format - 1] = NULL; 9235 } 9236 9237 static void 9238 dtrace_format_destroy(dtrace_state_t *state) 9239 { 9240 int i; 9241 9242 if (state->dts_nformats == 0) { 9243 ASSERT(state->dts_formats == NULL); 9244 return; 9245 } 9246 9247 ASSERT(state->dts_formats != NULL); 9248 9249 for (i = 0; i < state->dts_nformats; i++) { 9250 char *fmt = state->dts_formats[i]; 9251 9252 if (fmt == NULL) 9253 continue; 9254 9255 kmem_free(fmt, strlen(fmt) + 1); 9256 } 9257 9258 kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *)); 9259 state->dts_nformats = 0; 9260 state->dts_formats = NULL; 9261 } 9262 9263 /* 9264 * DTrace Predicate Functions 9265 */ 9266 static dtrace_predicate_t * 9267 dtrace_predicate_create(dtrace_difo_t *dp) 9268 { 9269 dtrace_predicate_t *pred; 9270 9271 ASSERT(MUTEX_HELD(&dtrace_lock)); 9272 ASSERT(dp->dtdo_refcnt != 0); 9273 9274 pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP); 9275 pred->dtp_difo = dp; 9276 pred->dtp_refcnt = 1; 9277 9278 if (!dtrace_difo_cacheable(dp)) 9279 return (pred); 9280 9281 if (dtrace_predcache_id == DTRACE_CACHEIDNONE) { 9282 /* 9283 * This is only theoretically possible -- we have had 2^32 9284 * cacheable predicates on this machine. We cannot allow any 9285 * more predicates to become cacheable: as unlikely as it is, 9286 * there may be a thread caching a (now stale) predicate cache 9287 * ID. 
(N.B.: the temptation is being successfully resisted to 9288 * have this cmn_err() "Holy shit -- we executed this code!") 9289 */ 9290 return (pred); 9291 } 9292 9293 pred->dtp_cacheid = dtrace_predcache_id++; 9294 9295 return (pred); 9296 } 9297 9298 static void 9299 dtrace_predicate_hold(dtrace_predicate_t *pred) 9300 { 9301 ASSERT(MUTEX_HELD(&dtrace_lock)); 9302 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); 9303 ASSERT(pred->dtp_refcnt > 0); 9304 9305 pred->dtp_refcnt++; 9306 } 9307 9308 static void 9309 dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) 9310 { 9311 dtrace_difo_t *dp = pred->dtp_difo; 9312 9313 ASSERT(MUTEX_HELD(&dtrace_lock)); 9314 ASSERT(dp != NULL && dp->dtdo_refcnt != 0); 9315 ASSERT(pred->dtp_refcnt > 0); 9316 9317 if (--pred->dtp_refcnt == 0) { 9318 dtrace_difo_release(pred->dtp_difo, vstate); 9319 kmem_free(pred, sizeof (dtrace_predicate_t)); 9320 } 9321 } 9322 9323 /* 9324 * DTrace Action Description Functions 9325 */ 9326 static dtrace_actdesc_t * 9327 dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, 9328 uint64_t uarg, uint64_t arg) 9329 { 9330 dtrace_actdesc_t *act; 9331 9332 ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && 9333 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA)); 9334 9335 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); 9336 act->dtad_kind = kind; 9337 act->dtad_ntuple = ntuple; 9338 act->dtad_uarg = uarg; 9339 act->dtad_arg = arg; 9340 act->dtad_refcnt = 1; 9341 9342 return (act); 9343 } 9344 9345 static void 9346 dtrace_actdesc_hold(dtrace_actdesc_t *act) 9347 { 9348 ASSERT(act->dtad_refcnt >= 1); 9349 act->dtad_refcnt++; 9350 } 9351 9352 static void 9353 dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) 9354 { 9355 dtrace_actkind_t kind = act->dtad_kind; 9356 dtrace_difo_t *dp; 9357 9358 ASSERT(act->dtad_refcnt >= 1); 9359 9360 if (--act->dtad_refcnt != 0) 9361 return; 9362 9363 if ((dp = act->dtad_difo) != NULL) 9364 dtrace_difo_release(dp, vstate); 9365 9366 if (DTRACEACT_ISPRINTFLIKE(kind)) { 9367 char *str = (char *)(uintptr_t)act->dtad_arg; 9368 9369 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || 9370 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); 9371 9372 if (str != NULL) 9373 kmem_free(str, strlen(str) + 1); 9374 } 9375 9376 kmem_free(act, sizeof (dtrace_actdesc_t)); 9377 } 9378 9379 /* 9380 * DTrace ECB Functions 9381 */ 9382 static dtrace_ecb_t * 9383 dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) 9384 { 9385 dtrace_ecb_t *ecb; 9386 dtrace_epid_t epid; 9387 9388 ASSERT(MUTEX_HELD(&dtrace_lock)); 9389 9390 ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP); 9391 ecb->dte_predicate = NULL; 9392 ecb->dte_probe = probe; 9393 9394 /* 9395 * The default size is the size of the default action: recording 9396 * the epid. 
9397 */ 9398 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t); 9399 ecb->dte_alignment = sizeof (dtrace_epid_t); 9400 9401 epid = state->dts_epid++; 9402 9403 if (epid - 1 >= state->dts_necbs) { 9404 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; 9405 int necbs = state->dts_necbs << 1; 9406 9407 ASSERT(epid == state->dts_necbs + 1); 9408 9409 if (necbs == 0) { 9410 ASSERT(oecbs == NULL); 9411 necbs = 1; 9412 } 9413 9414 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP); 9415 9416 if (oecbs != NULL) 9417 bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs)); 9418 9419 dtrace_membar_producer(); 9420 state->dts_ecbs = ecbs; 9421 9422 if (oecbs != NULL) { 9423 /* 9424 * If this state is active, we must dtrace_sync() 9425 * before we can free the old dts_ecbs array: we're 9426 * coming in hot, and there may be active ring 9427 * buffer processing (which indexes into the dts_ecbs 9428 * array) on another CPU. 9429 */ 9430 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 9431 dtrace_sync(); 9432 9433 kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs)); 9434 } 9435 9436 dtrace_membar_producer(); 9437 state->dts_necbs = necbs; 9438 } 9439 9440 ecb->dte_state = state; 9441 9442 ASSERT(state->dts_ecbs[epid - 1] == NULL); 9443 dtrace_membar_producer(); 9444 state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb; 9445 9446 return (ecb); 9447 } 9448 9449 static int 9450 dtrace_ecb_enable(dtrace_ecb_t *ecb) 9451 { 9452 dtrace_probe_t *probe = ecb->dte_probe; 9453 9454 ASSERT(MUTEX_HELD(&cpu_lock)); 9455 ASSERT(MUTEX_HELD(&dtrace_lock)); 9456 ASSERT(ecb->dte_next == NULL); 9457 9458 if (probe == NULL) { 9459 /* 9460 * This is the NULL probe -- there's nothing to do. 9461 */ 9462 return (0); 9463 } 9464 9465 if (probe->dtpr_ecb == NULL) { 9466 dtrace_provider_t *prov = probe->dtpr_provider; 9467 9468 /* 9469 * We're the first ECB on this probe. 9470 */ 9471 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb; 9472 9473 if (ecb->dte_predicate != NULL) 9474 probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; 9475 9476 return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg, 9477 probe->dtpr_id, probe->dtpr_arg)); 9478 } else { 9479 /* 9480 * This probe is already active. Swing the last pointer to 9481 * point to the new ECB, and issue a dtrace_sync() to assure 9482 * that all CPUs have seen the change. 9483 */ 9484 ASSERT(probe->dtpr_ecb_last != NULL); 9485 probe->dtpr_ecb_last->dte_next = ecb; 9486 probe->dtpr_ecb_last = ecb; 9487 probe->dtpr_predcache = 0; 9488 9489 dtrace_sync(); 9490 return (0); 9491 } 9492 } 9493 9494 static void 9495 dtrace_ecb_resize(dtrace_ecb_t *ecb) 9496 { 9497 uint32_t maxalign = sizeof (dtrace_epid_t); 9498 uint32_t align = sizeof (uint8_t), offs, diff; 9499 dtrace_action_t *act; 9500 int wastuple = 0; 9501 uint32_t aggbase = UINT32_MAX; 9502 dtrace_state_t *state = ecb->dte_state; 9503 9504 /* 9505 * If we record anything, we always record the epid. (And we always 9506 * record it first.) 9507 */ 9508 offs = sizeof (dtrace_epid_t); 9509 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t); 9510 9511 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 9512 dtrace_recdesc_t *rec = &act->dta_rec; 9513 9514 if ((align = rec->dtrd_alignment) > maxalign) 9515 maxalign = align; 9516 9517 if (!wastuple && act->dta_intuple) { 9518 /* 9519 * This is the first record in a tuple. Align the 9520 * offset to be at offset 4 in an 8-byte aligned 9521 * block. 
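* (Worked example, editorial: with a 4-byte dtrace_aggid_t and an offs of 8, diff below becomes 12, its low three bits are 4, and offs is advanced to 12; aggbase then lands at 8, an 8-byte aligned slot for the aggregation ID with the tuple data packed immediately after it.)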
9522 */ 9523 diff = offs + sizeof (dtrace_aggid_t); 9524 9525 if (diff = (diff & (sizeof (uint64_t) - 1))) 9526 offs += sizeof (uint64_t) - diff; 9527 9528 aggbase = offs - sizeof (dtrace_aggid_t); 9529 ASSERT(!(aggbase & (sizeof (uint64_t) - 1))); 9530 } 9531 9532 /*LINTED*/ 9533 if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) { 9534 /* 9535 * The current offset is not properly aligned; align it. 9536 */ 9537 offs += align - diff; 9538 } 9539 9540 rec->dtrd_offset = offs; 9541 9542 if (offs + rec->dtrd_size > ecb->dte_needed) { 9543 ecb->dte_needed = offs + rec->dtrd_size; 9544 9545 if (ecb->dte_needed > state->dts_needed) 9546 state->dts_needed = ecb->dte_needed; 9547 } 9548 9549 if (DTRACEACT_ISAGG(act->dta_kind)) { 9550 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; 9551 dtrace_action_t *first = agg->dtag_first, *prev; 9552 9553 ASSERT(rec->dtrd_size != 0 && first != NULL); 9554 ASSERT(wastuple); 9555 ASSERT(aggbase != UINT32_MAX); 9556 9557 agg->dtag_base = aggbase; 9558 9559 while ((prev = first->dta_prev) != NULL && 9560 DTRACEACT_ISAGG(prev->dta_kind)) { 9561 agg = (dtrace_aggregation_t *)prev; 9562 first = agg->dtag_first; 9563 } 9564 9565 if (prev != NULL) { 9566 offs = prev->dta_rec.dtrd_offset + 9567 prev->dta_rec.dtrd_size; 9568 } else { 9569 offs = sizeof (dtrace_epid_t); 9570 } 9571 wastuple = 0; 9572 } else { 9573 if (!act->dta_intuple) 9574 ecb->dte_size = offs + rec->dtrd_size; 9575 9576 offs += rec->dtrd_size; 9577 } 9578 9579 wastuple = act->dta_intuple; 9580 } 9581 9582 if ((act = ecb->dte_action) != NULL && 9583 !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) && 9584 ecb->dte_size == sizeof (dtrace_epid_t)) { 9585 /* 9586 * If the size is still sizeof (dtrace_epid_t), then all 9587 * actions store no data; set the size to 0. 9588 */ 9589 ecb->dte_alignment = maxalign; 9590 ecb->dte_size = 0; 9591 9592 /* 9593 * If the needed space is still sizeof (dtrace_epid_t), then 9594 * all actions need no additional space; set the needed 9595 * size to 0. 9596 */ 9597 if (ecb->dte_needed == sizeof (dtrace_epid_t)) 9598 ecb->dte_needed = 0; 9599 9600 return; 9601 } 9602 9603 /* 9604 * Set our alignment, and make sure that the dte_size and dte_needed 9605 * are aligned to the size of an EPID. 
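* (For example: with a 4-byte dtrace_epid_t, a dte_size of 13 is rounded up to 16 by the mask arithmetic below.)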
9606 */ 9607 ecb->dte_alignment = maxalign; 9608 ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) & 9609 ~(sizeof (dtrace_epid_t) - 1); 9610 ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) & 9611 ~(sizeof (dtrace_epid_t) - 1); 9612 ASSERT(ecb->dte_size <= ecb->dte_needed); 9613 } 9614 9615 static dtrace_action_t * 9616 dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) 9617 { 9618 dtrace_aggregation_t *agg; 9619 size_t size = sizeof (uint64_t); 9620 int ntuple = desc->dtad_ntuple; 9621 dtrace_action_t *act; 9622 dtrace_recdesc_t *frec; 9623 dtrace_aggid_t aggid; 9624 dtrace_state_t *state = ecb->dte_state; 9625 9626 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP); 9627 agg->dtag_ecb = ecb; 9628 9629 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind)); 9630 9631 switch (desc->dtad_kind) { 9632 case DTRACEAGG_MIN: 9633 agg->dtag_initial = INT64_MAX; 9634 agg->dtag_aggregate = dtrace_aggregate_min; 9635 break; 9636 9637 case DTRACEAGG_MAX: 9638 agg->dtag_initial = INT64_MIN; 9639 agg->dtag_aggregate = dtrace_aggregate_max; 9640 break; 9641 9642 case DTRACEAGG_COUNT: 9643 agg->dtag_aggregate = dtrace_aggregate_count; 9644 break; 9645 9646 case DTRACEAGG_QUANTIZE: 9647 agg->dtag_aggregate = dtrace_aggregate_quantize; 9648 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) * 9649 sizeof (uint64_t); 9650 break; 9651 9652 case DTRACEAGG_LQUANTIZE: { 9653 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg); 9654 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg); 9655 9656 agg->dtag_initial = desc->dtad_arg; 9657 agg->dtag_aggregate = dtrace_aggregate_lquantize; 9658 9659 if (step == 0 || levels == 0) 9660 goto err; 9661 9662 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t); 9663 break; 9664 } 9665 9666 case DTRACEAGG_LLQUANTIZE: { 9667 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); 9668 uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); 9669 uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); 9670 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); 9671 int64_t v; 9672 9673 agg->dtag_initial = desc->dtad_arg; 9674 agg->dtag_aggregate = dtrace_aggregate_llquantize; 9675 9676 if (factor < 2 || low >= high || nsteps < factor) 9677 goto err; 9678 9679 /* 9680 * Now check that the number of steps evenly divides a power 9681 * of the factor. (This assures both integer bucket size and 9682 * linearity within each magnitude.) 9683 */ 9684 for (v = factor; v < nsteps; v *= factor) 9685 continue; 9686 9687 if ((v % nsteps) || (nsteps % factor)) 9688 goto err; 9689 9690 size = (dtrace_aggregate_llquantize_bucket(factor, 9691 low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); 9692 break; 9693 } 9694 9695 case DTRACEAGG_AVG: 9696 agg->dtag_aggregate = dtrace_aggregate_avg; 9697 size = sizeof (uint64_t) * 2; 9698 break; 9699 9700 case DTRACEAGG_STDDEV: 9701 agg->dtag_aggregate = dtrace_aggregate_stddev; 9702 size = sizeof (uint64_t) * 4; 9703 break; 9704 9705 case DTRACEAGG_SUM: 9706 agg->dtag_aggregate = dtrace_aggregate_sum; 9707 break; 9708 9709 default: 9710 goto err; 9711 } 9712 9713 agg->dtag_action.dta_rec.dtrd_size = size; 9714 9715 if (ntuple == 0) 9716 goto err; 9717 9718 /* 9719 * We must make sure that we have enough actions for the n-tuple. 9720 */ 9721 for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) { 9722 if (DTRACEACT_ISAGG(act->dta_kind)) 9723 break; 9724 9725 if (--ntuple == 0) { 9726 /* 9727 * This is the action with which our n-tuple begins. 
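* (Illustration, editorial: for a D aggregation such as @a[pid, tid] = count(), dtad_ntuple is 2; walking backward from dte_action_last over the two key records leaves dtag_first pointing at the record for pid, the first element of the tuple.)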
9728 */ 9729 agg->dtag_first = act; 9730 goto success; 9731 } 9732 } 9733 9734 /* 9735 * This n-tuple is short by ntuple elements. Return failure. 9736 */ 9737 ASSERT(ntuple != 0); 9738 err: 9739 kmem_free(agg, sizeof (dtrace_aggregation_t)); 9740 return (NULL); 9741 9742 success: 9743 /* 9744 * If the last action in the tuple has a size of zero, it's actually 9745 * an expression argument for the aggregating action. 9746 */ 9747 ASSERT(ecb->dte_action_last != NULL); 9748 act = ecb->dte_action_last; 9749 9750 if (act->dta_kind == DTRACEACT_DIFEXPR) { 9751 ASSERT(act->dta_difo != NULL); 9752 9753 if (act->dta_difo->dtdo_rtype.dtdt_size == 0) 9754 agg->dtag_hasarg = 1; 9755 } 9756 9757 /* 9758 * We need to allocate an id for this aggregation. 9759 */ 9760 aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, 9761 VM_BESTFIT | VM_SLEEP); 9762 9763 if (aggid - 1 >= state->dts_naggregations) { 9764 dtrace_aggregation_t **oaggs = state->dts_aggregations; 9765 dtrace_aggregation_t **aggs; 9766 int naggs = state->dts_naggregations << 1; 9767 int onaggs = state->dts_naggregations; 9768 9769 ASSERT(aggid == state->dts_naggregations + 1); 9770 9771 if (naggs == 0) { 9772 ASSERT(oaggs == NULL); 9773 naggs = 1; 9774 } 9775 9776 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP); 9777 9778 if (oaggs != NULL) { 9779 bcopy(oaggs, aggs, onaggs * sizeof (*aggs)); 9780 kmem_free(oaggs, onaggs * sizeof (*aggs)); 9781 } 9782 9783 state->dts_aggregations = aggs; 9784 state->dts_naggregations = naggs; 9785 } 9786 9787 ASSERT(state->dts_aggregations[aggid - 1] == NULL); 9788 state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg; 9789 9790 frec = &agg->dtag_first->dta_rec; 9791 if (frec->dtrd_alignment < sizeof (dtrace_aggid_t)) 9792 frec->dtrd_alignment = sizeof (dtrace_aggid_t); 9793 9794 for (act = agg->dtag_first; act != NULL; act = act->dta_next) { 9795 ASSERT(!act->dta_intuple); 9796 act->dta_intuple = 1; 9797 } 9798 9799 return (&agg->dtag_action); 9800 } 9801 9802 static void 9803 dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act) 9804 { 9805 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; 9806 dtrace_state_t *state = ecb->dte_state; 9807 dtrace_aggid_t aggid = agg->dtag_id; 9808 9809 ASSERT(DTRACEACT_ISAGG(act->dta_kind)); 9810 vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1); 9811 9812 ASSERT(state->dts_aggregations[aggid - 1] == agg); 9813 state->dts_aggregations[aggid - 1] = NULL; 9814 9815 kmem_free(agg, sizeof (dtrace_aggregation_t)); 9816 } 9817 9818 static int 9819 dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) 9820 { 9821 dtrace_action_t *action, *last; 9822 dtrace_difo_t *dp = desc->dtad_difo; 9823 uint32_t size = 0, align = sizeof (uint8_t), mask; 9824 uint16_t format = 0; 9825 dtrace_recdesc_t *rec; 9826 dtrace_state_t *state = ecb->dte_state; 9827 dtrace_optval_t *opt = state->dts_options, nframes, strsize; 9828 uint64_t arg = desc->dtad_arg; 9829 9830 ASSERT(MUTEX_HELD(&dtrace_lock)); 9831 ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); 9832 9833 if (DTRACEACT_ISAGG(desc->dtad_kind)) { 9834 /* 9835 * If this is an aggregating action, there must be neither 9836 * a speculate nor a commit on the action chain. 
9837 */ 9838 dtrace_action_t *act; 9839 9840 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 9841 if (act->dta_kind == DTRACEACT_COMMIT) 9842 return (EINVAL); 9843 9844 if (act->dta_kind == DTRACEACT_SPECULATE) 9845 return (EINVAL); 9846 } 9847 9848 action = dtrace_ecb_aggregation_create(ecb, desc); 9849 9850 if (action == NULL) 9851 return (EINVAL); 9852 } else { 9853 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) || 9854 (desc->dtad_kind == DTRACEACT_DIFEXPR && 9855 dp != NULL && dp->dtdo_destructive)) { 9856 state->dts_destructive = 1; 9857 } 9858 9859 switch (desc->dtad_kind) { 9860 case DTRACEACT_PRINTF: 9861 case DTRACEACT_PRINTA: 9862 case DTRACEACT_SYSTEM: 9863 case DTRACEACT_FREOPEN: 9864 case DTRACEACT_DIFEXPR: 9865 /* 9866 * We know that our arg is a string -- turn it into a 9867 * format. 9868 */ 9869 if (arg == NULL) { 9870 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || 9871 desc->dtad_kind == DTRACEACT_DIFEXPR); 9872 format = 0; 9873 } else { 9874 ASSERT(arg != NULL); 9875 ASSERT(arg > KERNELBASE); 9876 format = dtrace_format_add(state, 9877 (char *)(uintptr_t)arg); 9878 } 9879 9880 /*FALLTHROUGH*/ 9881 case DTRACEACT_LIBACT: 9882 case DTRACEACT_TRACEMEM: 9883 case DTRACEACT_TRACEMEM_DYNSIZE: 9884 if (dp == NULL) 9885 return (EINVAL); 9886 9887 if ((size = dp->dtdo_rtype.dtdt_size) != 0) 9888 break; 9889 9890 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { 9891 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 9892 return (EINVAL); 9893 9894 size = opt[DTRACEOPT_STRSIZE]; 9895 } 9896 9897 break; 9898 9899 case DTRACEACT_STACK: 9900 if ((nframes = arg) == 0) { 9901 nframes = opt[DTRACEOPT_STACKFRAMES]; 9902 ASSERT(nframes > 0); 9903 arg = nframes; 9904 } 9905 9906 size = nframes * sizeof (pc_t); 9907 break; 9908 9909 case DTRACEACT_JSTACK: 9910 if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0) 9911 strsize = opt[DTRACEOPT_JSTACKSTRSIZE]; 9912 9913 if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) 9914 nframes = opt[DTRACEOPT_JSTACKFRAMES]; 9915 9916 arg = DTRACE_USTACK_ARG(nframes, strsize); 9917 9918 /*FALLTHROUGH*/ 9919 case DTRACEACT_USTACK: 9920 if (desc->dtad_kind != DTRACEACT_JSTACK && 9921 (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) { 9922 strsize = DTRACE_USTACK_STRSIZE(arg); 9923 nframes = opt[DTRACEOPT_USTACKFRAMES]; 9924 ASSERT(nframes > 0); 9925 arg = DTRACE_USTACK_ARG(nframes, strsize); 9926 } 9927 9928 /* 9929 * Save a slot for the pid. 9930 */ 9931 size = (nframes + 1) * sizeof (uint64_t); 9932 size += DTRACE_USTACK_STRSIZE(arg); 9933 size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t))); 9934 9935 break; 9936 9937 case DTRACEACT_SYM: 9938 case DTRACEACT_MOD: 9939 if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) != 9940 sizeof (uint64_t)) || 9941 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 9942 return (EINVAL); 9943 break; 9944 9945 case DTRACEACT_USYM: 9946 case DTRACEACT_UMOD: 9947 case DTRACEACT_UADDR: 9948 if (dp == NULL || 9949 (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) || 9950 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 9951 return (EINVAL); 9952 9953 /* 9954 * We have a slot for the pid, plus a slot for the 9955 * argument. To keep things simple (aligned with 9956 * bitness-neutral sizing), we store each as a 64-bit 9957 * quantity. 
9958 */ 9959 size = 2 * sizeof (uint64_t); 9960 break; 9961 9962 case DTRACEACT_STOP: 9963 case DTRACEACT_BREAKPOINT: 9964 case DTRACEACT_PANIC: 9965 break; 9966 9967 case DTRACEACT_CHILL: 9968 case DTRACEACT_DISCARD: 9969 case DTRACEACT_RAISE: 9970 if (dp == NULL) 9971 return (EINVAL); 9972 break; 9973 9974 case DTRACEACT_EXIT: 9975 if (dp == NULL || 9976 (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) || 9977 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 9978 return (EINVAL); 9979 break; 9980 9981 case DTRACEACT_SPECULATE: 9982 if (ecb->dte_size > sizeof (dtrace_epid_t)) 9983 return (EINVAL); 9984 9985 if (dp == NULL) 9986 return (EINVAL); 9987 9988 state->dts_speculates = 1; 9989 break; 9990 9991 case DTRACEACT_COMMIT: { 9992 dtrace_action_t *act = ecb->dte_action; 9993 9994 for (; act != NULL; act = act->dta_next) { 9995 if (act->dta_kind == DTRACEACT_COMMIT) 9996 return (EINVAL); 9997 } 9998 9999 if (dp == NULL) 10000 return (EINVAL); 10001 break; 10002 } 10003 10004 default: 10005 return (EINVAL); 10006 } 10007 10008 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) { 10009 /* 10010 * If this is a data-storing action or a speculate, 10011 * we must be sure that there isn't a commit on the 10012 * action chain. 10013 */ 10014 dtrace_action_t *act = ecb->dte_action; 10015 10016 for (; act != NULL; act = act->dta_next) { 10017 if (act->dta_kind == DTRACEACT_COMMIT) 10018 return (EINVAL); 10019 } 10020 } 10021 10022 action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP); 10023 action->dta_rec.dtrd_size = size; 10024 } 10025 10026 action->dta_refcnt = 1; 10027 rec = &action->dta_rec; 10028 size = rec->dtrd_size; 10029 10030 for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) { 10031 if (!(size & mask)) { 10032 align = mask + 1; 10033 break; 10034 } 10035 } 10036 10037 action->dta_kind = desc->dtad_kind; 10038 10039 if ((action->dta_difo = dp) != NULL) 10040 dtrace_difo_hold(dp); 10041 10042 rec->dtrd_action = action->dta_kind; 10043 rec->dtrd_arg = arg; 10044 rec->dtrd_uarg = desc->dtad_uarg; 10045 rec->dtrd_alignment = (uint16_t)align; 10046 rec->dtrd_format = format; 10047 10048 if ((last = ecb->dte_action_last) != NULL) { 10049 ASSERT(ecb->dte_action != NULL); 10050 action->dta_prev = last; 10051 last->dta_next = action; 10052 } else { 10053 ASSERT(ecb->dte_action == NULL); 10054 ecb->dte_action = action; 10055 } 10056 10057 ecb->dte_action_last = action; 10058 10059 return (0); 10060 } 10061 10062 static void 10063 dtrace_ecb_action_remove(dtrace_ecb_t *ecb) 10064 { 10065 dtrace_action_t *act = ecb->dte_action, *next; 10066 dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate; 10067 dtrace_difo_t *dp; 10068 uint16_t format; 10069 10070 if (act != NULL && act->dta_refcnt > 1) { 10071 ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1); 10072 act->dta_refcnt--; 10073 } else { 10074 for (; act != NULL; act = next) { 10075 next = act->dta_next; 10076 ASSERT(next != NULL || act == ecb->dte_action_last); 10077 ASSERT(act->dta_refcnt == 1); 10078 10079 if ((format = act->dta_rec.dtrd_format) != 0) 10080 dtrace_format_remove(ecb->dte_state, format); 10081 10082 if ((dp = act->dta_difo) != NULL) 10083 dtrace_difo_release(dp, vstate); 10084 10085 if (DTRACEACT_ISAGG(act->dta_kind)) { 10086 dtrace_ecb_aggregation_destroy(ecb, act); 10087 } else { 10088 kmem_free(act, sizeof (dtrace_action_t)); 10089 } 10090 } 10091 } 10092 10093 ecb->dte_action = NULL; 10094 ecb->dte_action_last = NULL; 10095 ecb->dte_size = sizeof (dtrace_epid_t); 10096 } 10097 10098 
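/*
 * Editorial sketch (not part of the original source): the record-alignment
 * loop in dtrace_ecb_action_add() above amounts to "the largest power of
 * two, capped at eight, that evenly divides the record size".  Rendered as
 * a hypothetical standalone helper, assuming only <sys/types.h>:
 *
 *	static uint32_t
 *	record_alignment(uint32_t size)
 *	{
 *		uint32_t mask, align = sizeof (uint8_t);
 *
 *		for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0;
 *		    mask >>= 1) {
 *			if (!(size & mask)) {
 *				align = mask + 1;
 *				break;
 *			}
 *		}
 *
 *		return (align);
 *	}
 *
 * For example, a 12-byte record aligns to 4, a 16-byte record to 8, and a
 * 5-byte record to 1.
 */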
static void 10099 dtrace_ecb_disable(dtrace_ecb_t *ecb) 10100 { 10101 /* 10102 * We disable the ECB by removing it from its probe. 10103 */ 10104 dtrace_ecb_t *pecb, *prev = NULL; 10105 dtrace_probe_t *probe = ecb->dte_probe; 10106 10107 ASSERT(MUTEX_HELD(&dtrace_lock)); 10108 10109 if (probe == NULL) { 10110 /* 10111 * This is the NULL probe; there is nothing to disable. 10112 */ 10113 return; 10114 } 10115 10116 for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) { 10117 if (pecb == ecb) 10118 break; 10119 prev = pecb; 10120 } 10121 10122 ASSERT(pecb != NULL); 10123 10124 if (prev == NULL) { 10125 probe->dtpr_ecb = ecb->dte_next; 10126 } else { 10127 prev->dte_next = ecb->dte_next; 10128 } 10129 10130 if (ecb == probe->dtpr_ecb_last) { 10131 ASSERT(ecb->dte_next == NULL); 10132 probe->dtpr_ecb_last = prev; 10133 } 10134 10135 /* 10136 * The ECB has been disconnected from the probe; now sync to assure 10137 * that all CPUs have seen the change before returning. 10138 */ 10139 dtrace_sync(); 10140 10141 if (probe->dtpr_ecb == NULL) { 10142 /* 10143 * That was the last ECB on the probe; clear the predicate 10144 * cache ID for the probe, disable it and sync one more time 10145 * to assure that we'll never hit it again. 10146 */ 10147 dtrace_provider_t *prov = probe->dtpr_provider; 10148 10149 ASSERT(ecb->dte_next == NULL); 10150 ASSERT(probe->dtpr_ecb_last == NULL); 10151 probe->dtpr_predcache = DTRACE_CACHEIDNONE; 10152 prov->dtpv_pops.dtps_disable(prov->dtpv_arg, 10153 probe->dtpr_id, probe->dtpr_arg); 10154 dtrace_sync(); 10155 } else { 10156 /* 10157 * There is at least one ECB remaining on the probe. If there 10158 * is _exactly_ one, set the probe's predicate cache ID to be 10159 * the predicate cache ID of the remaining ECB. 10160 */ 10161 ASSERT(probe->dtpr_ecb_last != NULL); 10162 ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE); 10163 10164 if (probe->dtpr_ecb == probe->dtpr_ecb_last) { 10165 dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate; 10166 10167 ASSERT(probe->dtpr_ecb->dte_next == NULL); 10168 10169 if (p != NULL) 10170 probe->dtpr_predcache = p->dtp_cacheid; 10171 } 10172 10173 ecb->dte_next = NULL; 10174 } 10175 } 10176 10177 static void 10178 dtrace_ecb_destroy(dtrace_ecb_t *ecb) 10179 { 10180 dtrace_state_t *state = ecb->dte_state; 10181 dtrace_vstate_t *vstate = &state->dts_vstate; 10182 dtrace_predicate_t *pred; 10183 dtrace_epid_t epid = ecb->dte_epid; 10184 10185 ASSERT(MUTEX_HELD(&dtrace_lock)); 10186 ASSERT(ecb->dte_next == NULL); 10187 ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb); 10188 10189 if ((pred = ecb->dte_predicate) != NULL) 10190 dtrace_predicate_release(pred, vstate); 10191 10192 dtrace_ecb_action_remove(ecb); 10193 10194 ASSERT(state->dts_ecbs[epid - 1] == ecb); 10195 state->dts_ecbs[epid - 1] = NULL; 10196 10197 kmem_free(ecb, sizeof (dtrace_ecb_t)); 10198 } 10199 10200 static dtrace_ecb_t * 10201 dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, 10202 dtrace_enabling_t *enab) 10203 { 10204 dtrace_ecb_t *ecb; 10205 dtrace_predicate_t *pred; 10206 dtrace_actdesc_t *act; 10207 dtrace_provider_t *prov; 10208 dtrace_ecbdesc_t *desc = enab->dten_current; 10209 10210 ASSERT(MUTEX_HELD(&dtrace_lock)); 10211 ASSERT(state != NULL); 10212 10213 ecb = dtrace_ecb_add(state, probe); 10214 ecb->dte_uarg = desc->dted_uarg; 10215 10216 if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) { 10217 dtrace_predicate_hold(pred); 10218 ecb->dte_predicate = pred; 10219 } 10220 10221 if (probe != NULL) { 10222 /* 
10223 * If the provider shows more leg than the consumer is old 10224 * enough to see, we need to enable the appropriate implicit 10225 * predicate bits to prevent the ecb from activating at 10226 * revealing times. 10227 * 10228 * Providers specifying DTRACE_PRIV_USER at register time 10229 * are stating that they need the /proc-style privilege 10230 * model to be enforced, and this is what DTRACE_COND_OWNER 10231 * and DTRACE_COND_ZONEOWNER will then do at probe time. 10232 */ 10233 prov = probe->dtpr_provider; 10234 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) && 10235 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) 10236 ecb->dte_cond |= DTRACE_COND_OWNER; 10237 10238 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) && 10239 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) 10240 ecb->dte_cond |= DTRACE_COND_ZONEOWNER; 10241 10242 /* 10243 * If the provider shows us kernel innards and the user 10244 * is lacking sufficient privilege, enable the 10245 * DTRACE_COND_USERMODE implicit predicate. 10246 */ 10247 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) && 10248 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL)) 10249 ecb->dte_cond |= DTRACE_COND_USERMODE; 10250 } 10251 10252 if (dtrace_ecb_create_cache != NULL) { 10253 /* 10254 * If we have a cached ecb, we'll use its action list instead 10255 * of creating our own (saving both time and space). 10256 */ 10257 dtrace_ecb_t *cached = dtrace_ecb_create_cache; 10258 dtrace_action_t *act = cached->dte_action; 10259 10260 if (act != NULL) { 10261 ASSERT(act->dta_refcnt > 0); 10262 act->dta_refcnt++; 10263 ecb->dte_action = act; 10264 ecb->dte_action_last = cached->dte_action_last; 10265 ecb->dte_needed = cached->dte_needed; 10266 ecb->dte_size = cached->dte_size; 10267 ecb->dte_alignment = cached->dte_alignment; 10268 } 10269 10270 return (ecb); 10271 } 10272 10273 for (act = desc->dted_action; act != NULL; act = act->dtad_next) { 10274 if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) { 10275 dtrace_ecb_destroy(ecb); 10276 return (NULL); 10277 } 10278 } 10279 10280 dtrace_ecb_resize(ecb); 10281 10282 return (dtrace_ecb_create_cache = ecb); 10283 } 10284 10285 static int 10286 dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) 10287 { 10288 dtrace_ecb_t *ecb; 10289 dtrace_enabling_t *enab = arg; 10290 dtrace_state_t *state = enab->dten_vstate->dtvs_state; 10291 10292 ASSERT(state != NULL); 10293 10294 if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) { 10295 /* 10296 * This probe was created in a generation for which this 10297 * enabling has previously created ECBs; we don't want to 10298 * enable it again, so just kick out. 
10299 */ 10300 return (DTRACE_MATCH_NEXT); 10301 } 10302 10303 if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) 10304 return (DTRACE_MATCH_DONE); 10305 10306 if (dtrace_ecb_enable(ecb) < 0) 10307 return (DTRACE_MATCH_FAIL); 10308 10309 return (DTRACE_MATCH_NEXT); 10310 } 10311 10312 static dtrace_ecb_t * 10313 dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) 10314 { 10315 dtrace_ecb_t *ecb; 10316 10317 ASSERT(MUTEX_HELD(&dtrace_lock)); 10318 10319 if (id == 0 || id > state->dts_necbs) 10320 return (NULL); 10321 10322 ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); 10323 ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id); 10324 10325 return (state->dts_ecbs[id - 1]); 10326 } 10327 10328 static dtrace_aggregation_t * 10329 dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) 10330 { 10331 dtrace_aggregation_t *agg; 10332 10333 ASSERT(MUTEX_HELD(&dtrace_lock)); 10334 10335 if (id == 0 || id > state->dts_naggregations) 10336 return (NULL); 10337 10338 ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL); 10339 ASSERT((agg = state->dts_aggregations[id - 1]) == NULL || 10340 agg->dtag_id == id); 10341 10342 return (state->dts_aggregations[id - 1]); 10343 } 10344 10345 /* 10346 * DTrace Buffer Functions 10347 * 10348 * The following functions manipulate DTrace buffers. Most of these functions 10349 * are called in the context of establishing or processing consumer state; 10350 * exceptions are explicitly noted. 10351 */ 10352 10353 /* 10354 * Note: called from cross call context. This function switches the two 10355 * buffers on a given CPU. The atomicity of this operation is assured by 10356 * disabling interrupts while the actual switch takes place; the disabling of 10357 * interrupts serializes the execution with any execution of dtrace_probe() on 10358 * the same CPU. 10359 */ 10360 static void 10361 dtrace_buffer_switch(dtrace_buffer_t *buf) 10362 { 10363 caddr_t tomax = buf->dtb_tomax; 10364 caddr_t xamot = buf->dtb_xamot; 10365 dtrace_icookie_t cookie; 10366 hrtime_t now = dtrace_gethrtime(); 10367 10368 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 10369 ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); 10370 10371 cookie = dtrace_interrupt_disable(); 10372 buf->dtb_tomax = xamot; 10373 buf->dtb_xamot = tomax; 10374 buf->dtb_xamot_drops = buf->dtb_drops; 10375 buf->dtb_xamot_offset = buf->dtb_offset; 10376 buf->dtb_xamot_errors = buf->dtb_errors; 10377 buf->dtb_xamot_flags = buf->dtb_flags; 10378 buf->dtb_offset = 0; 10379 buf->dtb_drops = 0; 10380 buf->dtb_errors = 0; 10381 buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); 10382 buf->dtb_interval = now - buf->dtb_switched; 10383 buf->dtb_switched = now; 10384 dtrace_interrupt_enable(cookie); 10385 } 10386 10387 /* 10388 * Note: called from cross call context. This function activates a buffer 10389 * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation 10390 * is guaranteed by the disabling of interrupts. 10391 */ 10392 static void 10393 dtrace_buffer_activate(dtrace_state_t *state) 10394 { 10395 dtrace_buffer_t *buf; 10396 dtrace_icookie_t cookie = dtrace_interrupt_disable(); 10397 10398 buf = &state->dts_buffer[CPU->cpu_id]; 10399 10400 if (buf->dtb_tomax != NULL) { 10401 /* 10402 * We might like to assert that the buffer is marked inactive, 10403 * but this isn't necessarily true: the buffer for the CPU 10404 * that processes the BEGIN probe has its buffer activated 10405 * manually. 
In this case, we take the (harmless) action 10406 * of re-clearing the INACTIVE bit. 10407 */ 10408 buf->dtb_flags &= ~DTRACEBUF_INACTIVE; 10409 } 10410 10411 dtrace_interrupt_enable(cookie); 10412 } 10413 10414 static int 10415 dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, 10416 processorid_t cpu, int *factor) 10417 { 10418 cpu_t *cp; 10419 dtrace_buffer_t *buf; 10420 int allocated = 0, desired = 0; 10421 10422 ASSERT(MUTEX_HELD(&cpu_lock)); 10423 ASSERT(MUTEX_HELD(&dtrace_lock)); 10424 10425 *factor = 1; 10426 10427 if (size > dtrace_nonroot_maxsize && 10428 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) 10429 return (EFBIG); 10430 10431 cp = cpu_list; 10432 10433 do { 10434 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) 10435 continue; 10436 10437 buf = &bufs[cp->cpu_id]; 10438 10439 /* 10440 * If there is already a buffer allocated for this CPU, it 10441 * is only possible that this is a DR event. In this case, 10442 * the buffer size must match our specified size. 10443 */ 10444 if (buf->dtb_tomax != NULL) { 10445 ASSERT(buf->dtb_size == size); 10446 continue; 10447 } 10448 10449 ASSERT(buf->dtb_xamot == NULL); 10450 10451 if ((buf->dtb_tomax = kmem_zalloc(size, 10452 KM_NOSLEEP | KM_NORMALPRI)) == NULL) 10453 goto err; 10454 10455 buf->dtb_size = size; 10456 buf->dtb_flags = flags; 10457 buf->dtb_offset = 0; 10458 buf->dtb_drops = 0; 10459 10460 if (flags & DTRACEBUF_NOSWITCH) 10461 continue; 10462 10463 if ((buf->dtb_xamot = kmem_zalloc(size, 10464 KM_NOSLEEP | KM_NORMALPRI)) == NULL) 10465 goto err; 10466 } while ((cp = cp->cpu_next) != cpu_list); 10467 10468 return (0); 10469 10470 err: 10471 cp = cpu_list; 10472 10473 do { 10474 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) 10475 continue; 10476 10477 buf = &bufs[cp->cpu_id]; 10478 desired += 2; 10479 10480 if (buf->dtb_xamot != NULL) { 10481 ASSERT(buf->dtb_tomax != NULL); 10482 ASSERT(buf->dtb_size == size); 10483 kmem_free(buf->dtb_xamot, size); 10484 allocated++; 10485 } 10486 10487 if (buf->dtb_tomax != NULL) { 10488 ASSERT(buf->dtb_size == size); 10489 kmem_free(buf->dtb_tomax, size); 10490 allocated++; 10491 } 10492 10493 buf->dtb_tomax = NULL; 10494 buf->dtb_xamot = NULL; 10495 buf->dtb_size = 0; 10496 } while ((cp = cp->cpu_next) != cpu_list); 10497 10498 *factor = desired / (allocated > 0 ? allocated : 1); 10499 10500 return (ENOMEM); 10501 } 10502 10503 /* 10504 * Note: called from probe context. This function just increments the drop 10505 * count on a buffer. It has been made a function to allow for the 10506 * possibility of understanding the source of mysterious drop counts. (A 10507 * problem for which one may be particularly disappointed that DTrace cannot 10508 * be used to understand DTrace.) 10509 */ 10510 static void 10511 dtrace_buffer_drop(dtrace_buffer_t *buf) 10512 { 10513 buf->dtb_drops++; 10514 } 10515 10516 /* 10517 * Note: called from probe context. This function is called to reserve space 10518 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the 10519 * mstate. Returns the new offset in the buffer, or a negative value if an 10520 * error has occurred.
10521 */ 10522 static intptr_t 10523 dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, 10524 dtrace_state_t *state, dtrace_mstate_t *mstate) 10525 { 10526 intptr_t offs = buf->dtb_offset, soffs; 10527 intptr_t woffs; 10528 caddr_t tomax; 10529 size_t total; 10530 10531 if (buf->dtb_flags & DTRACEBUF_INACTIVE) 10532 return (-1); 10533 10534 if ((tomax = buf->dtb_tomax) == NULL) { 10535 dtrace_buffer_drop(buf); 10536 return (-1); 10537 } 10538 10539 if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) { 10540 while (offs & (align - 1)) { 10541 /* 10542 * Assert that our alignment is off by a number which 10543 * is itself sizeof (uint32_t) aligned. 10544 */ 10545 ASSERT(!((align - (offs & (align - 1))) & 10546 (sizeof (uint32_t) - 1))); 10547 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); 10548 offs += sizeof (uint32_t); 10549 } 10550 10551 if ((soffs = offs + needed) > buf->dtb_size) { 10552 dtrace_buffer_drop(buf); 10553 return (-1); 10554 } 10555 10556 if (mstate == NULL) 10557 return (offs); 10558 10559 mstate->dtms_scratch_base = (uintptr_t)tomax + soffs; 10560 mstate->dtms_scratch_size = buf->dtb_size - soffs; 10561 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; 10562 10563 return (offs); 10564 } 10565 10566 if (buf->dtb_flags & DTRACEBUF_FILL) { 10567 if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN && 10568 (buf->dtb_flags & DTRACEBUF_FULL)) 10569 return (-1); 10570 goto out; 10571 } 10572 10573 total = needed + (offs & (align - 1)); 10574 10575 /* 10576 * For a ring buffer, life is quite a bit more complicated. Before 10577 * we can store any padding, we need to adjust our wrapping offset. 10578 * (If we've never before wrapped or we're not about to, no adjustment 10579 * is required.) 10580 */ 10581 if ((buf->dtb_flags & DTRACEBUF_WRAPPED) || 10582 offs + total > buf->dtb_size) { 10583 woffs = buf->dtb_xamot_offset; 10584 10585 if (offs + total > buf->dtb_size) { 10586 /* 10587 * We can't fit in the end of the buffer. First, a 10588 * sanity check that we can fit in the buffer at all. 10589 */ 10590 if (total > buf->dtb_size) { 10591 dtrace_buffer_drop(buf); 10592 return (-1); 10593 } 10594 10595 /* 10596 * We're going to be storing at the top of the buffer, 10597 * so now we need to deal with the wrapped offset. We 10598 * only reset our wrapped offset to 0 if it is 10599 * currently greater than the current offset. If it 10600 * is less than the current offset, it is because a 10601 * previous allocation induced a wrap -- but the 10602 * allocation didn't subsequently take the space due 10603 * to an error or false predicate evaluation. In this 10604 * case, we'll just leave the wrapped offset alone: if 10605 * the wrapped offset hasn't been advanced far enough 10606 * for this allocation, it will be adjusted in the 10607 * lower loop. 10608 */ 10609 if (buf->dtb_flags & DTRACEBUF_WRAPPED) { 10610 if (woffs >= offs) 10611 woffs = 0; 10612 } else { 10613 woffs = 0; 10614 } 10615 10616 /* 10617 * Now we know that we're going to be storing to the 10618 * top of the buffer and that there is room for us 10619 * there. We need to clear the buffer from the current 10620 * offset to the end (there may be old gunk there). 10621 */ 10622 while (offs < buf->dtb_size) 10623 tomax[offs++] = 0; 10624 10625 /* 10626 * We need to set our offset to zero. And because we 10627 * are wrapping, we need to set the bit indicating as 10628 * much. 
We can also adjust our needed space back 10629 * down to the space required by the ECB -- we know 10630 * that the top of the buffer is aligned. 10631 */ 10632 offs = 0; 10633 total = needed; 10634 buf->dtb_flags |= DTRACEBUF_WRAPPED; 10635 } else { 10636 /* 10637 * There is room for us in the buffer, so we simply 10638 * need to check the wrapped offset. 10639 */ 10640 if (woffs < offs) { 10641 /* 10642 * The wrapped offset is less than the offset. 10643 * This can happen if we allocated buffer space 10644 * that induced a wrap, but then we didn't 10645 * subsequently take the space due to an error 10646 * or false predicate evaluation. This is 10647 * okay; we know that _this_ allocation isn't 10648 * going to induce a wrap. We still can't 10649 * reset the wrapped offset to be zero, 10650 * however: the space may have been trashed in 10651 * the previous failed probe attempt. But at 10652 * least the wrapped offset doesn't need to 10653 * be adjusted at all... 10654 */ 10655 goto out; 10656 } 10657 } 10658 10659 while (offs + total > woffs) { 10660 dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); 10661 size_t size; 10662 10663 if (epid == DTRACE_EPIDNONE) { 10664 size = sizeof (uint32_t); 10665 } else { 10666 ASSERT(epid <= state->dts_necbs); 10667 ASSERT(state->dts_ecbs[epid - 1] != NULL); 10668 10669 size = state->dts_ecbs[epid - 1]->dte_size; 10670 } 10671 10672 ASSERT(woffs + size <= buf->dtb_size); 10673 ASSERT(size != 0); 10674 10675 if (woffs + size == buf->dtb_size) { 10676 /* 10677 * We've reached the end of the buffer; we want 10678 * to set the wrapped offset to 0 and break 10679 * out. However, if the offs is 0, then we're 10680 * in a strange edge-condition: the amount of 10681 * space that we want to reserve plus the size 10682 * of the record that we're overwriting is 10683 * greater than the size of the buffer. This 10684 * is problematic because if we reserve the 10685 * space but subsequently don't consume it (due 10686 * to a failed predicate or error) the wrapped 10687 * offset will be 0 -- yet the EPID at offset 0 10688 * will not be committed. This situation is 10689 * relatively easy to deal with: if we're in 10690 * this case, the buffer is indistinguishable 10691 * from one that hasn't wrapped; we need only 10692 * finish the job by clearing the wrapped bit, 10693 * explicitly setting the offset to be 0, and 10694 * zero'ing out the old data in the buffer. 10695 */ 10696 if (offs == 0) { 10697 buf->dtb_flags &= ~DTRACEBUF_WRAPPED; 10698 buf->dtb_offset = 0; 10699 woffs = total; 10700 10701 while (woffs < buf->dtb_size) 10702 tomax[woffs++] = 0; 10703 } 10704 10705 woffs = 0; 10706 break; 10707 } 10708 10709 woffs += size; 10710 } 10711 10712 /* 10713 * We have a wrapped offset. It may be that the wrapped offset 10714 * has become zero -- that's okay. 10715 */ 10716 buf->dtb_xamot_offset = woffs; 10717 } 10718 10719 out: 10720 /* 10721 * Now we can plow the buffer with any necessary padding. 10722 */ 10723 while (offs & (align - 1)) { 10724 /* 10725 * Assert that our alignment is off by a number which 10726 * is itself sizeof (uint32_t) aligned. 
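* For example (an illustrative calculation): with offs = 12 and
* align = 8, the gap to alignment is 4 bytes, so exactly one 4-byte
* DTRACE_EPIDNONE pad record is stored, leaving offs = 16.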
10727 */ 10728 ASSERT(!((align - (offs & (align - 1))) & 10729 (sizeof (uint32_t) - 1))); 10730 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); 10731 offs += sizeof (uint32_t); 10732 } 10733 10734 if (buf->dtb_flags & DTRACEBUF_FILL) { 10735 if (offs + needed > buf->dtb_size - state->dts_reserve) { 10736 buf->dtb_flags |= DTRACEBUF_FULL; 10737 return (-1); 10738 } 10739 } 10740 10741 if (mstate == NULL) 10742 return (offs); 10743 10744 /* 10745 * For ring buffers and fill buffers, the scratch space is always 10746 * the inactive buffer. 10747 */ 10748 mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot; 10749 mstate->dtms_scratch_size = buf->dtb_size; 10750 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; 10751 10752 return (offs); 10753 } 10754 10755 static void 10756 dtrace_buffer_polish(dtrace_buffer_t *buf) 10757 { 10758 ASSERT(buf->dtb_flags & DTRACEBUF_RING); 10759 ASSERT(MUTEX_HELD(&dtrace_lock)); 10760 10761 if (!(buf->dtb_flags & DTRACEBUF_WRAPPED)) 10762 return; 10763 10764 /* 10765 * We need to polish the ring buffer. There are three cases: 10766 * 10767 * - The first (and presumably most common) is that there is no gap 10768 * between the buffer offset and the wrapped offset. In this case, 10769 * there is nothing in the buffer that isn't valid data; we can 10770 * mark the buffer as polished and return. 10771 * 10772 * - The second (less common than the first but still more common 10773 * than the third) is that there is a gap between the buffer offset 10774 * and the wrapped offset, and the wrapped offset is larger than the 10775 * buffer offset. This can happen because of an alignment issue, or 10776 * can happen because of a call to dtrace_buffer_reserve() that 10777 * didn't subsequently consume the buffer space. In this case, 10778 * we need to zero the data from the buffer offset to the wrapped 10779 * offset. 10780 * 10781 * - The third (and least common) is that there is a gap between the 10782 * buffer offset and the wrapped offset, but the wrapped offset is 10783 * _less_ than the buffer offset. This can only happen because a 10784 * call to dtrace_buffer_reserve() induced a wrap, but the space 10785 * was not subsequently consumed. In this case, we need to zero the 10786 * space from the offset to the end of the buffer _and_ from the 10787 * top of the buffer to the wrapped offset. 10788 */ 10789 if (buf->dtb_offset < buf->dtb_xamot_offset) { 10790 bzero(buf->dtb_tomax + buf->dtb_offset, 10791 buf->dtb_xamot_offset - buf->dtb_offset); 10792 } 10793 10794 if (buf->dtb_offset > buf->dtb_xamot_offset) { 10795 bzero(buf->dtb_tomax + buf->dtb_offset, 10796 buf->dtb_size - buf->dtb_offset); 10797 bzero(buf->dtb_tomax, buf->dtb_xamot_offset); 10798 } 10799 } 10800 10801 /* 10802 * This routine determines if data generated at the specified time has likely 10803 * been entirely consumed at user-level. This routine is called to determine 10804 * if an ECB on a defunct probe (but for an active enabling) can be safely 10805 * disabled and destroyed. 
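*
* As a sketch of the check performed below: at the time of a switch,
* dtb_interval records the time since the previous switch, so the
* condition
*
*	buf->dtb_switched - buf->dtb_interval >= when
*
* demands that even the previous switch occurred at or after 'when' --
* that is, that the buffer has been switched twice since the data in
* question was generated. Ring buffers are never deemed consumed, as
* switching does not drain them.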
10806 */ 10807 static int 10808 dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when) 10809 { 10810 int i; 10811 10812 for (i = 0; i < NCPU; i++) { 10813 dtrace_buffer_t *buf = &bufs[i]; 10814 10815 if (buf->dtb_size == 0) 10816 continue; 10817 10818 if (buf->dtb_flags & DTRACEBUF_RING) 10819 return (0); 10820 10821 if (!buf->dtb_switched && buf->dtb_offset != 0) 10822 return (0); 10823 10824 if (buf->dtb_switched - buf->dtb_interval < when) 10825 return (0); 10826 } 10827 10828 return (1); 10829 } 10830 10831 static void 10832 dtrace_buffer_free(dtrace_buffer_t *bufs) 10833 { 10834 int i; 10835 10836 for (i = 0; i < NCPU; i++) { 10837 dtrace_buffer_t *buf = &bufs[i]; 10838 10839 if (buf->dtb_tomax == NULL) { 10840 ASSERT(buf->dtb_xamot == NULL); 10841 ASSERT(buf->dtb_size == 0); 10842 continue; 10843 } 10844 10845 if (buf->dtb_xamot != NULL) { 10846 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 10847 kmem_free(buf->dtb_xamot, buf->dtb_size); 10848 } 10849 10850 kmem_free(buf->dtb_tomax, buf->dtb_size); 10851 buf->dtb_size = 0; 10852 buf->dtb_tomax = NULL; 10853 buf->dtb_xamot = NULL; 10854 } 10855 } 10856 10857 /* 10858 * DTrace Enabling Functions 10859 */ 10860 static dtrace_enabling_t * 10861 dtrace_enabling_create(dtrace_vstate_t *vstate) 10862 { 10863 dtrace_enabling_t *enab; 10864 10865 enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP); 10866 enab->dten_vstate = vstate; 10867 10868 return (enab); 10869 } 10870 10871 static void 10872 dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) 10873 { 10874 dtrace_ecbdesc_t **ndesc; 10875 size_t osize, nsize; 10876 10877 /* 10878 * We can't add to enablings after we've enabled them, or after we've 10879 * retained them. 10880 */ 10881 ASSERT(enab->dten_probegen == 0); 10882 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); 10883 10884 if (enab->dten_ndesc < enab->dten_maxdesc) { 10885 enab->dten_desc[enab->dten_ndesc++] = ecb; 10886 return; 10887 } 10888 10889 osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); 10890 10891 if (enab->dten_maxdesc == 0) { 10892 enab->dten_maxdesc = 1; 10893 } else { 10894 enab->dten_maxdesc <<= 1; 10895 } 10896 10897 ASSERT(enab->dten_ndesc < enab->dten_maxdesc); 10898 10899 nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); 10900 ndesc = kmem_zalloc(nsize, KM_SLEEP); 10901 bcopy(enab->dten_desc, ndesc, osize); 10902 kmem_free(enab->dten_desc, osize); 10903 10904 enab->dten_desc = ndesc; 10905 enab->dten_desc[enab->dten_ndesc++] = ecb; 10906 } 10907 10908 static void 10909 dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb, 10910 dtrace_probedesc_t *pd) 10911 { 10912 dtrace_ecbdesc_t *new; 10913 dtrace_predicate_t *pred; 10914 dtrace_actdesc_t *act; 10915 10916 /* 10917 * We're going to create a new ECB description that matches the 10918 * specified ECB in every way, but has the specified probe description. 
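* Rather than copying the predicate and action descriptions, we take
* additional holds on them; the new ECB description therefore shares
* them with the ECB description from which it was created.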
10919 */ 10920 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); 10921 10922 if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL) 10923 dtrace_predicate_hold(pred); 10924 10925 for (act = ecb->dted_action; act != NULL; act = act->dtad_next) 10926 dtrace_actdesc_hold(act); 10927 10928 new->dted_action = ecb->dted_action; 10929 new->dted_pred = ecb->dted_pred; 10930 new->dted_probe = *pd; 10931 new->dted_uarg = ecb->dted_uarg; 10932 10933 dtrace_enabling_add(enab, new); 10934 } 10935 10936 static void 10937 dtrace_enabling_dump(dtrace_enabling_t *enab) 10938 { 10939 int i; 10940 10941 for (i = 0; i < enab->dten_ndesc; i++) { 10942 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe; 10943 10944 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i, 10945 desc->dtpd_provider, desc->dtpd_mod, 10946 desc->dtpd_func, desc->dtpd_name); 10947 } 10948 } 10949 10950 static void 10951 dtrace_enabling_destroy(dtrace_enabling_t *enab) 10952 { 10953 int i; 10954 dtrace_ecbdesc_t *ep; 10955 dtrace_vstate_t *vstate = enab->dten_vstate; 10956 10957 ASSERT(MUTEX_HELD(&dtrace_lock)); 10958 10959 for (i = 0; i < enab->dten_ndesc; i++) { 10960 dtrace_actdesc_t *act, *next; 10961 dtrace_predicate_t *pred; 10962 10963 ep = enab->dten_desc[i]; 10964 10965 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) 10966 dtrace_predicate_release(pred, vstate); 10967 10968 for (act = ep->dted_action; act != NULL; act = next) { 10969 next = act->dtad_next; 10970 dtrace_actdesc_release(act, vstate); 10971 } 10972 10973 kmem_free(ep, sizeof (dtrace_ecbdesc_t)); 10974 } 10975 10976 kmem_free(enab->dten_desc, 10977 enab->dten_maxdesc * sizeof (dtrace_enabling_t *)); 10978 10979 /* 10980 * If this was a retained enabling, decrement the dts_nretained count 10981 * and take it off of the dtrace_retained list. 10982 */ 10983 if (enab->dten_prev != NULL || enab->dten_next != NULL || 10984 dtrace_retained == enab) { 10985 ASSERT(enab->dten_vstate->dtvs_state != NULL); 10986 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); 10987 enab->dten_vstate->dtvs_state->dts_nretained--; 10988 dtrace_retained_gen++; 10989 } 10990 10991 if (enab->dten_prev == NULL) { 10992 if (dtrace_retained == enab) { 10993 dtrace_retained = enab->dten_next; 10994 10995 if (dtrace_retained != NULL) 10996 dtrace_retained->dten_prev = NULL; 10997 } 10998 } else { 10999 ASSERT(enab != dtrace_retained); 11000 ASSERT(dtrace_retained != NULL); 11001 enab->dten_prev->dten_next = enab->dten_next; 11002 } 11003 11004 if (enab->dten_next != NULL) { 11005 ASSERT(dtrace_retained != NULL); 11006 enab->dten_next->dten_prev = enab->dten_prev; 11007 } 11008 11009 kmem_free(enab, sizeof (dtrace_enabling_t)); 11010 } 11011 11012 static int 11013 dtrace_enabling_retain(dtrace_enabling_t *enab) 11014 { 11015 dtrace_state_t *state; 11016 11017 ASSERT(MUTEX_HELD(&dtrace_lock)); 11018 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); 11019 ASSERT(enab->dten_vstate != NULL); 11020 11021 state = enab->dten_vstate->dtvs_state; 11022 ASSERT(state != NULL); 11023 11024 /* 11025 * We only allow each state to retain dtrace_retain_max enablings. 
11026 */ 11027 if (state->dts_nretained >= dtrace_retain_max) 11028 return (ENOSPC); 11029 11030 state->dts_nretained++; 11031 dtrace_retained_gen++; 11032 11033 if (dtrace_retained == NULL) { 11034 dtrace_retained = enab; 11035 return (0); 11036 } 11037 11038 enab->dten_next = dtrace_retained; 11039 dtrace_retained->dten_prev = enab; 11040 dtrace_retained = enab; 11041 11042 return (0); 11043 } 11044 11045 static int 11046 dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, 11047 dtrace_probedesc_t *create) 11048 { 11049 dtrace_enabling_t *new, *enab; 11050 int found = 0, err = ENOENT; 11051 11052 ASSERT(MUTEX_HELD(&dtrace_lock)); 11053 ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); 11054 ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN); 11055 ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN); 11056 ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN); 11057 11058 new = dtrace_enabling_create(&state->dts_vstate); 11059 11060 /* 11061 * Iterate over all retained enablings, looking for enablings that 11062 * match the specified state. 11063 */ 11064 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 11065 int i; 11066 11067 /* 11068 * dtvs_state can only be NULL for helper enablings -- and 11069 * helper enablings can't be retained. 11070 */ 11071 ASSERT(enab->dten_vstate->dtvs_state != NULL); 11072 11073 if (enab->dten_vstate->dtvs_state != state) 11074 continue; 11075 11076 /* 11077 * Now iterate over each probe description; we're looking for 11078 * an exact match to the specified probe description. 11079 */ 11080 for (i = 0; i < enab->dten_ndesc; i++) { 11081 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 11082 dtrace_probedesc_t *pd = &ep->dted_probe; 11083 11084 if (strcmp(pd->dtpd_provider, match->dtpd_provider)) 11085 continue; 11086 11087 if (strcmp(pd->dtpd_mod, match->dtpd_mod)) 11088 continue; 11089 11090 if (strcmp(pd->dtpd_func, match->dtpd_func)) 11091 continue; 11092 11093 if (strcmp(pd->dtpd_name, match->dtpd_name)) 11094 continue; 11095 11096 /* 11097 * We have a winning probe! Add it to our growing 11098 * enabling. 11099 */ 11100 found = 1; 11101 dtrace_enabling_addlike(new, ep, create); 11102 } 11103 } 11104 11105 if (!found || (err = dtrace_enabling_retain(new)) != 0) { 11106 dtrace_enabling_destroy(new); 11107 return (err); 11108 } 11109 11110 return (0); 11111 } 11112 11113 static void 11114 dtrace_enabling_retract(dtrace_state_t *state) 11115 { 11116 dtrace_enabling_t *enab, *next; 11117 11118 ASSERT(MUTEX_HELD(&dtrace_lock)); 11119 11120 /* 11121 * Iterate over all retained enablings, destroy the enablings retained 11122 * for the specified state. 11123 */ 11124 for (enab = dtrace_retained; enab != NULL; enab = next) { 11125 next = enab->dten_next; 11126 11127 /* 11128 * dtvs_state can only be NULL for helper enablings -- and 11129 * helper enablings can't be retained. 
11130 */ 11131 ASSERT(enab->dten_vstate->dtvs_state != NULL); 11132 11133 if (enab->dten_vstate->dtvs_state == state) { 11134 ASSERT(state->dts_nretained > 0); 11135 dtrace_enabling_destroy(enab); 11136 } 11137 } 11138 11139 ASSERT(state->dts_nretained == 0); 11140 } 11141 11142 static int 11143 dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) 11144 { 11145 int i = 0; 11146 int total_matched = 0, matched = 0; 11147 11148 ASSERT(MUTEX_HELD(&cpu_lock)); 11149 ASSERT(MUTEX_HELD(&dtrace_lock)); 11150 11151 for (i = 0; i < enab->dten_ndesc; i++) { 11152 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 11153 11154 enab->dten_current = ep; 11155 enab->dten_error = 0; 11156 11157 /* 11158 * If a provider failed to enable a probe then get out and 11159 * let the consumer know we failed. 11160 */ 11161 if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0) 11162 return (EBUSY); 11163 11164 total_matched += matched; 11165 11166 if (enab->dten_error != 0) { 11167 /* 11168 * If we get an error half-way through enabling the 11169 * probes, we kick out -- perhaps with some number of 11170 * them enabled. Leaving enabled probes enabled may 11171 * be slightly confusing for user-level, but we expect 11172 * that no one will attempt to actually drive on in 11173 * the face of such errors. If this is an anonymous 11174 * enabling (indicated with a NULL nmatched pointer), 11175 * we cmn_err() a message. We aren't expecting to 11176 * get such an error -- such as it can exist at all, 11177 * it would be a result of corrupted DOF in the driver 11178 * properties. 11179 */ 11180 if (nmatched == NULL) { 11181 cmn_err(CE_WARN, "dtrace_enabling_match() " 11182 "error on %p: %d", (void *)ep, 11183 enab->dten_error); 11184 } 11185 11186 return (enab->dten_error); 11187 } 11188 } 11189 11190 enab->dten_probegen = dtrace_probegen; 11191 if (nmatched != NULL) 11192 *nmatched = total_matched; 11193 11194 return (0); 11195 } 11196 11197 static void 11198 dtrace_enabling_matchall(void) 11199 { 11200 dtrace_enabling_t *enab; 11201 11202 mutex_enter(&cpu_lock); 11203 mutex_enter(&dtrace_lock); 11204 11205 /* 11206 * Iterate over all retained enablings to see if any probes match 11207 * against them. We only perform this operation on enablings for which 11208 * we have sufficient permissions by virtue of being in the global zone 11209 * or in the same zone as the DTrace client. Because we can be called 11210 * after dtrace_detach() has been called, we cannot assert that there 11211 * are retained enablings. We can safely load from dtrace_retained, 11212 * however: the taskq_destroy() at the end of dtrace_detach() will 11213 * block pending our completion. 11214 */ 11215 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 11216 dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred; 11217 cred_t *cr = dcr->dcr_cred; 11218 zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0; 11219 11220 if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL && 11221 (zone == GLOBAL_ZONEID || getzoneid() == zone))) 11222 (void) dtrace_enabling_match(enab, NULL); 11223 } 11224 11225 mutex_exit(&dtrace_lock); 11226 mutex_exit(&cpu_lock); 11227 } 11228 11229 /* 11230 * If an enabling is to be enabled without having matched probes (that is, if 11231 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the 11232 * enabling must be _primed_ by creating an ECB for every ECB description. 
This must be done to assure that we know the number of speculations, the 11234 * number of aggregations, the minimum buffer size needed, etc. before we 11235 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually 11236 * enabling any probes, we create ECBs for every ECB description, but with a 11237 * NULL probe -- which is exactly what this function does. 11238 */ 11239 static void 11240 dtrace_enabling_prime(dtrace_state_t *state) 11241 { 11242 dtrace_enabling_t *enab; 11243 int i; 11244 11245 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 11246 ASSERT(enab->dten_vstate->dtvs_state != NULL); 11247 11248 if (enab->dten_vstate->dtvs_state != state) 11249 continue; 11250 11251 /* 11252 * We don't want to prime an enabling more than once, lest 11253 * we allow a malicious user to induce resource exhaustion. 11254 * (The ECBs that result from priming an enabling aren't 11255 * leaked -- but they also aren't deallocated until the 11256 * consumer state is destroyed.) 11257 */ 11258 if (enab->dten_primed) 11259 continue; 11260 11261 for (i = 0; i < enab->dten_ndesc; i++) { 11262 enab->dten_current = enab->dten_desc[i]; 11263 (void) dtrace_probe_enable(NULL, enab); 11264 } 11265 11266 enab->dten_primed = 1; 11267 } 11268 } 11269 11270 /* 11271 * Called to indicate that probes should be provided due to retained 11272 * enablings. This is implemented in terms of dtrace_probe_provide(), but it 11273 * must take an initial lap through the enabling calling the dtps_provide() 11274 * entry point explicitly to allow for autocreated probes. 11275 */ 11276 static void 11277 dtrace_enabling_provide(dtrace_provider_t *prv) 11278 { 11279 int i, all = 0; 11280 dtrace_probedesc_t desc; 11281 dtrace_genid_t gen; 11282 11283 ASSERT(MUTEX_HELD(&dtrace_lock)); 11284 ASSERT(MUTEX_HELD(&dtrace_provider_lock)); 11285 11286 if (prv == NULL) { 11287 all = 1; 11288 prv = dtrace_provider; 11289 } 11290 11291 do { 11292 dtrace_enabling_t *enab; 11293 void *parg = prv->dtpv_arg; 11294 11295 retry: 11296 gen = dtrace_retained_gen; 11297 for (enab = dtrace_retained; enab != NULL; 11298 enab = enab->dten_next) { 11299 for (i = 0; i < enab->dten_ndesc; i++) { 11300 desc = enab->dten_desc[i]->dted_probe; 11301 mutex_exit(&dtrace_lock); 11302 prv->dtpv_pops.dtps_provide(parg, &desc); 11303 mutex_enter(&dtrace_lock); 11304 /* 11305 * Process the retained enablings again if 11306 * they have changed while we weren't holding 11307 * dtrace_lock. 11308 */ 11309 if (gen != dtrace_retained_gen) 11310 goto retry; 11311 } 11312 } 11313 } while (all && (prv = prv->dtpv_next) != NULL); 11314 11315 mutex_exit(&dtrace_lock); 11316 dtrace_probe_provide(NULL, all ? NULL : prv); 11317 mutex_enter(&dtrace_lock); 11318 } 11319 11320 /* 11321 * Called to reap ECBs that are attached to probes from defunct providers. 11322 */ 11323 static void 11324 dtrace_enabling_reap(void) 11325 { 11326 dtrace_provider_t *prov; 11327 dtrace_probe_t *probe; 11328 dtrace_ecb_t *ecb; 11329 hrtime_t when; 11330 int i; 11331 11332 mutex_enter(&cpu_lock); 11333 mutex_enter(&dtrace_lock); 11334 11335 for (i = 0; i < dtrace_nprobes; i++) { 11336 if ((probe = dtrace_probes[i]) == NULL) 11337 continue; 11338 11339 if (probe->dtpr_ecb == NULL) 11340 continue; 11341 11342 prov = probe->dtpr_provider; 11343 11344 if ((when = prov->dtpv_defunct) == 0) 11345 continue; 11346 11347 /* 11348 * We have ECBs on a defunct provider: we want to reap these 11349 * ECBs to allow the provider to unregister.
The destruction 11350 * of these ECBs must be done carefully: if we destroy the ECB 11351 * and the consumer later wishes to consume an EPID that 11352 * corresponds to the destroyed ECB (and if the EPID metadata 11353 * has not been previously consumed), the consumer will abort 11354 * processing on the unknown EPID. To reduce (but not, sadly, 11355 * eliminate) the possibility of this, we will only destroy an 11356 * ECB for a defunct provider if, for the state that 11357 * corresponds to the ECB: 11358 * 11359 * (a) There is no speculative tracing (which can effectively 11360 * cache an EPID for an arbitrary amount of time). 11361 * 11362 * (b) The principal buffers have been switched twice since the 11363 * provider became defunct. 11364 * 11365 * (c) The aggregation buffers are of zero size or have been 11366 * switched twice since the provider became defunct. 11367 * 11368 * We use dts_speculates to determine (a) and call a function 11369 * (dtrace_buffer_consumed()) to determine (b) and (c). Note 11370 * that as soon as we've been unable to destroy one of the ECBs 11371 * associated with the probe, we quit trying -- reaping is only 11372 * fruitful in as much as we can destroy all ECBs associated 11373 * with the defunct provider's probes. 11374 */ 11375 while ((ecb = probe->dtpr_ecb) != NULL) { 11376 dtrace_state_t *state = ecb->dte_state; 11377 dtrace_buffer_t *buf = state->dts_buffer; 11378 dtrace_buffer_t *aggbuf = state->dts_aggbuffer; 11379 11380 if (state->dts_speculates) 11381 break; 11382 11383 if (!dtrace_buffer_consumed(buf, when)) 11384 break; 11385 11386 if (!dtrace_buffer_consumed(aggbuf, when)) 11387 break; 11388 11389 dtrace_ecb_disable(ecb); 11390 ASSERT(probe->dtpr_ecb != ecb); 11391 dtrace_ecb_destroy(ecb); 11392 } 11393 } 11394 11395 mutex_exit(&dtrace_lock); 11396 mutex_exit(&cpu_lock); 11397 } 11398 11399 /* 11400 * DTrace DOF Functions 11401 */ 11402 /*ARGSUSED*/ 11403 static void 11404 dtrace_dof_error(dof_hdr_t *dof, const char *str) 11405 { 11406 if (dtrace_err_verbose) 11407 cmn_err(CE_WARN, "failed to process DOF: %s", str); 11408 11409 #ifdef DTRACE_ERRDEBUG 11410 dtrace_errdebug(str); 11411 #endif 11412 } 11413 11414 /* 11415 * Create DOF out of a currently enabled state. Right now, we only create 11416 * DOF containing the run-time options -- but this could be expanded to create 11417 * complete DOF representing the enabled state. 
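*
* The DOF generated below is laid out as follows (a sketch of the
* allocation performed in this function, with the single section
* header padded out to uint64_t alignment):
*
*	dof_hdr_t
*	dof_sec_t (DOF_SECT_OPTDESC)
*	dof_optdesc_t[DTRACEOPT_MAX]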
11418 */ 11419 static dof_hdr_t * 11420 dtrace_dof_create(dtrace_state_t *state) 11421 { 11422 dof_hdr_t *dof; 11423 dof_sec_t *sec; 11424 dof_optdesc_t *opt; 11425 int i, len = sizeof (dof_hdr_t) + 11426 roundup(sizeof (dof_sec_t), sizeof (uint64_t)) + 11427 sizeof (dof_optdesc_t) * DTRACEOPT_MAX; 11428 11429 ASSERT(MUTEX_HELD(&dtrace_lock)); 11430 11431 dof = kmem_zalloc(len, KM_SLEEP); 11432 dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; 11433 dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; 11434 dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; 11435 dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3; 11436 11437 dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE; 11438 dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE; 11439 dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION; 11440 dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION; 11441 dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS; 11442 dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS; 11443 11444 dof->dofh_flags = 0; 11445 dof->dofh_hdrsize = sizeof (dof_hdr_t); 11446 dof->dofh_secsize = sizeof (dof_sec_t); 11447 dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */ 11448 dof->dofh_secoff = sizeof (dof_hdr_t); 11449 dof->dofh_loadsz = len; 11450 dof->dofh_filesz = len; 11451 dof->dofh_pad = 0; 11452 11453 /* 11454 * Fill in the option section header... 11455 */ 11456 sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t)); 11457 sec->dofs_type = DOF_SECT_OPTDESC; 11458 sec->dofs_align = sizeof (uint64_t); 11459 sec->dofs_flags = DOF_SECF_LOAD; 11460 sec->dofs_entsize = sizeof (dof_optdesc_t); 11461 11462 opt = (dof_optdesc_t *)((uintptr_t)sec + 11463 roundup(sizeof (dof_sec_t), sizeof (uint64_t))); 11464 11465 sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof; 11466 sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX; 11467 11468 for (i = 0; i < DTRACEOPT_MAX; i++) { 11469 opt[i].dofo_option = i; 11470 opt[i].dofo_strtab = DOF_SECIDX_NONE; 11471 opt[i].dofo_value = state->dts_options[i]; 11472 } 11473 11474 return (dof); 11475 } 11476 11477 static dof_hdr_t * 11478 dtrace_dof_copyin(uintptr_t uarg, int *errp) 11479 { 11480 dof_hdr_t hdr, *dof; 11481 11482 ASSERT(!MUTEX_HELD(&dtrace_lock)); 11483 11484 /* 11485 * First, we're going to copyin() the sizeof (dof_hdr_t). 11486 */ 11487 if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { 11488 dtrace_dof_error(NULL, "failed to copyin DOF header"); 11489 *errp = EFAULT; 11490 return (NULL); 11491 } 11492 11493 /* 11494 * Now we'll allocate the entire DOF and copy it in -- provided 11495 * that the length isn't outrageous. 11496 */ 11497 if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { 11498 dtrace_dof_error(&hdr, "load size exceeds maximum"); 11499 *errp = E2BIG; 11500 return (NULL); 11501 } 11502 11503 if (hdr.dofh_loadsz < sizeof (hdr)) { 11504 dtrace_dof_error(&hdr, "invalid load size"); 11505 *errp = EINVAL; 11506 return (NULL); 11507 } 11508 11509 dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); 11510 11511 if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || 11512 dof->dofh_loadsz != hdr.dofh_loadsz) { 11513 kmem_free(dof, hdr.dofh_loadsz); 11514 *errp = EFAULT; 11515 return (NULL); 11516 } 11517 11518 return (dof); 11519 } 11520 11521 static dof_hdr_t * 11522 dtrace_dof_property(const char *name) 11523 { 11524 uchar_t *buf; 11525 uint64_t loadsz; 11526 unsigned int len, i; 11527 dof_hdr_t *dof; 11528 11529 /* 11530 * Unfortunately, arrays of values in .conf files are always (and 11531 * only) interpreted to be integer arrays.
We must read our DOF 11532 * as an integer array, and then squeeze it into a byte array. 11533 */ 11534 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, 11535 (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) 11536 return (NULL); 11537 11538 for (i = 0; i < len; i++) 11539 buf[i] = (uchar_t)(((int *)buf)[i]); 11540 11541 if (len < sizeof (dof_hdr_t)) { 11542 ddi_prop_free(buf); 11543 dtrace_dof_error(NULL, "truncated header"); 11544 return (NULL); 11545 } 11546 11547 if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) { 11548 ddi_prop_free(buf); 11549 dtrace_dof_error(NULL, "truncated DOF"); 11550 return (NULL); 11551 } 11552 11553 if (loadsz >= dtrace_dof_maxsize) { 11554 ddi_prop_free(buf); 11555 dtrace_dof_error(NULL, "oversized DOF"); 11556 return (NULL); 11557 } 11558 11559 dof = kmem_alloc(loadsz, KM_SLEEP); 11560 bcopy(buf, dof, loadsz); 11561 ddi_prop_free(buf); 11562 11563 return (dof); 11564 } 11565 11566 static void 11567 dtrace_dof_destroy(dof_hdr_t *dof) 11568 { 11569 kmem_free(dof, dof->dofh_loadsz); 11570 } 11571 11572 /* 11573 * Return the dof_sec_t pointer corresponding to a given section index. If the 11574 * index is not valid, dtrace_dof_error() is called and NULL is returned. If 11575 * a type other than DOF_SECT_NONE is specified, the header is checked against 11576 * this type and NULL is returned if the types do not match. 11577 */ 11578 static dof_sec_t * 11579 dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i) 11580 { 11581 dof_sec_t *sec = (dof_sec_t *)(uintptr_t) 11582 ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize); 11583 11584 if (i >= dof->dofh_secnum) { 11585 dtrace_dof_error(dof, "referenced section index is invalid"); 11586 return (NULL); 11587 } 11588 11589 if (!(sec->dofs_flags & DOF_SECF_LOAD)) { 11590 dtrace_dof_error(dof, "referenced section is not loadable"); 11591 return (NULL); 11592 } 11593 11594 if (type != DOF_SECT_NONE && type != sec->dofs_type) { 11595 dtrace_dof_error(dof, "referenced section is the wrong type"); 11596 return (NULL); 11597 } 11598 11599 return (sec); 11600 } 11601 11602 static dtrace_probedesc_t * 11603 dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) 11604 { 11605 dof_probedesc_t *probe; 11606 dof_sec_t *strtab; 11607 uintptr_t daddr = (uintptr_t)dof; 11608 uintptr_t str; 11609 size_t size; 11610 11611 if (sec->dofs_type != DOF_SECT_PROBEDESC) { 11612 dtrace_dof_error(dof, "invalid probe section"); 11613 return (NULL); 11614 } 11615 11616 if (sec->dofs_align != sizeof (dof_secidx_t)) { 11617 dtrace_dof_error(dof, "bad alignment in probe description"); 11618 return (NULL); 11619 } 11620 11621 if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) { 11622 dtrace_dof_error(dof, "truncated probe description"); 11623 return (NULL); 11624 } 11625 11626 probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset); 11627 strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab); 11628 11629 if (strtab == NULL) 11630 return (NULL); 11631 11632 str = daddr + strtab->dofs_offset; 11633 size = strtab->dofs_size; 11634 11635 if (probe->dofp_provider >= strtab->dofs_size) { 11636 dtrace_dof_error(dof, "corrupt probe provider"); 11637 return (NULL); 11638 } 11639 11640 (void) strncpy(desc->dtpd_provider, 11641 (char *)(str + probe->dofp_provider), 11642 MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider)); 11643 11644 if (probe->dofp_mod >= strtab->dofs_size) { 11645 dtrace_dof_error(dof, "corrupt probe module"); 11646 return (NULL); 
11647 } 11648 11649 (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), 11650 MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); 11651 11652 if (probe->dofp_func >= strtab->dofs_size) { 11653 dtrace_dof_error(dof, "corrupt probe function"); 11654 return (NULL); 11655 } 11656 11657 (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), 11658 MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); 11659 11660 if (probe->dofp_name >= strtab->dofs_size) { 11661 dtrace_dof_error(dof, "corrupt probe name"); 11662 return (NULL); 11663 } 11664 11665 (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), 11666 MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); 11667 11668 return (desc); 11669 } 11670 11671 static dtrace_difo_t * 11672 dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11673 cred_t *cr) 11674 { 11675 dtrace_difo_t *dp; 11676 size_t ttl = 0; 11677 dof_difohdr_t *dofd; 11678 uintptr_t daddr = (uintptr_t)dof; 11679 size_t max = dtrace_difo_maxsize; 11680 int i, l, n; 11681 11682 static const struct { 11683 int section; 11684 int bufoffs; 11685 int lenoffs; 11686 int entsize; 11687 int align; 11688 const char *msg; 11689 } difo[] = { 11690 { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf), 11691 offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t), 11692 sizeof (dif_instr_t), "multiple DIF sections" }, 11693 11694 { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab), 11695 offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t), 11696 sizeof (uint64_t), "multiple integer tables" }, 11697 11698 { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab), 11699 offsetof(dtrace_difo_t, dtdo_strlen), 0, 11700 sizeof (char), "multiple string tables" }, 11701 11702 { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab), 11703 offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t), 11704 sizeof (uint_t), "multiple variable tables" }, 11705 11706 { DOF_SECT_NONE, 0, 0, 0, NULL } 11707 }; 11708 11709 if (sec->dofs_type != DOF_SECT_DIFOHDR) { 11710 dtrace_dof_error(dof, "invalid DIFO header section"); 11711 return (NULL); 11712 } 11713 11714 if (sec->dofs_align != sizeof (dof_secidx_t)) { 11715 dtrace_dof_error(dof, "bad alignment in DIFO header"); 11716 return (NULL); 11717 } 11718 11719 if (sec->dofs_size < sizeof (dof_difohdr_t) || 11720 sec->dofs_size % sizeof (dof_secidx_t)) { 11721 dtrace_dof_error(dof, "bad size in DIFO header"); 11722 return (NULL); 11723 } 11724 11725 dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); 11726 n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1; 11727 11728 dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); 11729 dp->dtdo_rtype = dofd->dofd_rtype; 11730 11731 for (l = 0; l < n; l++) { 11732 dof_sec_t *subsec; 11733 void **bufp; 11734 uint32_t *lenp; 11735 11736 if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE, 11737 dofd->dofd_links[l])) == NULL) 11738 goto err; /* invalid section link */ 11739 11740 if (ttl + subsec->dofs_size > max) { 11741 dtrace_dof_error(dof, "exceeds maximum size"); 11742 goto err; 11743 } 11744 11745 ttl += subsec->dofs_size; 11746 11747 for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { 11748 if (subsec->dofs_type != difo[i].section) 11749 continue; 11750 11751 if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { 11752 dtrace_dof_error(dof, "section not loaded"); 11753 goto err; 11754 } 11755 11756 if (subsec->dofs_align != difo[i].align) { 11757 dtrace_dof_error(dof, "bad alignment"); 11758 goto err; 11759 } 11760 11761 bufp = (void 
**)((uintptr_t)dp + difo[i].bufoffs); 11762 lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs); 11763 11764 if (*bufp != NULL) { 11765 dtrace_dof_error(dof, difo[i].msg); 11766 goto err; 11767 } 11768 11769 if (difo[i].entsize != subsec->dofs_entsize) { 11770 dtrace_dof_error(dof, "entry size mismatch"); 11771 goto err; 11772 } 11773 11774 if (subsec->dofs_entsize != 0 && 11775 (subsec->dofs_size % subsec->dofs_entsize) != 0) { 11776 dtrace_dof_error(dof, "corrupt entry size"); 11777 goto err; 11778 } 11779 11780 *lenp = subsec->dofs_size; 11781 *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP); 11782 bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset), 11783 *bufp, subsec->dofs_size); 11784 11785 if (subsec->dofs_entsize != 0) 11786 *lenp /= subsec->dofs_entsize; 11787 11788 break; 11789 } 11790 11791 /* 11792 * If we encounter a loadable DIFO sub-section that is not 11793 * known to us, assume this is a broken program and fail. 11794 */ 11795 if (difo[i].section == DOF_SECT_NONE && 11796 (subsec->dofs_flags & DOF_SECF_LOAD)) { 11797 dtrace_dof_error(dof, "unrecognized DIFO subsection"); 11798 goto err; 11799 } 11800 } 11801 11802 if (dp->dtdo_buf == NULL) { 11803 /* 11804 * We can't have a DIF object without DIF text. 11805 */ 11806 dtrace_dof_error(dof, "missing DIF text"); 11807 goto err; 11808 } 11809 11810 /* 11811 * Before we validate the DIF object, run through the variable table 11812 * looking for the strings -- if any of their size are under, we'll set 11813 * their size to be the system-wide default string size. Note that 11814 * this should _not_ happen if the "strsize" option has been set -- 11815 * in this case, the compiler should have set the size to reflect the 11816 * setting of the option. 11817 */ 11818 for (i = 0; i < dp->dtdo_varlen; i++) { 11819 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 11820 dtrace_diftype_t *t = &v->dtdv_type; 11821 11822 if (v->dtdv_id < DIF_VAR_OTHER_UBASE) 11823 continue; 11824 11825 if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0) 11826 t->dtdt_size = dtrace_strsize_default; 11827 } 11828 11829 if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0) 11830 goto err; 11831 11832 dtrace_difo_init(dp, vstate); 11833 return (dp); 11834 11835 err: 11836 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); 11837 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); 11838 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); 11839 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); 11840 11841 kmem_free(dp, sizeof (dtrace_difo_t)); 11842 return (NULL); 11843 } 11844 11845 static dtrace_predicate_t * 11846 dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11847 cred_t *cr) 11848 { 11849 dtrace_difo_t *dp; 11850 11851 if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL) 11852 return (NULL); 11853 11854 return (dtrace_predicate_create(dp)); 11855 } 11856 11857 static dtrace_actdesc_t * 11858 dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11859 cred_t *cr) 11860 { 11861 dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next; 11862 dof_actdesc_t *desc; 11863 dof_sec_t *difosec; 11864 size_t offs; 11865 uintptr_t daddr = (uintptr_t)dof; 11866 uint64_t arg; 11867 dtrace_actkind_t kind; 11868 11869 if (sec->dofs_type != DOF_SECT_ACTDESC) { 11870 dtrace_dof_error(dof, "invalid action section"); 11871 return (NULL); 11872 } 11873 11874 if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) { 11875 dtrace_dof_error(dof, "truncated action 
description"); 11876 return (NULL); 11877 } 11878 11879 if (sec->dofs_align != sizeof (uint64_t)) { 11880 dtrace_dof_error(dof, "bad alignment in action description"); 11881 return (NULL); 11882 } 11883 11884 if (sec->dofs_size < sec->dofs_entsize) { 11885 dtrace_dof_error(dof, "section entry size exceeds total size"); 11886 return (NULL); 11887 } 11888 11889 if (sec->dofs_entsize != sizeof (dof_actdesc_t)) { 11890 dtrace_dof_error(dof, "bad entry size in action description"); 11891 return (NULL); 11892 } 11893 11894 if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) { 11895 dtrace_dof_error(dof, "actions exceed dtrace_actions_max"); 11896 return (NULL); 11897 } 11898 11899 for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) { 11900 desc = (dof_actdesc_t *)(daddr + 11901 (uintptr_t)sec->dofs_offset + offs); 11902 kind = (dtrace_actkind_t)desc->dofa_kind; 11903 11904 if ((DTRACEACT_ISPRINTFLIKE(kind) && 11905 (kind != DTRACEACT_PRINTA || 11906 desc->dofa_strtab != DOF_SECIDX_NONE)) || 11907 (kind == DTRACEACT_DIFEXPR && 11908 desc->dofa_strtab != DOF_SECIDX_NONE)) { 11909 dof_sec_t *strtab; 11910 char *str, *fmt; 11911 uint64_t i; 11912 11913 /* 11914 * The argument to these actions is an index into the 11915 * DOF string table. For printf()-like actions, this 11916 * is the format string. For print(), this is the 11917 * CTF type of the expression result. 11918 */ 11919 if ((strtab = dtrace_dof_sect(dof, 11920 DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) 11921 goto err; 11922 11923 str = (char *)((uintptr_t)dof + 11924 (uintptr_t)strtab->dofs_offset); 11925 11926 for (i = desc->dofa_arg; i < strtab->dofs_size; i++) { 11927 if (str[i] == '\0') 11928 break; 11929 } 11930 11931 if (i >= strtab->dofs_size) { 11932 dtrace_dof_error(dof, "bogus format string"); 11933 goto err; 11934 } 11935 11936 if (i == desc->dofa_arg) { 11937 dtrace_dof_error(dof, "empty format string"); 11938 goto err; 11939 } 11940 11941 i -= desc->dofa_arg; 11942 fmt = kmem_alloc(i + 1, KM_SLEEP); 11943 bcopy(&str[desc->dofa_arg], fmt, i + 1); 11944 arg = (uint64_t)(uintptr_t)fmt; 11945 } else { 11946 if (kind == DTRACEACT_PRINTA) { 11947 ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE); 11948 arg = 0; 11949 } else { 11950 arg = desc->dofa_arg; 11951 } 11952 } 11953 11954 act = dtrace_actdesc_create(kind, desc->dofa_ntuple, 11955 desc->dofa_uarg, arg); 11956 11957 if (last != NULL) { 11958 last->dtad_next = act; 11959 } else { 11960 first = act; 11961 } 11962 11963 last = act; 11964 11965 if (desc->dofa_difo == DOF_SECIDX_NONE) 11966 continue; 11967 11968 if ((difosec = dtrace_dof_sect(dof, 11969 DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL) 11970 goto err; 11971 11972 act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr); 11973 11974 if (act->dtad_difo == NULL) 11975 goto err; 11976 } 11977 11978 ASSERT(first != NULL); 11979 return (first); 11980 11981 err: 11982 for (act = first; act != NULL; act = next) { 11983 next = act->dtad_next; 11984 dtrace_actdesc_release(act, vstate); 11985 } 11986 11987 return (NULL); 11988 } 11989 11990 static dtrace_ecbdesc_t * 11991 dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11992 cred_t *cr) 11993 { 11994 dtrace_ecbdesc_t *ep; 11995 dof_ecbdesc_t *ecb; 11996 dtrace_probedesc_t *desc; 11997 dtrace_predicate_t *pred = NULL; 11998 11999 if (sec->dofs_size < sizeof (dof_ecbdesc_t)) { 12000 dtrace_dof_error(dof, "truncated ECB description"); 12001 return (NULL); 12002 } 12003 12004 if (sec->dofs_align != sizeof (uint64_t)) { 12005 
dtrace_dof_error(dof, "bad alignment in ECB description"); 12006 return (NULL); 12007 } 12008 12009 ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset); 12010 sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes); 12011 12012 if (sec == NULL) 12013 return (NULL); 12014 12015 ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); 12016 ep->dted_uarg = ecb->dofe_uarg; 12017 desc = &ep->dted_probe; 12018 12019 if (dtrace_dof_probedesc(dof, sec, desc) == NULL) 12020 goto err; 12021 12022 if (ecb->dofe_pred != DOF_SECIDX_NONE) { 12023 if ((sec = dtrace_dof_sect(dof, 12024 DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL) 12025 goto err; 12026 12027 if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL) 12028 goto err; 12029 12030 ep->dted_pred.dtpdd_predicate = pred; 12031 } 12032 12033 if (ecb->dofe_actions != DOF_SECIDX_NONE) { 12034 if ((sec = dtrace_dof_sect(dof, 12035 DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL) 12036 goto err; 12037 12038 ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr); 12039 12040 if (ep->dted_action == NULL) 12041 goto err; 12042 } 12043 12044 return (ep); 12045 12046 err: 12047 if (pred != NULL) 12048 dtrace_predicate_release(pred, vstate); 12049 kmem_free(ep, sizeof (dtrace_ecbdesc_t)); 12050 return (NULL); 12051 } 12052 12053 /* 12054 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the 12055 * specified DOF. At present, this amounts to simply adding 'ubase' to the 12056 * site of any user SETX relocations to account for load object base address. 12057 * In the future, if we need other relocations, this function can be extended. 12058 */ 12059 static int 12060 dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase) 12061 { 12062 uintptr_t daddr = (uintptr_t)dof; 12063 dof_relohdr_t *dofr = 12064 (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); 12065 dof_sec_t *ss, *rs, *ts; 12066 dof_relodesc_t *r; 12067 uint_t i, n; 12068 12069 if (sec->dofs_size < sizeof (dof_relohdr_t) || 12070 sec->dofs_align != sizeof (dof_secidx_t)) { 12071 dtrace_dof_error(dof, "invalid relocation header"); 12072 return (-1); 12073 } 12074 12075 ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab); 12076 rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec); 12077 ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec); 12078 12079 if (ss == NULL || rs == NULL || ts == NULL) 12080 return (-1); /* dtrace_dof_error() has been called already */ 12081 12082 if (rs->dofs_entsize < sizeof (dof_relodesc_t) || 12083 rs->dofs_align != sizeof (uint64_t)) { 12084 dtrace_dof_error(dof, "invalid relocation section"); 12085 return (-1); 12086 } 12087 12088 r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset); 12089 n = rs->dofs_size / rs->dofs_entsize; 12090 12091 for (i = 0; i < n; i++) { 12092 uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset; 12093 12094 switch (r->dofr_type) { 12095 case DOF_RELO_NONE: 12096 break; 12097 case DOF_RELO_SETX: 12098 if (r->dofr_offset >= ts->dofs_size || r->dofr_offset + 12099 sizeof (uint64_t) > ts->dofs_size) { 12100 dtrace_dof_error(dof, "bad relocation offset"); 12101 return (-1); 12102 } 12103 12104 if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) { 12105 dtrace_dof_error(dof, "misaligned setx relo"); 12106 return (-1); 12107 } 12108 12109 *(uint64_t *)taddr += ubase; 12110 break; 12111 default: 12112 dtrace_dof_error(dof, "invalid relocation type"); 12113 return (-1); 12114 } 12115 12116 r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize); 
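/*
* Note that r is advanced by the section's entry size rather than by
* sizeof (dof_relodesc_t): dofs_entsize was validated above to be no
* smaller than the structure itself, but it may be larger.
*/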
12117 } 12118 12119 return (0); 12120 } 12121 12122 /* 12123 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated 12124 * header: it should be at the front of a memory region that is at least 12125 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in 12126 * size. It need not be validated in any other way. 12127 */ 12128 static int 12129 dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, 12130 dtrace_enabling_t **enabp, uint64_t ubase, int noprobes) 12131 { 12132 uint64_t len = dof->dofh_loadsz, seclen; 12133 uintptr_t daddr = (uintptr_t)dof; 12134 dtrace_ecbdesc_t *ep; 12135 dtrace_enabling_t *enab; 12136 uint_t i; 12137 12138 ASSERT(MUTEX_HELD(&dtrace_lock)); 12139 ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t)); 12140 12141 /* 12142 * Check the DOF header identification bytes. In addition to checking 12143 * valid settings, we also verify that unused bits/bytes are zeroed so 12144 * we can use them later without fear of regressing existing binaries. 12145 */ 12146 if (bcmp(&dof->dofh_ident[DOF_ID_MAG0], 12147 DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) { 12148 dtrace_dof_error(dof, "DOF magic string mismatch"); 12149 return (-1); 12150 } 12151 12152 if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 && 12153 dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) { 12154 dtrace_dof_error(dof, "DOF has invalid data model"); 12155 return (-1); 12156 } 12157 12158 if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) { 12159 dtrace_dof_error(dof, "DOF encoding mismatch"); 12160 return (-1); 12161 } 12162 12163 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && 12164 dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) { 12165 dtrace_dof_error(dof, "DOF version mismatch"); 12166 return (-1); 12167 } 12168 12169 if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) { 12170 dtrace_dof_error(dof, "DOF uses unsupported instruction set"); 12171 return (-1); 12172 } 12173 12174 if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) { 12175 dtrace_dof_error(dof, "DOF uses too many integer registers"); 12176 return (-1); 12177 } 12178 12179 if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) { 12180 dtrace_dof_error(dof, "DOF uses too many tuple registers"); 12181 return (-1); 12182 } 12183 12184 for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) { 12185 if (dof->dofh_ident[i] != 0) { 12186 dtrace_dof_error(dof, "DOF has invalid ident byte set"); 12187 return (-1); 12188 } 12189 } 12190 12191 if (dof->dofh_flags & ~DOF_FL_VALID) { 12192 dtrace_dof_error(dof, "DOF has invalid flag bits set"); 12193 return (-1); 12194 } 12195 12196 if (dof->dofh_secsize == 0) { 12197 dtrace_dof_error(dof, "zero section header size"); 12198 return (-1); 12199 } 12200 12201 /* 12202 * Check that the section headers don't exceed the amount of DOF 12203 * data. Note that we cast the section size and number of sections 12204 * to uint64_t's to prevent possible overflow in the multiplication. 
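* (For instance, had dofh_secnum and dofh_secsize each held the value
* 0x10000 and been multiplied as 32-bit quantities, the product would
* wrap to zero and the checks below would erroneously pass.)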
12205 */ 12206 seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize; 12207 12208 if (dof->dofh_secoff > len || seclen > len || 12209 dof->dofh_secoff + seclen > len) { 12210 dtrace_dof_error(dof, "truncated section headers"); 12211 return (-1); 12212 } 12213 12214 if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) { 12215 dtrace_dof_error(dof, "misaligned section headers"); 12216 return (-1); 12217 } 12218 12219 if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) { 12220 dtrace_dof_error(dof, "misaligned section size"); 12221 return (-1); 12222 } 12223 12224 /* 12225 * Take an initial pass through the section headers to be sure that 12226 * the headers don't have stray offsets. If the 'noprobes' flag is 12227 * set, do not permit sections relating to providers, probes, or args. 12228 */ 12229 for (i = 0; i < dof->dofh_secnum; i++) { 12230 dof_sec_t *sec = (dof_sec_t *)(daddr + 12231 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 12232 12233 if (noprobes) { 12234 switch (sec->dofs_type) { 12235 case DOF_SECT_PROVIDER: 12236 case DOF_SECT_PROBES: 12237 case DOF_SECT_PRARGS: 12238 case DOF_SECT_PROFFS: 12239 dtrace_dof_error(dof, "illegal sections " 12240 "for enabling"); 12241 return (-1); 12242 } 12243 } 12244 12245 if (DOF_SEC_ISLOADABLE(sec->dofs_type) && 12246 !(sec->dofs_flags & DOF_SECF_LOAD)) { 12247 dtrace_dof_error(dof, "loadable section with load " 12248 "flag unset"); 12249 return (-1); 12250 } 12251 12252 if (!(sec->dofs_flags & DOF_SECF_LOAD)) 12253 continue; /* just ignore non-loadable sections */ 12254 12255 if (sec->dofs_align & (sec->dofs_align - 1)) { 12256 dtrace_dof_error(dof, "bad section alignment"); 12257 return (-1); 12258 } 12259 12260 if (sec->dofs_offset & (sec->dofs_align - 1)) { 12261 dtrace_dof_error(dof, "misaligned section"); 12262 return (-1); 12263 } 12264 12265 if (sec->dofs_offset > len || sec->dofs_size > len || 12266 sec->dofs_offset + sec->dofs_size > len) { 12267 dtrace_dof_error(dof, "corrupt section header"); 12268 return (-1); 12269 } 12270 12271 if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr + 12272 sec->dofs_offset + sec->dofs_size - 1) != '\0') { 12273 dtrace_dof_error(dof, "non-terminating string table"); 12274 return (-1); 12275 } 12276 } 12277 12278 /* 12279 * Take a second pass through the sections and locate and perform any 12280 * relocations that are present. We do this after the first pass to 12281 * be sure that all sections have had their headers validated. 12282 */ 12283 for (i = 0; i < dof->dofh_secnum; i++) { 12284 dof_sec_t *sec = (dof_sec_t *)(daddr + 12285 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 12286 12287 if (!(sec->dofs_flags & DOF_SECF_LOAD)) 12288 continue; /* skip sections that are not loadable */ 12289 12290 switch (sec->dofs_type) { 12291 case DOF_SECT_URELHDR: 12292 if (dtrace_dof_relocate(dof, sec, ubase) != 0) 12293 return (-1); 12294 break; 12295 } 12296 } 12297 12298 if ((enab = *enabp) == NULL) 12299 enab = *enabp = dtrace_enabling_create(vstate); 12300 12301 for (i = 0; i < dof->dofh_secnum; i++) { 12302 dof_sec_t *sec = (dof_sec_t *)(daddr + 12303 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 12304 12305 if (sec->dofs_type != DOF_SECT_ECBDESC) 12306 continue; 12307 12308 if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) { 12309 dtrace_enabling_destroy(enab); 12310 *enabp = NULL; 12311 return (-1); 12312 } 12313 12314 dtrace_enabling_add(enab, ep); 12315 } 12316 12317 return (0); 12318 } 12319 12320 /* 12321 * Process DOF for any options. 
This routine assumes that the DOF has been 12322 * at least processed by dtrace_dof_slurp(). 12323 */ 12324 static int 12325 dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) 12326 { 12327 int i, rval; 12328 uint32_t entsize; 12329 size_t offs; 12330 dof_optdesc_t *desc; 12331 12332 for (i = 0; i < dof->dofh_secnum; i++) { 12333 dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof + 12334 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 12335 12336 if (sec->dofs_type != DOF_SECT_OPTDESC) 12337 continue; 12338 12339 if (sec->dofs_align != sizeof (uint64_t)) { 12340 dtrace_dof_error(dof, "bad alignment in " 12341 "option description"); 12342 return (EINVAL); 12343 } 12344 12345 if ((entsize = sec->dofs_entsize) == 0) { 12346 dtrace_dof_error(dof, "zeroed option entry size"); 12347 return (EINVAL); 12348 } 12349 12350 if (entsize < sizeof (dof_optdesc_t)) { 12351 dtrace_dof_error(dof, "bad option entry size"); 12352 return (EINVAL); 12353 } 12354 12355 for (offs = 0; offs < sec->dofs_size; offs += entsize) { 12356 desc = (dof_optdesc_t *)((uintptr_t)dof + 12357 (uintptr_t)sec->dofs_offset + offs); 12358 12359 if (desc->dofo_strtab != DOF_SECIDX_NONE) { 12360 dtrace_dof_error(dof, "non-zero option string"); 12361 return (EINVAL); 12362 } 12363 12364 if (desc->dofo_value == DTRACEOPT_UNSET) { 12365 dtrace_dof_error(dof, "unset option"); 12366 return (EINVAL); 12367 } 12368 12369 if ((rval = dtrace_state_option(state, 12370 desc->dofo_option, desc->dofo_value)) != 0) { 12371 dtrace_dof_error(dof, "rejected option"); 12372 return (rval); 12373 } 12374 } 12375 } 12376 12377 return (0); 12378 } 12379 12380 /* 12381 * DTrace Consumer State Functions 12382 */ 12383 int 12384 dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) 12385 { 12386 size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize; 12387 void *base; 12388 uintptr_t limit; 12389 dtrace_dynvar_t *dvar, *next, *start; 12390 int i; 12391 12392 ASSERT(MUTEX_HELD(&dtrace_lock)); 12393 ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); 12394 12395 bzero(dstate, sizeof (dtrace_dstate_t)); 12396 12397 if ((dstate->dtds_chunksize = chunksize) == 0) 12398 dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; 12399 12400 if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) 12401 size = min; 12402 12403 if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) 12404 return (ENOMEM); 12405 12406 dstate->dtds_size = size; 12407 dstate->dtds_base = base; 12408 dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP); 12409 bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t)); 12410 12411 hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)); 12412 12413 if (hashsize != 1 && (hashsize & 1)) 12414 hashsize--; 12415 12416 dstate->dtds_hashsize = hashsize; 12417 dstate->dtds_hash = dstate->dtds_base; 12418 12419 /* 12420 * Set all of our hash buckets to point to the single sink, and (if 12421 * it hasn't already been set), set the sink's hash value to be the 12422 * sink sentinel value. The sink is needed for dynamic variable 12423 * lookups to know that they have iterated over an entire, valid hash 12424 * chain. 12425 */ 12426 for (i = 0; i < hashsize; i++) 12427 dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink; 12428 12429 if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK) 12430 dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK; 12431 12432 /* 12433 * Determine number of active CPUs. Divide free list evenly among 12434 * active CPUs. 
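 *
 * As a worked example (with illustrative numbers, and ignoring the
 * space consumed by the hash table itself): given a 256-byte chunk
 * size, one megabyte of dynamic variable space and 64 CPUs, maxper
 * below works out to 16K -- roughly 64 chunks on each CPU's free
 * list.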
12435 */ 12436 start = (dtrace_dynvar_t *) 12437 ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); 12438 limit = (uintptr_t)base + size; 12439 12440 maxper = (limit - (uintptr_t)start) / NCPU; 12441 maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; 12442 12443 for (i = 0; i < NCPU; i++) { 12444 dstate->dtds_percpu[i].dtdsc_free = dvar = start; 12445 12446 /* 12447 * If we don't even have enough chunks to make it once through 12448 * NCPUs, we're just going to allocate everything to the first 12449 * CPU. And if we're on the last CPU, we're going to allocate 12450 * whatever is left over. In either case, we set the limit to 12451 * be the limit of the dynamic variable space. 12452 */ 12453 if (maxper == 0 || i == NCPU - 1) { 12454 limit = (uintptr_t)base + size; 12455 start = NULL; 12456 } else { 12457 limit = (uintptr_t)start + maxper; 12458 start = (dtrace_dynvar_t *)limit; 12459 } 12460 12461 ASSERT(limit <= (uintptr_t)base + size); 12462 12463 for (;;) { 12464 next = (dtrace_dynvar_t *)((uintptr_t)dvar + 12465 dstate->dtds_chunksize); 12466 12467 if ((uintptr_t)next + dstate->dtds_chunksize >= limit) 12468 break; 12469 12470 dvar->dtdv_next = next; 12471 dvar = next; 12472 } 12473 12474 if (maxper == 0) 12475 break; 12476 } 12477 12478 return (0); 12479 } 12480 12481 void 12482 dtrace_dstate_fini(dtrace_dstate_t *dstate) 12483 { 12484 ASSERT(MUTEX_HELD(&cpu_lock)); 12485 12486 if (dstate->dtds_base == NULL) 12487 return; 12488 12489 kmem_free(dstate->dtds_base, dstate->dtds_size); 12490 kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu); 12491 } 12492 12493 static void 12494 dtrace_vstate_fini(dtrace_vstate_t *vstate) 12495 { 12496 /* 12497 * Logical XOR, where are you? 12498 */ 12499 ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL)); 12500 12501 if (vstate->dtvs_nglobals > 0) { 12502 kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals * 12503 sizeof (dtrace_statvar_t *)); 12504 } 12505 12506 if (vstate->dtvs_ntlocals > 0) { 12507 kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals * 12508 sizeof (dtrace_difv_t)); 12509 } 12510 12511 ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL)); 12512 12513 if (vstate->dtvs_nlocals > 0) { 12514 kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals * 12515 sizeof (dtrace_statvar_t *)); 12516 } 12517 } 12518 12519 static void 12520 dtrace_state_clean(dtrace_state_t *state) 12521 { 12522 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) 12523 return; 12524 12525 dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars); 12526 dtrace_speculation_clean(state); 12527 } 12528 12529 static void 12530 dtrace_state_deadman(dtrace_state_t *state) 12531 { 12532 hrtime_t now; 12533 12534 dtrace_sync(); 12535 12536 now = dtrace_gethrtime(); 12537 12538 if (state != dtrace_anon.dta_state && 12539 now - state->dts_laststatus >= dtrace_deadman_user) 12540 return; 12541 12542 /* 12543 * We must be sure that dts_alive never appears to be less than the 12544 * value upon entry to dtrace_state_deadman(), and because we lack a 12545 * dtrace_cas64(), we cannot store to it atomically. We thus instead 12546 * store INT64_MAX to it, followed by a memory barrier, followed by 12547 * the new value. This assures that dts_alive never appears to be 12548 * less than its true value, regardless of the order in which the 12549 * stores to the underlying storage are issued. 
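 *
 * (On a 32-bit kernel, for example, the 64-bit store may be issued
 * as two 32-bit stores; a torn read that mixes a half of INT64_MAX
 * with a half of the new value is, in practice, never below the new
 * value -- which is all that the deadman enforcement requires.)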
12550 */ 12551 state->dts_alive = INT64_MAX; 12552 dtrace_membar_producer(); 12553 state->dts_alive = now; 12554 } 12555 12556 dtrace_state_t * 12557 dtrace_state_create(dev_t *devp, cred_t *cr) 12558 { 12559 minor_t minor; 12560 major_t major; 12561 char c[30]; 12562 dtrace_state_t *state; 12563 dtrace_optval_t *opt; 12564 int bufsize = NCPU * sizeof (dtrace_buffer_t), i; 12565 12566 ASSERT(MUTEX_HELD(&dtrace_lock)); 12567 ASSERT(MUTEX_HELD(&cpu_lock)); 12568 12569 minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, 12570 VM_BESTFIT | VM_SLEEP); 12571 12572 if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { 12573 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); 12574 return (NULL); 12575 } 12576 12577 state = ddi_get_soft_state(dtrace_softstate, minor); 12578 state->dts_epid = DTRACE_EPIDNONE + 1; 12579 12580 (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor); 12581 state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1, 12582 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); 12583 12584 if (devp != NULL) { 12585 major = getemajor(*devp); 12586 } else { 12587 major = ddi_driver_major(dtrace_devi); 12588 } 12589 12590 state->dts_dev = makedevice(major, minor); 12591 12592 if (devp != NULL) 12593 *devp = state->dts_dev; 12594 12595 /* 12596 * We allocate NCPU buffers. On the one hand, this can be quite 12597 * a bit of memory per instance (nearly 36K on a Starcat). On the 12598 * other hand, it saves an additional memory reference in the probe 12599 * path. 12600 */ 12601 state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP); 12602 state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP); 12603 state->dts_cleaner = CYCLIC_NONE; 12604 state->dts_deadman = CYCLIC_NONE; 12605 state->dts_vstate.dtvs_state = state; 12606 12607 for (i = 0; i < DTRACEOPT_MAX; i++) 12608 state->dts_options[i] = DTRACEOPT_UNSET; 12609 12610 /* 12611 * Set the default options. 12612 */ 12613 opt = state->dts_options; 12614 opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH; 12615 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO; 12616 opt[DTRACEOPT_NSPEC] = dtrace_nspec_default; 12617 opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default; 12618 opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL; 12619 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default; 12620 opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default; 12621 opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default; 12622 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default; 12623 opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default; 12624 opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default; 12625 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default; 12626 opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default; 12627 opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default; 12628 12629 state->dts_activity = DTRACE_ACTIVITY_INACTIVE; 12630 12631 /* 12632 * Depending on the user credentials, we set flag bits which alter probe 12633 * visibility or the amount of destructiveness allowed. In the case of 12634 * actual anonymous tracing, or the possession of all privileges, all of 12635 * the normal checks are bypassed. 12636 */ 12637 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { 12638 state->dts_cred.dcr_visible = DTRACE_CRV_ALL; 12639 state->dts_cred.dcr_action = DTRACE_CRA_ALL; 12640 } else { 12641 /* 12642 * Set up the credentials for this instantiation. 
We take a 12643 * hold on the credential to prevent it from disappearing on 12644 * us; this in turn prevents the zone_t referenced by this 12645 * credential from disappearing. This means that we can 12646 * examine the credential and the zone from probe context. 12647 */ 12648 crhold(cr); 12649 state->dts_cred.dcr_cred = cr; 12650 12651 /* 12652 * CRA_PROC means "we have *some* privilege for dtrace" and 12653 * unlocks the use of variables like pid, zonename, etc. 12654 */ 12655 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) || 12656 PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { 12657 state->dts_cred.dcr_action |= DTRACE_CRA_PROC; 12658 } 12659 12660 /* 12661 * dtrace_user allows use of syscall and profile providers. 12662 * If the user also has proc_owner and/or proc_zone, we 12663 * extend the scope to include additional visibility and 12664 * destructive power. 12665 */ 12666 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) { 12667 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) { 12668 state->dts_cred.dcr_visible |= 12669 DTRACE_CRV_ALLPROC; 12670 12671 state->dts_cred.dcr_action |= 12672 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; 12673 } 12674 12675 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) { 12676 state->dts_cred.dcr_visible |= 12677 DTRACE_CRV_ALLZONE; 12678 12679 state->dts_cred.dcr_action |= 12680 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; 12681 } 12682 12683 /* 12684 * If we have all privs in whatever zone this is, 12685 * we can do destructive things to processes which 12686 * have altered credentials. 12687 */ 12688 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), 12689 cr->cr_zone->zone_privset)) { 12690 state->dts_cred.dcr_action |= 12691 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; 12692 } 12693 } 12694 12695 /* 12696 * Holding the dtrace_kernel privilege also implies that 12697 * the user has the dtrace_user privilege from a visibility 12698 * perspective. But without further privileges, some 12699 * destructive actions are not available. 12700 */ 12701 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) { 12702 /* 12703 * Make all probes in all zones visible. However, 12704 * this doesn't mean that all actions become available 12705 * to all zones. 12706 */ 12707 state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL | 12708 DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE; 12709 12710 state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL | 12711 DTRACE_CRA_PROC; 12712 /* 12713 * Holding proc_owner means that destructive actions 12714 * for *this* zone are allowed. 12715 */ 12716 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) 12717 state->dts_cred.dcr_action |= 12718 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; 12719 12720 /* 12721 * Holding proc_zone means that destructive actions 12722 * for this user/group ID in all zones are allowed. 12723 */ 12724 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) 12725 state->dts_cred.dcr_action |= 12726 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; 12727 12728 /* 12729 * If we have all privs in whatever zone this is, 12730 * we can do destructive things to processes which 12731 * have altered credentials. 12732 */ 12733 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), 12734 cr->cr_zone->zone_privset)) { 12735 state->dts_cred.dcr_action |= 12736 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; 12737 } 12738 } 12739 12740 /* 12741 * Holding the dtrace_proc privilege gives control over fasttrap 12742 * and pid providers. We need to grant wider destructive 12743 * privileges in the event that the user has proc_owner and/or 12744 * proc_zone. 
12745 */ 12746 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { 12747 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) 12748 state->dts_cred.dcr_action |= 12749 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; 12750 12751 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) 12752 state->dts_cred.dcr_action |= 12753 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; 12754 } 12755 } 12756 12757 return (state); 12758 } 12759 12760 static int 12761 dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) 12762 { 12763 dtrace_optval_t *opt = state->dts_options, size; 12764 processorid_t cpu; 12765 int flags = 0, rval, factor, divisor = 1; 12766 12767 ASSERT(MUTEX_HELD(&dtrace_lock)); 12768 ASSERT(MUTEX_HELD(&cpu_lock)); 12769 ASSERT(which < DTRACEOPT_MAX); 12770 ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE || 12771 (state == dtrace_anon.dta_state && 12772 state->dts_activity == DTRACE_ACTIVITY_ACTIVE)); 12773 12774 if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0) 12775 return (0); 12776 12777 if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET) 12778 cpu = opt[DTRACEOPT_CPU]; 12779 12780 if (which == DTRACEOPT_SPECSIZE) 12781 flags |= DTRACEBUF_NOSWITCH; 12782 12783 if (which == DTRACEOPT_BUFSIZE) { 12784 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING) 12785 flags |= DTRACEBUF_RING; 12786 12787 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL) 12788 flags |= DTRACEBUF_FILL; 12789 12790 if (state != dtrace_anon.dta_state || 12791 state->dts_activity != DTRACE_ACTIVITY_ACTIVE) 12792 flags |= DTRACEBUF_INACTIVE; 12793 } 12794 12795 for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) { 12796 /* 12797 * The size must be 8-byte aligned. If the size is not 8-byte 12798 * aligned, drop it down by the difference. 12799 */ 12800 if (size & (sizeof (uint64_t) - 1)) 12801 size -= size & (sizeof (uint64_t) - 1); 12802 12803 if (size < state->dts_reserve) { 12804 /* 12805 * Buffers always must be large enough to accommodate 12806 * their prereserved space. We return E2BIG instead 12807 * of ENOMEM in this case to allow for user-level 12808 * software to differentiate the cases. 
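 *
 * (This failure is also terminal: if the current size cannot
 * cover the reserve, no smaller size can either, so there is no
 * point in continuing the halving that we perform for ENOMEM,
 * below.)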
*/ 12810 return (E2BIG); 12811 } 12812 12813 rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor); 12814 12815 if (rval != ENOMEM) { 12816 opt[which] = size; 12817 return (rval); 12818 } 12819 12820 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) 12821 return (rval); 12822 12823 for (divisor = 2; divisor < factor; divisor <<= 1) 12824 continue; 12825 } 12826 12827 return (ENOMEM); 12828 } 12829 12830 static int 12831 dtrace_state_buffers(dtrace_state_t *state) 12832 { 12833 dtrace_speculation_t *spec = state->dts_speculations; 12834 int rval, i; 12835 12836 if ((rval = dtrace_state_buffer(state, state->dts_buffer, 12837 DTRACEOPT_BUFSIZE)) != 0) 12838 return (rval); 12839 12840 if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer, 12841 DTRACEOPT_AGGSIZE)) != 0) 12842 return (rval); 12843 12844 for (i = 0; i < state->dts_nspeculations; i++) { 12845 if ((rval = dtrace_state_buffer(state, 12846 spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0) 12847 return (rval); 12848 } 12849 12850 return (0); 12851 } 12852 12853 static void 12854 dtrace_state_prereserve(dtrace_state_t *state) 12855 { 12856 dtrace_ecb_t *ecb; 12857 dtrace_probe_t *probe; 12858 12859 state->dts_reserve = 0; 12860 12861 if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL) 12862 return; 12863 12864 /* 12865 * If our buffer policy is a "fill" buffer policy, we need to set the 12866 * prereserved space to be the space required by the END probes. 12867 */ 12868 probe = dtrace_probes[dtrace_probeid_end - 1]; 12869 ASSERT(probe != NULL); 12870 12871 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { 12872 if (ecb->dte_state != state) 12873 continue; 12874 12875 state->dts_reserve += ecb->dte_needed + ecb->dte_alignment; 12876 } 12877 } 12878 12879 static int 12880 dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) 12881 { 12882 dtrace_optval_t *opt = state->dts_options, sz, nspec; 12883 dtrace_speculation_t *spec; 12884 dtrace_buffer_t *buf; 12885 cyc_handler_t hdlr; 12886 cyc_time_t when; 12887 int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t); 12888 dtrace_icookie_t cookie; 12889 12890 mutex_enter(&cpu_lock); 12891 mutex_enter(&dtrace_lock); 12892 12893 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { 12894 rval = EBUSY; 12895 goto out; 12896 } 12897 12898 /* 12899 * Before we can perform any checks, we must prime all of the 12900 * retained enablings that correspond to this state. 12901 */ 12902 dtrace_enabling_prime(state); 12903 12904 if (state->dts_destructive && !state->dts_cred.dcr_destructive) { 12905 rval = EACCES; 12906 goto out; 12907 } 12908 12909 dtrace_state_prereserve(state); 12910 12911 /* 12912 * What we want to do now is try to allocate our speculations. 12913 * We do not automatically resize the number of speculations; if 12914 * this fails, we will fail the operation. 
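 *
 * (This is in contrast to the principal, aggregation and
 * speculation buffers themselves, which dtrace_state_buffer() will
 * shrink to fit when the buffer resize policy is "auto".)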
12915 */ 12916 nspec = opt[DTRACEOPT_NSPEC]; 12917 ASSERT(nspec != DTRACEOPT_UNSET); 12918 12919 if (nspec > INT_MAX) { 12920 rval = ENOMEM; 12921 goto out; 12922 } 12923 12924 spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), 12925 KM_NOSLEEP | KM_NORMALPRI); 12926 12927 if (spec == NULL) { 12928 rval = ENOMEM; 12929 goto out; 12930 } 12931 12932 state->dts_speculations = spec; 12933 state->dts_nspeculations = (int)nspec; 12934 12935 for (i = 0; i < nspec; i++) { 12936 if ((buf = kmem_zalloc(bufsize, 12937 KM_NOSLEEP | KM_NORMALPRI)) == NULL) { 12938 rval = ENOMEM; 12939 goto err; 12940 } 12941 12942 spec[i].dtsp_buffer = buf; 12943 } 12944 12945 if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) { 12946 if (dtrace_anon.dta_state == NULL) { 12947 rval = ENOENT; 12948 goto out; 12949 } 12950 12951 if (state->dts_necbs != 0) { 12952 rval = EALREADY; 12953 goto out; 12954 } 12955 12956 state->dts_anon = dtrace_anon_grab(); 12957 ASSERT(state->dts_anon != NULL); 12958 state = state->dts_anon; 12959 12960 /* 12961 * We want "grabanon" to be set in the grabbed state, so we'll 12962 * copy that option value from the grabbing state into the 12963 * grabbed state. 12964 */ 12965 state->dts_options[DTRACEOPT_GRABANON] = 12966 opt[DTRACEOPT_GRABANON]; 12967 12968 *cpu = dtrace_anon.dta_beganon; 12969 12970 /* 12971 * If the anonymous state is active (as it almost certainly 12972 * is if the anonymous enabling ultimately matched anything), 12973 * we don't allow any further option processing -- but we 12974 * don't return failure. 12975 */ 12976 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 12977 goto out; 12978 } 12979 12980 if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET && 12981 opt[DTRACEOPT_AGGSIZE] != 0) { 12982 if (state->dts_aggregations == NULL) { 12983 /* 12984 * We're not going to create an aggregation buffer 12985 * because we don't have any ECBs that contain 12986 * aggregations -- set this option to 0. 12987 */ 12988 opt[DTRACEOPT_AGGSIZE] = 0; 12989 } else { 12990 /* 12991 * If we have an aggregation buffer, we must also have 12992 * a buffer to use as scratch. 12993 */ 12994 if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || 12995 opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { 12996 opt[DTRACEOPT_BUFSIZE] = state->dts_needed; 12997 } 12998 } 12999 } 13000 13001 if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET && 13002 opt[DTRACEOPT_SPECSIZE] != 0) { 13003 if (!state->dts_speculates) { 13004 /* 13005 * We're not going to create speculation buffers 13006 * because we don't have any ECBs that actually 13007 * speculate -- set the speculation size to 0. 13008 */ 13009 opt[DTRACEOPT_SPECSIZE] = 0; 13010 } 13011 } 13012 13013 /* 13014 * The bare minimum size for any buffer that we're actually going to 13015 * do anything to is sizeof (uint64_t). 13016 */ 13017 sz = sizeof (uint64_t); 13018 13019 if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) || 13020 (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) || 13021 (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) { 13022 /* 13023 * A buffer size has been explicitly set to 0 (or to a size 13024 * that will be adjusted to 0) and we need the space -- we 13025 * need to return failure. We return ENOSPC to differentiate 13026 * it from failing to allocate a buffer due to failure to meet 13027 * the reserve (for which we return E2BIG). 
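 *
 * (To summarize the buffer sizing errors: ENOSPC denotes a needed
 * buffer that has been sized to zero, E2BIG a size that cannot
 * cover the fill-policy reserve, and ENOMEM an allocation that
 * failed outright even after any permitted shrinking.)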
13028 */ 13029 rval = ENOSPC; 13030 goto out; 13031 } 13032 13033 if ((rval = dtrace_state_buffers(state)) != 0) 13034 goto err; 13035 13036 if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET) 13037 sz = dtrace_dstate_defsize; 13038 13039 do { 13040 rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz); 13041 13042 if (rval == 0) 13043 break; 13044 13045 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) 13046 goto err; 13047 } while (sz >>= 1); 13048 13049 opt[DTRACEOPT_DYNVARSIZE] = sz; 13050 13051 if (rval != 0) 13052 goto err; 13053 13054 if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max) 13055 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max; 13056 13057 if (opt[DTRACEOPT_CLEANRATE] == 0) 13058 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; 13059 13060 if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min) 13061 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min; 13062 13063 if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) 13064 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; 13065 13066 hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; 13067 hdlr.cyh_arg = state; 13068 hdlr.cyh_level = CY_LOW_LEVEL; 13069 13070 when.cyt_when = 0; 13071 when.cyt_interval = opt[DTRACEOPT_CLEANRATE]; 13072 13073 state->dts_cleaner = cyclic_add(&hdlr, &when); 13074 13075 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman; 13076 hdlr.cyh_arg = state; 13077 hdlr.cyh_level = CY_LOW_LEVEL; 13078 13079 when.cyt_when = 0; 13080 when.cyt_interval = dtrace_deadman_interval; 13081 13082 state->dts_alive = state->dts_laststatus = dtrace_gethrtime(); 13083 state->dts_deadman = cyclic_add(&hdlr, &when); 13084 13085 state->dts_activity = DTRACE_ACTIVITY_WARMUP; 13086 13087 /* 13088 * Now it's time to actually fire the BEGIN probe. We need to disable 13089 * interrupts here both to record the CPU on which we fired the BEGIN 13090 * probe (the data from this CPU will be processed first at user 13091 * level) and to manually activate the buffer for this CPU. 13092 */ 13093 cookie = dtrace_interrupt_disable(); 13094 *cpu = CPU->cpu_id; 13095 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE); 13096 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; 13097 13098 dtrace_probe(dtrace_probeid_begin, 13099 (uint64_t)(uintptr_t)state, 0, 0, 0, 0); 13100 dtrace_interrupt_enable(cookie); 13101 /* 13102 * We may have had an exit action from a BEGIN probe; only change our 13103 * state to ACTIVE if we're still in WARMUP. 13104 */ 13105 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP || 13106 state->dts_activity == DTRACE_ACTIVITY_DRAINING); 13107 13108 if (state->dts_activity == DTRACE_ACTIVITY_WARMUP) 13109 state->dts_activity = DTRACE_ACTIVITY_ACTIVE; 13110 13111 /* 13112 * Regardless of whether we're now in ACTIVE or DRAINING, we want 13113 * each CPU to transition its principal buffer out of the 13114 * INACTIVE state. Doing this assures that no CPU will suddenly begin 13115 * processing an ECB halfway down a probe's ECB chain; all CPUs will 13116 * atomically transition from processing none of a state's ECBs to 13117 * processing all of them. 
13118 */ 13119 dtrace_xcall(DTRACE_CPUALL, 13120 (dtrace_xcall_t)dtrace_buffer_activate, state); 13121 goto out; 13122 13123 err: 13124 dtrace_buffer_free(state->dts_buffer); 13125 dtrace_buffer_free(state->dts_aggbuffer); 13126 13127 if ((nspec = state->dts_nspeculations) == 0) { 13128 ASSERT(state->dts_speculations == NULL); 13129 goto out; 13130 } 13131 13132 spec = state->dts_speculations; 13133 ASSERT(spec != NULL); 13134 13135 for (i = 0; i < state->dts_nspeculations; i++) { 13136 if ((buf = spec[i].dtsp_buffer) == NULL) 13137 break; 13138 13139 dtrace_buffer_free(buf); 13140 kmem_free(buf, bufsize); 13141 } 13142 13143 kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); 13144 state->dts_nspeculations = 0; 13145 state->dts_speculations = NULL; 13146 13147 out: 13148 mutex_exit(&dtrace_lock); 13149 mutex_exit(&cpu_lock); 13150 13151 return (rval); 13152 } 13153 13154 static int 13155 dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu) 13156 { 13157 dtrace_icookie_t cookie; 13158 13159 ASSERT(MUTEX_HELD(&dtrace_lock)); 13160 13161 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && 13162 state->dts_activity != DTRACE_ACTIVITY_DRAINING) 13163 return (EINVAL); 13164 13165 /* 13166 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync 13167 * to be sure that every CPU has seen it. See below for the details 13168 * on why this is done. 13169 */ 13170 state->dts_activity = DTRACE_ACTIVITY_DRAINING; 13171 dtrace_sync(); 13172 13173 /* 13174 * By this point, it is impossible for any CPU to be still processing 13175 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to 13176 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any 13177 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe() 13178 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN 13179 * iff we're in the END probe. 13180 */ 13181 state->dts_activity = DTRACE_ACTIVITY_COOLDOWN; 13182 dtrace_sync(); 13183 ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN); 13184 13185 /* 13186 * Finally, we can release the reserve and call the END probe. We 13187 * disable interrupts across calling the END probe to allow us to 13188 * return the CPU on which we actually called the END probe. This 13189 * allows user-land to be sure that this CPU's principal buffer is 13190 * processed last. 
13191 */ 13192 state->dts_reserve = 0; 13193 13194 cookie = dtrace_interrupt_disable(); 13195 *cpu = CPU->cpu_id; 13196 dtrace_probe(dtrace_probeid_end, 13197 (uint64_t)(uintptr_t)state, 0, 0, 0, 0); 13198 dtrace_interrupt_enable(cookie); 13199 13200 state->dts_activity = DTRACE_ACTIVITY_STOPPED; 13201 dtrace_sync(); 13202 13203 return (0); 13204 } 13205 13206 static int 13207 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, 13208 dtrace_optval_t val) 13209 { 13210 ASSERT(MUTEX_HELD(&dtrace_lock)); 13211 13212 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 13213 return (EBUSY); 13214 13215 if (option >= DTRACEOPT_MAX) 13216 return (EINVAL); 13217 13218 if (option != DTRACEOPT_CPU && val < 0) 13219 return (EINVAL); 13220 13221 switch (option) { 13222 case DTRACEOPT_DESTRUCTIVE: 13223 if (dtrace_destructive_disallow) 13224 return (EACCES); 13225 13226 state->dts_cred.dcr_destructive = 1; 13227 break; 13228 13229 case DTRACEOPT_BUFSIZE: 13230 case DTRACEOPT_DYNVARSIZE: 13231 case DTRACEOPT_AGGSIZE: 13232 case DTRACEOPT_SPECSIZE: 13233 case DTRACEOPT_STRSIZE: 13234 if (val < 0) 13235 return (EINVAL); 13236 13237 if (val >= LONG_MAX) { 13238 /* 13239 * If this is an otherwise negative value, set it to 13240 * the highest multiple of 128m less than LONG_MAX. 13241 * Technically, we're adjusting the size without 13242 * regard to the buffer resizing policy, but in fact, 13243 * this has no effect -- if we set the buffer size to 13244 * ~LONG_MAX and the buffer policy is ultimately set to 13245 * be "manual", the buffer allocation is guaranteed to 13246 * fail, if only because the allocation requires two 13247 * buffers. (We set the the size to the highest 13248 * multiple of 128m because it ensures that the size 13249 * will remain a multiple of a megabyte when 13250 * repeatedly halved -- all the way down to 15m.) 13251 */ 13252 val = LONG_MAX - (1 << 27) + 1; 13253 } 13254 } 13255 13256 state->dts_options[option] = val; 13257 13258 return (0); 13259 } 13260 13261 static void 13262 dtrace_state_destroy(dtrace_state_t *state) 13263 { 13264 dtrace_ecb_t *ecb; 13265 dtrace_vstate_t *vstate = &state->dts_vstate; 13266 minor_t minor = getminor(state->dts_dev); 13267 int i, bufsize = NCPU * sizeof (dtrace_buffer_t); 13268 dtrace_speculation_t *spec = state->dts_speculations; 13269 int nspec = state->dts_nspeculations; 13270 uint32_t match; 13271 13272 ASSERT(MUTEX_HELD(&dtrace_lock)); 13273 ASSERT(MUTEX_HELD(&cpu_lock)); 13274 13275 /* 13276 * First, retract any retained enablings for this state. 13277 */ 13278 dtrace_enabling_retract(state); 13279 ASSERT(state->dts_nretained == 0); 13280 13281 if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE || 13282 state->dts_activity == DTRACE_ACTIVITY_DRAINING) { 13283 /* 13284 * We have managed to come into dtrace_state_destroy() on a 13285 * hot enabling -- almost certainly because of a disorderly 13286 * shutdown of a consumer. (That is, a consumer that is 13287 * exiting without having called dtrace_stop().) In this case, 13288 * we're going to set our activity to be KILLED, and then 13289 * issue a sync to be sure that everyone is out of probe 13290 * context before we start blowing away ECBs. 13291 */ 13292 state->dts_activity = DTRACE_ACTIVITY_KILLED; 13293 dtrace_sync(); 13294 } 13295 13296 /* 13297 * Release the credential hold we took in dtrace_state_create(). 
13298 */ 13299 if (state->dts_cred.dcr_cred != NULL) 13300 crfree(state->dts_cred.dcr_cred); 13301 13302 /* 13303 * Now we can safely disable and destroy any enabled probes. Because 13304 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress 13305 * (especially if they're all enabled), we take two passes through the 13306 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and 13307 * in the second we disable whatever is left over. 13308 */ 13309 for (match = DTRACE_PRIV_KERNEL; ; match = 0) { 13310 for (i = 0; i < state->dts_necbs; i++) { 13311 if ((ecb = state->dts_ecbs[i]) == NULL) 13312 continue; 13313 13314 if (match && ecb->dte_probe != NULL) { 13315 dtrace_probe_t *probe = ecb->dte_probe; 13316 dtrace_provider_t *prov = probe->dtpr_provider; 13317 13318 if (!(prov->dtpv_priv.dtpp_flags & match)) 13319 continue; 13320 } 13321 13322 dtrace_ecb_disable(ecb); 13323 dtrace_ecb_destroy(ecb); 13324 } 13325 13326 if (!match) 13327 break; 13328 } 13329 13330 /* 13331 * Before we free the buffers, perform one more sync to assure that 13332 * every CPU is out of probe context. 13333 */ 13334 dtrace_sync(); 13335 13336 dtrace_buffer_free(state->dts_buffer); 13337 dtrace_buffer_free(state->dts_aggbuffer); 13338 13339 for (i = 0; i < nspec; i++) 13340 dtrace_buffer_free(spec[i].dtsp_buffer); 13341 13342 if (state->dts_cleaner != CYCLIC_NONE) 13343 cyclic_remove(state->dts_cleaner); 13344 13345 if (state->dts_deadman != CYCLIC_NONE) 13346 cyclic_remove(state->dts_deadman); 13347 13348 dtrace_dstate_fini(&vstate->dtvs_dynvars); 13349 dtrace_vstate_fini(vstate); 13350 kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *)); 13351 13352 if (state->dts_aggregations != NULL) { 13353 #ifdef DEBUG 13354 for (i = 0; i < state->dts_naggregations; i++) 13355 ASSERT(state->dts_aggregations[i] == NULL); 13356 #endif 13357 ASSERT(state->dts_naggregations > 0); 13358 kmem_free(state->dts_aggregations, 13359 state->dts_naggregations * sizeof (dtrace_aggregation_t *)); 13360 } 13361 13362 kmem_free(state->dts_buffer, bufsize); 13363 kmem_free(state->dts_aggbuffer, bufsize); 13364 13365 for (i = 0; i < nspec; i++) 13366 kmem_free(spec[i].dtsp_buffer, bufsize); 13367 13368 kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); 13369 13370 dtrace_format_destroy(state); 13371 13372 vmem_destroy(state->dts_aggid_arena); 13373 ddi_soft_state_free(dtrace_softstate, minor); 13374 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); 13375 } 13376 13377 /* 13378 * DTrace Anonymous Enabling Functions 13379 */ 13380 static dtrace_state_t * 13381 dtrace_anon_grab(void) 13382 { 13383 dtrace_state_t *state; 13384 13385 ASSERT(MUTEX_HELD(&dtrace_lock)); 13386 13387 if ((state = dtrace_anon.dta_state) == NULL) { 13388 ASSERT(dtrace_anon.dta_enabling == NULL); 13389 return (NULL); 13390 } 13391 13392 ASSERT(dtrace_anon.dta_enabling != NULL); 13393 ASSERT(dtrace_retained != NULL); 13394 13395 dtrace_enabling_destroy(dtrace_anon.dta_enabling); 13396 dtrace_anon.dta_enabling = NULL; 13397 dtrace_anon.dta_state = NULL; 13398 13399 return (state); 13400 } 13401 13402 static void 13403 dtrace_anon_property(void) 13404 { 13405 int i, rv; 13406 dtrace_state_t *state; 13407 dof_hdr_t *dof; 13408 char c[32]; /* enough for "dof-data-" + digits */ 13409 13410 ASSERT(MUTEX_HELD(&dtrace_lock)); 13411 ASSERT(MUTEX_HELD(&cpu_lock)); 13412 13413 for (i = 0; ; i++) { 13414 (void) snprintf(c, sizeof (c), "dof-data-%d", i); 13415 13416 dtrace_err_verbose = 1; 13417 13418 if ((dof = dtrace_dof_property(c)) 
== NULL) { 13419 dtrace_err_verbose = 0; 13420 break; 13421 } 13422 13423 /* 13424 * We want to create anonymous state, so we need to transition 13425 * the kernel debugger to indicate that DTrace is active. If 13426 * this fails (e.g. because the debugger has modified text in 13427 * some way), we won't continue with the processing. 13428 */ 13429 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { 13430 cmn_err(CE_NOTE, "kernel debugger active; anonymous " 13431 "enabling ignored."); 13432 dtrace_dof_destroy(dof); 13433 break; 13434 } 13435 13436 /* 13437 * If we haven't allocated an anonymous state, we'll do so now. 13438 */ 13439 if ((state = dtrace_anon.dta_state) == NULL) { 13440 state = dtrace_state_create(NULL, NULL); 13441 dtrace_anon.dta_state = state; 13442 13443 if (state == NULL) { 13444 /* 13445 * This basically shouldn't happen: the only 13446 * failure mode from dtrace_state_create() is a 13447 * failure of ddi_soft_state_zalloc() that 13448 * itself should never happen. Still, the 13449 * interface allows for a failure mode, and 13450 * we want to fail as gracefully as possible: 13451 * we'll emit an error message and cease 13452 * processing anonymous state in this case. 13453 */ 13454 cmn_err(CE_WARN, "failed to create " 13455 "anonymous state"); 13456 dtrace_dof_destroy(dof); 13457 break; 13458 } 13459 } 13460 13461 rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(), 13462 &dtrace_anon.dta_enabling, 0, B_TRUE); 13463 13464 if (rv == 0) 13465 rv = dtrace_dof_options(dof, state); 13466 13467 dtrace_err_verbose = 0; 13468 dtrace_dof_destroy(dof); 13469 13470 if (rv != 0) { 13471 /* 13472 * This is malformed DOF; chuck any anonymous state 13473 * that we created. 13474 */ 13475 ASSERT(dtrace_anon.dta_enabling == NULL); 13476 dtrace_state_destroy(state); 13477 dtrace_anon.dta_state = NULL; 13478 break; 13479 } 13480 13481 ASSERT(dtrace_anon.dta_enabling != NULL); 13482 } 13483 13484 if (dtrace_anon.dta_enabling != NULL) { 13485 int rval; 13486 13487 /* 13488 * dtrace_enabling_retain() can only fail because we are 13489 * trying to retain more enablings than are allowed -- but 13490 * we only have one anonymous enabling, and we are guaranteed 13491 * to be allowed at least one retained enabling; we assert 13492 * that dtrace_enabling_retain() returns success. 13493 */ 13494 rval = dtrace_enabling_retain(dtrace_anon.dta_enabling); 13495 ASSERT(rval == 0); 13496 13497 dtrace_enabling_dump(dtrace_anon.dta_enabling); 13498 } 13499 } 13500 13501 /* 13502 * DTrace Helper Functions 13503 */ 13504 static void 13505 dtrace_helper_trace(dtrace_helper_action_t *helper, 13506 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where) 13507 { 13508 uint32_t size, next, nnext, i; 13509 dtrace_helptrace_t *ent; 13510 uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 13511 13512 if (!dtrace_helptrace_enabled) 13513 return; 13514 13515 ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); 13516 13517 /* 13518 * What would a tracing framework be without its own tracing 13519 * framework? (Well, a hell of a lot simpler, for starters...) 13520 */ 13521 size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals * 13522 sizeof (uint64_t) - sizeof (uint64_t); 13523 13524 /* 13525 * Iterate until we can allocate a slot in the trace buffer. 
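 *
 * The allocation is lock-free: we snapshot dtrace_helptrace_next,
 * compute where our entry would end -- wrapping to the front of
 * the buffer if the entry won't fit -- and then attempt to publish
 * the new offset with a compare-and-swap, retrying if another CPU
 * has moved the offset out from under us.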
13526 */ 13527 do { 13528 next = dtrace_helptrace_next; 13529 13530 if (next + size < dtrace_helptrace_bufsize) { 13531 nnext = next + size; 13532 } else { 13533 nnext = size; 13534 } 13535 } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next); 13536 13537 /* 13538 * We have our slot; fill it in. 13539 */ 13540 if (nnext == size) 13541 next = 0; 13542 13543 ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next]; 13544 ent->dtht_helper = helper; 13545 ent->dtht_where = where; 13546 ent->dtht_nlocals = vstate->dtvs_nlocals; 13547 13548 ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ? 13549 mstate->dtms_fltoffs : -1; 13550 ent->dtht_fault = DTRACE_FLAGS2FLT(flags); 13551 ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 13552 13553 for (i = 0; i < vstate->dtvs_nlocals; i++) { 13554 dtrace_statvar_t *svar; 13555 13556 if ((svar = vstate->dtvs_locals[i]) == NULL) 13557 continue; 13558 13559 ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t)); 13560 ent->dtht_locals[i] = 13561 ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id]; 13562 } 13563 } 13564 13565 static uint64_t 13566 dtrace_helper(int which, dtrace_mstate_t *mstate, 13567 dtrace_state_t *state, uint64_t arg0, uint64_t arg1) 13568 { 13569 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 13570 uint64_t sarg0 = mstate->dtms_arg[0]; 13571 uint64_t sarg1 = mstate->dtms_arg[1]; 13572 uint64_t rval; 13573 dtrace_helpers_t *helpers = curproc->p_dtrace_helpers; 13574 dtrace_helper_action_t *helper; 13575 dtrace_vstate_t *vstate; 13576 dtrace_difo_t *pred; 13577 int i, trace = dtrace_helptrace_enabled; 13578 13579 ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS); 13580 13581 if (helpers == NULL) 13582 return (0); 13583 13584 if ((helper = helpers->dthps_actions[which]) == NULL) 13585 return (0); 13586 13587 vstate = &helpers->dthps_vstate; 13588 mstate->dtms_arg[0] = arg0; 13589 mstate->dtms_arg[1] = arg1; 13590 13591 /* 13592 * Now iterate over each helper. If its predicate evaluates to 'true', 13593 * we'll call the corresponding actions. Note that the below calls 13594 * to dtrace_dif_emulate() may set faults in machine state. This is 13595 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow 13596 * the stored DIF offset with its own (which is the desired behavior). 13597 * Also, note the calls to dtrace_dif_emulate() may allocate scratch 13598 * from machine state; this is okay, too. 13599 */ 13600 for (; helper != NULL; helper = helper->dtha_next) { 13601 if ((pred = helper->dtha_predicate) != NULL) { 13602 if (trace) 13603 dtrace_helper_trace(helper, mstate, vstate, 0); 13604 13605 if (!dtrace_dif_emulate(pred, mstate, vstate, state)) 13606 goto next; 13607 13608 if (*flags & CPU_DTRACE_FAULT) 13609 goto err; 13610 } 13611 13612 for (i = 0; i < helper->dtha_nactions; i++) { 13613 if (trace) 13614 dtrace_helper_trace(helper, 13615 mstate, vstate, i + 1); 13616 13617 rval = dtrace_dif_emulate(helper->dtha_actions[i], 13618 mstate, vstate, state); 13619 13620 if (*flags & CPU_DTRACE_FAULT) 13621 goto err; 13622 } 13623 13624 next: 13625 if (trace) 13626 dtrace_helper_trace(helper, mstate, vstate, 13627 DTRACE_HELPTRACE_NEXT); 13628 } 13629 13630 if (trace) 13631 dtrace_helper_trace(helper, mstate, vstate, 13632 DTRACE_HELPTRACE_DONE); 13633 13634 /* 13635 * Restore the arg0 that we saved upon entry. 
*/ 13637 mstate->dtms_arg[0] = sarg0; 13638 mstate->dtms_arg[1] = sarg1; 13639 13640 return (rval); 13641 13642 err: 13643 if (trace) 13644 dtrace_helper_trace(helper, mstate, vstate, 13645 DTRACE_HELPTRACE_ERR); 13646 13647 /* 13648 * Restore the arg0 that we saved upon entry. 13649 */ 13650 mstate->dtms_arg[0] = sarg0; 13651 mstate->dtms_arg[1] = sarg1; 13652 13653 return (0); 13654 } 13655 13656 static void 13657 dtrace_helper_action_destroy(dtrace_helper_action_t *helper, 13658 dtrace_vstate_t *vstate) 13659 { 13660 int i; 13661 13662 if (helper->dtha_predicate != NULL) 13663 dtrace_difo_release(helper->dtha_predicate, vstate); 13664 13665 for (i = 0; i < helper->dtha_nactions; i++) { 13666 ASSERT(helper->dtha_actions[i] != NULL); 13667 dtrace_difo_release(helper->dtha_actions[i], vstate); 13668 } 13669 13670 kmem_free(helper->dtha_actions, 13671 helper->dtha_nactions * sizeof (dtrace_difo_t *)); 13672 kmem_free(helper, sizeof (dtrace_helper_action_t)); 13673 } 13674 13675 static int 13676 dtrace_helper_destroygen(int gen) 13677 { 13678 proc_t *p = curproc; 13679 dtrace_helpers_t *help = p->p_dtrace_helpers; 13680 dtrace_vstate_t *vstate; 13681 int i; 13682 13683 ASSERT(MUTEX_HELD(&dtrace_lock)); 13684 13685 if (help == NULL || gen > help->dthps_generation) 13686 return (EINVAL); 13687 13688 vstate = &help->dthps_vstate; 13689 13690 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 13691 dtrace_helper_action_t *last = NULL, *h, *next; 13692 13693 for (h = help->dthps_actions[i]; h != NULL; h = next) { 13694 next = h->dtha_next; 13695 13696 if (h->dtha_generation == gen) { 13697 if (last != NULL) { 13698 last->dtha_next = next; 13699 } else { 13700 help->dthps_actions[i] = next; 13701 } 13702 13703 dtrace_helper_action_destroy(h, vstate); 13704 } else { 13705 last = h; 13706 } 13707 } 13708 } 13709 13710 /* 13711 * Iterate until we've cleared out all helper providers with the 13712 * given generation number. 13713 */ 13714 for (;;) { 13715 dtrace_helper_provider_t *prov; 13716 13717 /* 13718 * Look for a helper provider with the right generation. We 13719 * have to start back at the beginning of the list each time 13720 * because we drop dtrace_lock. It's unlikely that we'll make 13721 * more than two passes. 13722 */ 13723 for (i = 0; i < help->dthps_nprovs; i++) { 13724 prov = help->dthps_provs[i]; 13725 13726 if (prov->dthp_generation == gen) 13727 break; 13728 } 13729 13730 /* 13731 * If there were no matches, we're done. 13732 */ 13733 if (i == help->dthps_nprovs) 13734 break; 13735 13736 /* 13737 * Move the last helper provider into this slot. 13738 */ 13739 help->dthps_nprovs--; 13740 help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs]; 13741 help->dthps_provs[help->dthps_nprovs] = NULL; 13742 13743 mutex_exit(&dtrace_lock); 13744 13745 /* 13746 * If we have a meta provider, remove this helper provider. 
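 *
 * Note that dtrace_meta_lock is taken with dtrace_lock dropped,
 * preserving the meta-lock-before-dtrace-lock ordering that
 * dtrace_helper_provider_register(), below, also observes.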
*/ 13748 mutex_enter(&dtrace_meta_lock); 13749 if (dtrace_meta_pid != NULL) { 13750 ASSERT(dtrace_deferred_pid == NULL); 13751 dtrace_helper_provider_remove(&prov->dthp_prov, 13752 p->p_pid); 13753 } 13754 mutex_exit(&dtrace_meta_lock); 13755 13756 dtrace_helper_provider_destroy(prov); 13757 13758 mutex_enter(&dtrace_lock); 13759 } 13760 13761 return (0); 13762 } 13763 13764 static int 13765 dtrace_helper_validate(dtrace_helper_action_t *helper) 13766 { 13767 int err = 0, i; 13768 dtrace_difo_t *dp; 13769 13770 if ((dp = helper->dtha_predicate) != NULL) 13771 err += dtrace_difo_validate_helper(dp); 13772 13773 for (i = 0; i < helper->dtha_nactions; i++) 13774 err += dtrace_difo_validate_helper(helper->dtha_actions[i]); 13775 13776 return (err == 0); 13777 } 13778 13779 static int 13780 dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep) 13781 { 13782 dtrace_helpers_t *help; 13783 dtrace_helper_action_t *helper, *last; 13784 dtrace_actdesc_t *act; 13785 dtrace_vstate_t *vstate; 13786 dtrace_predicate_t *pred; 13787 int count = 0, nactions = 0, i; 13788 13789 if (which < 0 || which >= DTRACE_NHELPER_ACTIONS) 13790 return (EINVAL); 13791 13792 help = curproc->p_dtrace_helpers; 13793 last = help->dthps_actions[which]; 13794 vstate = &help->dthps_vstate; 13795 13796 for (count = 0; last != NULL; last = last->dtha_next) { 13797 count++; 13798 if (last->dtha_next == NULL) 13799 break; 13800 } 13801 13802 /* 13803 * If we already have dtrace_helper_actions_max helper actions for this 13804 * helper action type, we'll refuse to add a new one. 13805 */ 13806 if (count >= dtrace_helper_actions_max) 13807 return (ENOSPC); 13808 13809 helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP); 13810 helper->dtha_generation = help->dthps_generation; 13811 13812 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) { 13813 ASSERT(pred->dtp_difo != NULL); 13814 dtrace_difo_hold(pred->dtp_difo); 13815 helper->dtha_predicate = pred->dtp_difo; 13816 } 13817 13818 for (act = ep->dted_action; act != NULL; act = act->dtad_next) { 13819 if (act->dtad_kind != DTRACEACT_DIFEXPR) 13820 goto err; 13821 13822 if (act->dtad_difo == NULL) 13823 goto err; 13824 13825 nactions++; 13826 } 13827 13828 helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) * 13829 (helper->dtha_nactions = nactions), KM_SLEEP); 13830 13831 for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) { 13832 dtrace_difo_hold(act->dtad_difo); 13833 helper->dtha_actions[i++] = act->dtad_difo; 13834 } 13835 13836 if (!dtrace_helper_validate(helper)) 13837 goto err; 13838 13839 if (last == NULL) { 13840 help->dthps_actions[which] = helper; 13841 } else { 13842 last->dtha_next = helper; 13843 } 13844 13845 if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { 13846 dtrace_helptrace_nlocals = vstate->dtvs_nlocals; 13847 dtrace_helptrace_next = 0; 13848 } 13849 13850 return (0); 13851 err: 13852 dtrace_helper_action_destroy(helper, vstate); 13853 return (EINVAL); 13854 } 13855 13856 static void 13857 dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, 13858 dof_helper_t *dofhp) 13859 { 13860 ASSERT(MUTEX_NOT_HELD(&dtrace_lock)); 13861 13862 mutex_enter(&dtrace_meta_lock); 13863 mutex_enter(&dtrace_lock); 13864 13865 if (!dtrace_attached() || dtrace_meta_pid == NULL) { 13866 /* 13867 * If the dtrace module is loaded but not attached, or if 13868 * there isn't a meta provider registered to deal with 13869 * these provider descriptions, we need to postpone creating 13870 * the actual providers until 
later. 13871 */ 13872 13873 if (help->dthps_next == NULL && help->dthps_prev == NULL && 13874 dtrace_deferred_pid != help) { 13875 help->dthps_deferred = 1; 13876 help->dthps_pid = p->p_pid; 13877 help->dthps_next = dtrace_deferred_pid; 13878 help->dthps_prev = NULL; 13879 if (dtrace_deferred_pid != NULL) 13880 dtrace_deferred_pid->dthps_prev = help; 13881 dtrace_deferred_pid = help; 13882 } 13883 13884 mutex_exit(&dtrace_lock); 13885 13886 } else if (dofhp != NULL) { 13887 /* 13888 * If the dtrace module is loaded and we have a particular 13889 * helper provider description, pass that off to the 13890 * meta provider. 13891 */ 13892 13893 mutex_exit(&dtrace_lock); 13894 13895 dtrace_helper_provide(dofhp, p->p_pid); 13896 13897 } else { 13898 /* 13899 * Otherwise, just pass all the helper provider descriptions 13900 * off to the meta provider. 13901 */ 13902 13903 int i; 13904 mutex_exit(&dtrace_lock); 13905 13906 for (i = 0; i < help->dthps_nprovs; i++) { 13907 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, 13908 p->p_pid); 13909 } 13910 } 13911 13912 mutex_exit(&dtrace_meta_lock); 13913 } 13914 13915 static int 13916 dtrace_helper_provider_add(dof_helper_t *dofhp, int gen) 13917 { 13918 dtrace_helpers_t *help; 13919 dtrace_helper_provider_t *hprov, **tmp_provs; 13920 uint_t tmp_maxprovs, i; 13921 13922 ASSERT(MUTEX_HELD(&dtrace_lock)); 13923 13924 help = curproc->p_dtrace_helpers; 13925 ASSERT(help != NULL); 13926 13927 /* 13928 * If we already have dtrace_helper_providers_max helper providers, 13929 * we refuse to add a new one. 13930 */ 13931 if (help->dthps_nprovs >= dtrace_helper_providers_max) 13932 return (ENOSPC); 13933 13934 /* 13935 * Check to make sure this isn't a duplicate. 13936 */ 13937 for (i = 0; i < help->dthps_nprovs; i++) { 13938 if (dofhp->dofhp_addr == 13939 help->dthps_provs[i]->dthp_prov.dofhp_addr) 13940 return (EALREADY); 13941 } 13942 13943 hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP); 13944 hprov->dthp_prov = *dofhp; 13945 hprov->dthp_ref = 1; 13946 hprov->dthp_generation = gen; 13947 13948 /* 13949 * Allocate a bigger table for helper providers if it's already full. 
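 *
 * The table grows by doubling -- 2, 4, 8, ... -- capped at
 * dtrace_helper_providers_max; the old table, if there was one, is
 * copied into the new allocation and then freed.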
13950 */ 13951 if (help->dthps_maxprovs == help->dthps_nprovs) { 13952 tmp_maxprovs = help->dthps_maxprovs; 13953 tmp_provs = help->dthps_provs; 13954 13955 if (help->dthps_maxprovs == 0) 13956 help->dthps_maxprovs = 2; 13957 else 13958 help->dthps_maxprovs *= 2; 13959 if (help->dthps_maxprovs > dtrace_helper_providers_max) 13960 help->dthps_maxprovs = dtrace_helper_providers_max; 13961 13962 ASSERT(tmp_maxprovs < help->dthps_maxprovs); 13963 13964 help->dthps_provs = kmem_zalloc(help->dthps_maxprovs * 13965 sizeof (dtrace_helper_provider_t *), KM_SLEEP); 13966 13967 if (tmp_provs != NULL) { 13968 bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs * 13969 sizeof (dtrace_helper_provider_t *)); 13970 kmem_free(tmp_provs, tmp_maxprovs * 13971 sizeof (dtrace_helper_provider_t *)); 13972 } 13973 } 13974 13975 help->dthps_provs[help->dthps_nprovs] = hprov; 13976 help->dthps_nprovs++; 13977 13978 return (0); 13979 } 13980 13981 static void 13982 dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov) 13983 { 13984 mutex_enter(&dtrace_lock); 13985 13986 if (--hprov->dthp_ref == 0) { 13987 dof_hdr_t *dof; 13988 mutex_exit(&dtrace_lock); 13989 dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof; 13990 dtrace_dof_destroy(dof); 13991 kmem_free(hprov, sizeof (dtrace_helper_provider_t)); 13992 } else { 13993 mutex_exit(&dtrace_lock); 13994 } 13995 } 13996 13997 static int 13998 dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec) 13999 { 14000 uintptr_t daddr = (uintptr_t)dof; 14001 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec; 14002 dof_provider_t *provider; 14003 dof_probe_t *probe; 14004 uint8_t *arg; 14005 char *strtab, *typestr; 14006 dof_stridx_t typeidx; 14007 size_t typesz; 14008 uint_t nprobes, j, k; 14009 14010 ASSERT(sec->dofs_type == DOF_SECT_PROVIDER); 14011 14012 if (sec->dofs_offset & (sizeof (uint_t) - 1)) { 14013 dtrace_dof_error(dof, "misaligned section offset"); 14014 return (-1); 14015 } 14016 14017 /* 14018 * The section needs to be large enough to contain the DOF provider 14019 * structure appropriate for the given version. 14020 */ 14021 if (sec->dofs_size < 14022 ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ? 
14023 offsetof(dof_provider_t, dofpv_prenoffs) : 14024 sizeof (dof_provider_t))) { 14025 dtrace_dof_error(dof, "provider section too small"); 14026 return (-1); 14027 } 14028 14029 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); 14030 str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab); 14031 prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes); 14032 arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs); 14033 off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs); 14034 14035 if (str_sec == NULL || prb_sec == NULL || 14036 arg_sec == NULL || off_sec == NULL) 14037 return (-1); 14038 14039 enoff_sec = NULL; 14040 14041 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && 14042 provider->dofpv_prenoffs != DOF_SECT_NONE && 14043 (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS, 14044 provider->dofpv_prenoffs)) == NULL) 14045 return (-1); 14046 14047 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); 14048 14049 if (provider->dofpv_name >= str_sec->dofs_size || 14050 strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) { 14051 dtrace_dof_error(dof, "invalid provider name"); 14052 return (-1); 14053 } 14054 14055 if (prb_sec->dofs_entsize == 0 || 14056 prb_sec->dofs_entsize > prb_sec->dofs_size) { 14057 dtrace_dof_error(dof, "invalid entry size"); 14058 return (-1); 14059 } 14060 14061 if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) { 14062 dtrace_dof_error(dof, "misaligned entry size"); 14063 return (-1); 14064 } 14065 14066 if (off_sec->dofs_entsize != sizeof (uint32_t)) { 14067 dtrace_dof_error(dof, "invalid entry size"); 14068 return (-1); 14069 } 14070 14071 if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) { 14072 dtrace_dof_error(dof, "misaligned section offset"); 14073 return (-1); 14074 } 14075 14076 if (arg_sec->dofs_entsize != sizeof (uint8_t)) { 14077 dtrace_dof_error(dof, "invalid entry size"); 14078 return (-1); 14079 } 14080 14081 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); 14082 14083 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; 14084 14085 /* 14086 * Take a pass through the probes to check for errors. 14087 */ 14088 for (j = 0; j < nprobes; j++) { 14089 probe = (dof_probe_t *)(uintptr_t)(daddr + 14090 prb_sec->dofs_offset + j * prb_sec->dofs_entsize); 14091 14092 if (probe->dofpr_func >= str_sec->dofs_size) { 14093 dtrace_dof_error(dof, "invalid function name"); 14094 return (-1); 14095 } 14096 14097 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) { 14098 dtrace_dof_error(dof, "function name too long"); 14099 return (-1); 14100 } 14101 14102 if (probe->dofpr_name >= str_sec->dofs_size || 14103 strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) { 14104 dtrace_dof_error(dof, "invalid probe name"); 14105 return (-1); 14106 } 14107 14108 /* 14109 * The offset count must not wrap the index, and the offsets 14110 * must also not overflow the section's data. 14111 */ 14112 if (probe->dofpr_offidx + probe->dofpr_noffs < 14113 probe->dofpr_offidx || 14114 (probe->dofpr_offidx + probe->dofpr_noffs) * 14115 off_sec->dofs_entsize > off_sec->dofs_size) { 14116 dtrace_dof_error(dof, "invalid probe offset"); 14117 return (-1); 14118 } 14119 14120 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) { 14121 /* 14122 * If there's no is-enabled offset section, make sure 14123 * there aren't any is-enabled offsets. Otherwise 14124 * perform the same checks as for probe offsets 14125 * (immediately above). 
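 *
 * For example, a corrupt probe with a dofpr_enoffidx of
 * 0xffffffff and a dofpr_nenoffs of 2 would wrap the 32-bit sum
 * around to 1 -- less than the index itself -- and is caught by
 * the first clause of the check.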
14126 */ 14127 if (enoff_sec == NULL) { 14128 if (probe->dofpr_enoffidx != 0 || 14129 probe->dofpr_nenoffs != 0) { 14130 dtrace_dof_error(dof, "is-enabled " 14131 "offsets with null section"); 14132 return (-1); 14133 } 14134 } else if (probe->dofpr_enoffidx + 14135 probe->dofpr_nenoffs < probe->dofpr_enoffidx || 14136 (probe->dofpr_enoffidx + probe->dofpr_nenoffs) * 14137 enoff_sec->dofs_entsize > enoff_sec->dofs_size) { 14138 dtrace_dof_error(dof, "invalid is-enabled " 14139 "offset"); 14140 return (-1); 14141 } 14142 14143 if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) { 14144 dtrace_dof_error(dof, "zero probe and " 14145 "is-enabled offsets"); 14146 return (-1); 14147 } 14148 } else if (probe->dofpr_noffs == 0) { 14149 dtrace_dof_error(dof, "zero probe offsets"); 14150 return (-1); 14151 } 14152 14153 if (probe->dofpr_argidx + probe->dofpr_xargc < 14154 probe->dofpr_argidx || 14155 (probe->dofpr_argidx + probe->dofpr_xargc) * 14156 arg_sec->dofs_entsize > arg_sec->dofs_size) { 14157 dtrace_dof_error(dof, "invalid args"); 14158 return (-1); 14159 } 14160 14161 typeidx = probe->dofpr_nargv; 14162 typestr = strtab + probe->dofpr_nargv; 14163 for (k = 0; k < probe->dofpr_nargc; k++) { 14164 if (typeidx >= str_sec->dofs_size) { 14165 dtrace_dof_error(dof, "bad " 14166 "native argument type"); 14167 return (-1); 14168 } 14169 14170 typesz = strlen(typestr) + 1; 14171 if (typesz > DTRACE_ARGTYPELEN) { 14172 dtrace_dof_error(dof, "native " 14173 "argument type too long"); 14174 return (-1); 14175 } 14176 typeidx += typesz; 14177 typestr += typesz; 14178 } 14179 14180 typeidx = probe->dofpr_xargv; 14181 typestr = strtab + probe->dofpr_xargv; 14182 for (k = 0; k < probe->dofpr_xargc; k++) { 14183 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) { 14184 dtrace_dof_error(dof, "bad " 14185 "native argument index"); 14186 return (-1); 14187 } 14188 14189 if (typeidx >= str_sec->dofs_size) { 14190 dtrace_dof_error(dof, "bad " 14191 "translated argument type"); 14192 return (-1); 14193 } 14194 14195 typesz = strlen(typestr) + 1; 14196 if (typesz > DTRACE_ARGTYPELEN) { 14197 dtrace_dof_error(dof, "translated argument " 14198 "type too long"); 14199 return (-1); 14200 } 14201 14202 typeidx += typesz; 14203 typestr += typesz; 14204 } 14205 } 14206 14207 return (0); 14208 } 14209 14210 static int 14211 dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp) 14212 { 14213 dtrace_helpers_t *help; 14214 dtrace_vstate_t *vstate; 14215 dtrace_enabling_t *enab = NULL; 14216 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; 14217 uintptr_t daddr = (uintptr_t)dof; 14218 14219 ASSERT(MUTEX_HELD(&dtrace_lock)); 14220 14221 if ((help = curproc->p_dtrace_helpers) == NULL) 14222 help = dtrace_helpers_create(curproc); 14223 14224 vstate = &help->dthps_vstate; 14225 14226 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab, 14227 dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) { 14228 dtrace_dof_destroy(dof); 14229 return (rv); 14230 } 14231 14232 /* 14233 * Look for helper providers and validate their descriptions. 
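 * A single malformed provider section fails the entire slurp;
 * dtrace_helper_provider_validate() (defined above) rejects
 * misaligned sections, bogus entry sizes, out-of-bounds string
 * indices and wrapped offset arithmetic, so a DOF that survives
 * this loop is internally consistent before it can later be handed
 * to a meta-provider via dtrace_helper_provider_register().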
14234 */ 14235 if (dhp != NULL) { 14236 for (i = 0; i < dof->dofh_secnum; i++) { 14237 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 14238 dof->dofh_secoff + i * dof->dofh_secsize); 14239 14240 if (sec->dofs_type != DOF_SECT_PROVIDER) 14241 continue; 14242 14243 if (dtrace_helper_provider_validate(dof, sec) != 0) { 14244 dtrace_enabling_destroy(enab); 14245 dtrace_dof_destroy(dof); 14246 return (-1); 14247 } 14248 14249 nprovs++; 14250 } 14251 } 14252 14253 /* 14254 * Now we need to walk through the ECB descriptions in the enabling. 14255 */ 14256 for (i = 0; i < enab->dten_ndesc; i++) { 14257 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 14258 dtrace_probedesc_t *desc = &ep->dted_probe; 14259 14260 if (strcmp(desc->dtpd_provider, "dtrace") != 0) 14261 continue; 14262 14263 if (strcmp(desc->dtpd_mod, "helper") != 0) 14264 continue; 14265 14266 if (strcmp(desc->dtpd_func, "ustack") != 0) 14267 continue; 14268 14269 if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, 14270 ep)) != 0) { 14271 /* 14272 * Adding this helper action failed -- we are now going 14273 * to rip out the entire generation and return failure. 14274 */ 14275 (void) dtrace_helper_destroygen(help->dthps_generation); 14276 dtrace_enabling_destroy(enab); 14277 dtrace_dof_destroy(dof); 14278 return (-1); 14279 } 14280 14281 nhelpers++; 14282 } 14283 14284 if (nhelpers < enab->dten_ndesc) 14285 dtrace_dof_error(dof, "unmatched helpers"); 14286 14287 gen = help->dthps_generation++; 14288 dtrace_enabling_destroy(enab); 14289 14290 if (dhp != NULL && nprovs > 0) { 14291 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; 14292 if (dtrace_helper_provider_add(dhp, gen) == 0) { 14293 mutex_exit(&dtrace_lock); 14294 dtrace_helper_provider_register(curproc, help, dhp); 14295 mutex_enter(&dtrace_lock); 14296 14297 destroy = 0; 14298 } 14299 } 14300 14301 if (destroy) 14302 dtrace_dof_destroy(dof); 14303 14304 return (gen); 14305 } 14306 14307 static dtrace_helpers_t * 14308 dtrace_helpers_create(proc_t *p) 14309 { 14310 dtrace_helpers_t *help; 14311 14312 ASSERT(MUTEX_HELD(&dtrace_lock)); 14313 ASSERT(p->p_dtrace_helpers == NULL); 14314 14315 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); 14316 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) * 14317 DTRACE_NHELPER_ACTIONS, KM_SLEEP); 14318 14319 p->p_dtrace_helpers = help; 14320 dtrace_helpers++; 14321 14322 return (help); 14323 } 14324 14325 static void 14326 dtrace_helpers_destroy(void) 14327 { 14328 dtrace_helpers_t *help; 14329 dtrace_vstate_t *vstate; 14330 proc_t *p = curproc; 14331 int i; 14332 14333 mutex_enter(&dtrace_lock); 14334 14335 ASSERT(p->p_dtrace_helpers != NULL); 14336 ASSERT(dtrace_helpers > 0); 14337 14338 help = p->p_dtrace_helpers; 14339 vstate = &help->dthps_vstate; 14340 14341 /* 14342 * We're now going to lose the help from this process. 14343 */ 14344 p->p_dtrace_helpers = NULL; 14345 dtrace_sync(); 14346 14347 /* 14348 * Destroy the helper actions. 14349 */ 14350 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 14351 dtrace_helper_action_t *h, *next; 14352 14353 for (h = help->dthps_actions[i]; h != NULL; h = next) { 14354 next = h->dtha_next; 14355 dtrace_helper_action_destroy(h, vstate); 14356 h = next; 14357 } 14358 } 14359 14360 mutex_exit(&dtrace_lock); 14361 14362 /* 14363 * Destroy the helper providers.
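 * Provider descriptors are reference-counted (dthp_ref):
 * dtrace_helpers_duplicate() shares them across fork() by bumping
 * the count rather than copying the underlying DOF, so
 * dtrace_helper_provider_destroy() frees a descriptor's DOF only
 * when the final reference is dropped.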
14364 */ 14365 if (help->dthps_maxprovs > 0) { 14366 mutex_enter(&dtrace_meta_lock); 14367 if (dtrace_meta_pid != NULL) { 14368 ASSERT(dtrace_deferred_pid == NULL); 14369 14370 for (i = 0; i < help->dthps_nprovs; i++) { 14371 dtrace_helper_provider_remove( 14372 &help->dthps_provs[i]->dthp_prov, p->p_pid); 14373 } 14374 } else { 14375 mutex_enter(&dtrace_lock); 14376 ASSERT(help->dthps_deferred == 0 || 14377 help->dthps_next != NULL || 14378 help->dthps_prev != NULL || 14379 help == dtrace_deferred_pid); 14380 14381 /* 14382 * Remove the helper from the deferred list. 14383 */ 14384 if (help->dthps_next != NULL) 14385 help->dthps_next->dthps_prev = help->dthps_prev; 14386 if (help->dthps_prev != NULL) 14387 help->dthps_prev->dthps_next = help->dthps_next; 14388 if (dtrace_deferred_pid == help) { 14389 dtrace_deferred_pid = help->dthps_next; 14390 ASSERT(help->dthps_prev == NULL); 14391 } 14392 14393 mutex_exit(&dtrace_lock); 14394 } 14395 14396 mutex_exit(&dtrace_meta_lock); 14397 14398 for (i = 0; i < help->dthps_nprovs; i++) { 14399 dtrace_helper_provider_destroy(help->dthps_provs[i]); 14400 } 14401 14402 kmem_free(help->dthps_provs, help->dthps_maxprovs * 14403 sizeof (dtrace_helper_provider_t *)); 14404 } 14405 14406 mutex_enter(&dtrace_lock); 14407 14408 dtrace_vstate_fini(&help->dthps_vstate); 14409 kmem_free(help->dthps_actions, 14410 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS); 14411 kmem_free(help, sizeof (dtrace_helpers_t)); 14412 14413 --dtrace_helpers; 14414 mutex_exit(&dtrace_lock); 14415 } 14416 14417 static void 14418 dtrace_helpers_duplicate(proc_t *from, proc_t *to) 14419 { 14420 dtrace_helpers_t *help, *newhelp; 14421 dtrace_helper_action_t *helper, *new, *last; 14422 dtrace_difo_t *dp; 14423 dtrace_vstate_t *vstate; 14424 int i, j, sz, hasprovs = 0; 14425 14426 mutex_enter(&dtrace_lock); 14427 ASSERT(from->p_dtrace_helpers != NULL); 14428 ASSERT(dtrace_helpers > 0); 14429 14430 help = from->p_dtrace_helpers; 14431 newhelp = dtrace_helpers_create(to); 14432 ASSERT(to->p_dtrace_helpers != NULL); 14433 14434 newhelp->dthps_generation = help->dthps_generation; 14435 vstate = &newhelp->dthps_vstate; 14436 14437 /* 14438 * Duplicate the helper actions. 14439 */ 14440 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 14441 if ((helper = help->dthps_actions[i]) == NULL) 14442 continue; 14443 14444 for (last = NULL; helper != NULL; helper = helper->dtha_next) { 14445 new = kmem_zalloc(sizeof (dtrace_helper_action_t), 14446 KM_SLEEP); 14447 new->dtha_generation = helper->dtha_generation; 14448 14449 if ((dp = helper->dtha_predicate) != NULL) { 14450 dp = dtrace_difo_duplicate(dp, vstate); 14451 new->dtha_predicate = dp; 14452 } 14453 14454 new->dtha_nactions = helper->dtha_nactions; 14455 sz = sizeof (dtrace_difo_t *) * new->dtha_nactions; 14456 new->dtha_actions = kmem_alloc(sz, KM_SLEEP); 14457 14458 for (j = 0; j < new->dtha_nactions; j++) { 14459 dtrace_difo_t *dp = helper->dtha_actions[j]; 14460 14461 ASSERT(dp != NULL); 14462 dp = dtrace_difo_duplicate(dp, vstate); 14463 new->dtha_actions[j] = dp; 14464 } 14465 14466 if (last != NULL) { 14467 last->dtha_next = new; 14468 } else { 14469 newhelp->dthps_actions[i] = new; 14470 } 14471 14472 last = new; 14473 } 14474 } 14475 14476 /* 14477 * Duplicate the helper providers and register them with the 14478 * DTrace framework. 
14479 */ 14480 if (help->dthps_nprovs > 0) { 14481 newhelp->dthps_nprovs = help->dthps_nprovs; 14482 newhelp->dthps_maxprovs = help->dthps_nprovs; 14483 newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs * 14484 sizeof (dtrace_helper_provider_t *), KM_SLEEP); 14485 for (i = 0; i < newhelp->dthps_nprovs; i++) { 14486 newhelp->dthps_provs[i] = help->dthps_provs[i]; 14487 newhelp->dthps_provs[i]->dthp_ref++; 14488 } 14489 14490 hasprovs = 1; 14491 } 14492 14493 mutex_exit(&dtrace_lock); 14494 14495 if (hasprovs) 14496 dtrace_helper_provider_register(to, newhelp, NULL); 14497 } 14498 14499 /* 14500 * DTrace Hook Functions 14501 */ 14502 static void 14503 dtrace_module_loaded(struct modctl *ctl) 14504 { 14505 dtrace_provider_t *prv; 14506 14507 mutex_enter(&dtrace_provider_lock); 14508 mutex_enter(&mod_lock); 14509 14510 ASSERT(ctl->mod_busy); 14511 14512 /* 14513 * We're going to call each provider's per-module provide operation 14514 * specifying only this module. 14515 */ 14516 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) 14517 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); 14518 14519 mutex_exit(&mod_lock); 14520 mutex_exit(&dtrace_provider_lock); 14521 14522 /* 14523 * If we have any retained enablings, we need to match against them. 14524 * Enabling probes requires that cpu_lock be held, and we cannot hold 14525 * cpu_lock here -- it is legal for cpu_lock to be held when loading a 14526 * module. (In particular, this happens when loading scheduling 14527 * classes.) So if we have any retained enablings, we need to dispatch 14528 * our task queue to do the match for us. 14529 */ 14530 mutex_enter(&dtrace_lock); 14531 14532 if (dtrace_retained == NULL) { 14533 mutex_exit(&dtrace_lock); 14534 return; 14535 } 14536 14537 (void) taskq_dispatch(dtrace_taskq, 14538 (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP); 14539 14540 mutex_exit(&dtrace_lock); 14541 14542 /* 14543 * And now, for a little heuristic sleaze: in general, we want to 14544 * match modules as soon as they load. However, we cannot guarantee 14545 * this, because it would lead us to the lock ordering violation 14546 * outlined above. The common case, of course, is that cpu_lock is 14547 * _not_ held -- so we delay here for a clock tick, hoping that that's 14548 * long enough for the task queue to do its work. If it's not, it's 14549 * not a serious problem -- it just means that the module that we 14550 * just loaded may not be immediately instrumentable. 14551 */ 14552 delay(1); 14553 } 14554 14555 static void 14556 dtrace_module_unloaded(struct modctl *ctl) 14557 { 14558 dtrace_probe_t template, *probe, *first, *next; 14559 dtrace_provider_t *prov; 14560 14561 template.dtpr_mod = ctl->mod_modname; 14562 14563 mutex_enter(&dtrace_provider_lock); 14564 mutex_enter(&mod_lock); 14565 mutex_enter(&dtrace_lock); 14566 14567 if (dtrace_bymod == NULL) { 14568 /* 14569 * The DTrace module is loaded (obviously) but not attached; 14570 * we don't have any work to do. 14571 */ 14572 mutex_exit(&dtrace_provider_lock); 14573 mutex_exit(&mod_lock); 14574 mutex_exit(&dtrace_lock); 14575 return; 14576 } 14577 14578 for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); 14579 probe != NULL; probe = probe->dtpr_nextmod) { 14580 if (probe->dtpr_ecb != NULL) { 14581 mutex_exit(&dtrace_provider_lock); 14582 mutex_exit(&mod_lock); 14583 mutex_exit(&dtrace_lock); 14584 14585 /* 14586 * This shouldn't _actually_ be possible -- we're 14587 * unloading a module that has an enabled probe in it.
14588 * (It's normally up to the provider to make sure that 14589 * this can't happen.) However, because dtps_enable() 14590 * doesn't have a failure mode, there can be an 14591 * enable/unload race. Upshot: we don't want to 14592 * assert, but we're not going to disable the 14593 * probe, either. 14594 */ 14595 if (dtrace_err_verbose) { 14596 cmn_err(CE_WARN, "unloaded module '%s' had " 14597 "enabled probes", ctl->mod_modname); 14598 } 14599 14600 return; 14601 } 14602 } 14603 14604 probe = first; 14605 14606 for (first = NULL; probe != NULL; probe = next) { 14607 ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); 14608 14609 dtrace_probes[probe->dtpr_id - 1] = NULL; 14610 14611 next = probe->dtpr_nextmod; 14612 dtrace_hash_remove(dtrace_bymod, probe); 14613 dtrace_hash_remove(dtrace_byfunc, probe); 14614 dtrace_hash_remove(dtrace_byname, probe); 14615 14616 if (first == NULL) { 14617 first = probe; 14618 probe->dtpr_nextmod = NULL; 14619 } else { 14620 probe->dtpr_nextmod = first; 14621 first = probe; 14622 } 14623 } 14624 14625 /* 14626 * We've removed all of the module's probes from the hash chains and 14627 * from the probe array. Now issue a dtrace_sync() to be sure that 14628 * everyone has cleared out from any probe array processing. 14629 */ 14630 dtrace_sync(); 14631 14632 for (probe = first; probe != NULL; probe = first) { 14633 first = probe->dtpr_nextmod; 14634 prov = probe->dtpr_provider; 14635 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, 14636 probe->dtpr_arg); 14637 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); 14638 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); 14639 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); 14640 vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); 14641 kmem_free(probe, sizeof (dtrace_probe_t)); 14642 } 14643 14644 mutex_exit(&dtrace_lock); 14645 mutex_exit(&mod_lock); 14646 mutex_exit(&dtrace_provider_lock); 14647 } 14648 14649 void 14650 dtrace_suspend(void) 14651 { 14652 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend)); 14653 } 14654 14655 void 14656 dtrace_resume(void) 14657 { 14658 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume)); 14659 } 14660 14661 static int 14662 dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu) 14663 { 14664 ASSERT(MUTEX_HELD(&cpu_lock)); 14665 mutex_enter(&dtrace_lock); 14666 14667 switch (what) { 14668 case CPU_CONFIG: { 14669 dtrace_state_t *state; 14670 dtrace_optval_t *opt, rs, c; 14671 14672 /* 14673 * For now, we only allocate a new buffer for anonymous state. 14674 */ 14675 if ((state = dtrace_anon.dta_state) == NULL) 14676 break; 14677 14678 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) 14679 break; 14680 14681 opt = state->dts_options; 14682 c = opt[DTRACEOPT_CPU]; 14683 14684 if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu) 14685 break; 14686 14687 /* 14688 * Regardless of what the actual policy is, we're going to 14689 * temporarily set our resize policy to be manual. We're 14690 * also going to temporarily set our CPU option to denote 14691 * the newly configured CPU. 14692 */ 14693 rs = opt[DTRACEOPT_BUFRESIZE]; 14694 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL; 14695 opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu; 14696 14697 (void) dtrace_state_buffers(state); 14698 14699 opt[DTRACEOPT_BUFRESIZE] = rs; 14700 opt[DTRACEOPT_CPU] = c; 14701 14702 break; 14703 } 14704 14705 case CPU_UNCONFIG: 14706 /* 14707 * We don't free the buffer in the CPU_UNCONFIG case. 
(The 14708 * buffer will be freed when the consumer exits.) 14709 */ 14710 break; 14711 14712 default: 14713 break; 14714 } 14715 14716 mutex_exit(&dtrace_lock); 14717 return (0); 14718 } 14719 14720 static void 14721 dtrace_cpu_setup_initial(processorid_t cpu) 14722 { 14723 (void) dtrace_cpu_setup(CPU_CONFIG, cpu); 14724 } 14725 14726 static void 14727 dtrace_toxrange_add(uintptr_t base, uintptr_t limit) 14728 { 14729 if (dtrace_toxranges >= dtrace_toxranges_max) { 14730 int osize, nsize; 14731 dtrace_toxrange_t *range; 14732 14733 osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); 14734 14735 if (osize == 0) { 14736 ASSERT(dtrace_toxrange == NULL); 14737 ASSERT(dtrace_toxranges_max == 0); 14738 dtrace_toxranges_max = 1; 14739 } else { 14740 dtrace_toxranges_max <<= 1; 14741 } 14742 14743 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); 14744 range = kmem_zalloc(nsize, KM_SLEEP); 14745 14746 if (dtrace_toxrange != NULL) { 14747 ASSERT(osize != 0); 14748 bcopy(dtrace_toxrange, range, osize); 14749 kmem_free(dtrace_toxrange, osize); 14750 } 14751 14752 dtrace_toxrange = range; 14753 } 14754 14755 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL); 14756 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL); 14757 14758 dtrace_toxrange[dtrace_toxranges].dtt_base = base; 14759 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit; 14760 dtrace_toxranges++; 14761 } 14762 14763 /* 14764 * DTrace Driver Cookbook Functions 14765 */ 14766 /*ARGSUSED*/ 14767 static int 14768 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 14769 { 14770 dtrace_provider_id_t id; 14771 dtrace_state_t *state = NULL; 14772 dtrace_enabling_t *enab; 14773 14774 mutex_enter(&cpu_lock); 14775 mutex_enter(&dtrace_provider_lock); 14776 mutex_enter(&dtrace_lock); 14777 14778 if (ddi_soft_state_init(&dtrace_softstate, 14779 sizeof (dtrace_state_t), 0) != 0) { 14780 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); 14781 mutex_exit(&cpu_lock); 14782 mutex_exit(&dtrace_provider_lock); 14783 mutex_exit(&dtrace_lock); 14784 return (DDI_FAILURE); 14785 } 14786 14787 if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR, 14788 DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE || 14789 ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR, 14790 DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) { 14791 cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes"); 14792 ddi_remove_minor_node(devi, NULL); 14793 ddi_soft_state_fini(&dtrace_softstate); 14794 mutex_exit(&cpu_lock); 14795 mutex_exit(&dtrace_provider_lock); 14796 mutex_exit(&dtrace_lock); 14797 return (DDI_FAILURE); 14798 } 14799 14800 ddi_report_dev(devi); 14801 dtrace_devi = devi; 14802 14803 dtrace_modload = dtrace_module_loaded; 14804 dtrace_modunload = dtrace_module_unloaded; 14805 dtrace_cpu_init = dtrace_cpu_setup_initial; 14806 dtrace_helpers_cleanup = dtrace_helpers_destroy; 14807 dtrace_helpers_fork = dtrace_helpers_duplicate; 14808 dtrace_cpustart_init = dtrace_suspend; 14809 dtrace_cpustart_fini = dtrace_resume; 14810 dtrace_debugger_init = dtrace_suspend; 14811 dtrace_debugger_fini = dtrace_resume; 14812 14813 register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); 14814 14815 ASSERT(MUTEX_HELD(&cpu_lock)); 14816 14817 dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1, 14818 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); 14819 dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE, 14820 UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0, 14821 VM_SLEEP | 
VMC_IDENTIFIER); 14822 dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri, 14823 1, INT_MAX, 0); 14824 14825 dtrace_state_cache = kmem_cache_create("dtrace_state_cache", 14826 sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN, 14827 NULL, NULL, NULL, NULL, NULL, 0); 14828 14829 ASSERT(MUTEX_HELD(&cpu_lock)); 14830 dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), 14831 offsetof(dtrace_probe_t, dtpr_nextmod), 14832 offsetof(dtrace_probe_t, dtpr_prevmod)); 14833 14834 dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), 14835 offsetof(dtrace_probe_t, dtpr_nextfunc), 14836 offsetof(dtrace_probe_t, dtpr_prevfunc)); 14837 14838 dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), 14839 offsetof(dtrace_probe_t, dtpr_nextname), 14840 offsetof(dtrace_probe_t, dtpr_prevname)); 14841 14842 if (dtrace_retain_max < 1) { 14843 cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; " 14844 "setting to 1", dtrace_retain_max); 14845 dtrace_retain_max = 1; 14846 } 14847 14848 /* 14849 * Now discover our toxic ranges. 14850 */ 14851 dtrace_toxic_ranges(dtrace_toxrange_add); 14852 14853 /* 14854 * Before we register ourselves as a provider to our own framework, 14855 * we would like to assert that dtrace_provider is NULL -- but that's 14856 * not true if we were loaded as a dependency of a DTrace provider. 14857 * Once we've registered, we can assert that dtrace_provider is our 14858 * pseudo provider. 14859 */ 14860 (void) dtrace_register("dtrace", &dtrace_provider_attr, 14861 DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id); 14862 14863 ASSERT(dtrace_provider != NULL); 14864 ASSERT((dtrace_provider_id_t)dtrace_provider == id); 14865 14866 dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) 14867 dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); 14868 dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) 14869 dtrace_provider, NULL, NULL, "END", 0, NULL); 14870 dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) 14871 dtrace_provider, NULL, NULL, "ERROR", 1, NULL); 14872 14873 dtrace_anon_property(); 14874 mutex_exit(&cpu_lock); 14875 14876 /* 14877 * If DTrace helper tracing is enabled, we need to allocate the 14878 * trace buffer and initialize the values. 14879 */ 14880 if (dtrace_helptrace_enabled) { 14881 ASSERT(dtrace_helptrace_buffer == NULL); 14882 dtrace_helptrace_buffer = 14883 kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP); 14884 dtrace_helptrace_next = 0; 14885 } 14886 14887 /* 14888 * If there are already providers, we must ask them to provide their 14889 * probes, and then match any anonymous enabling against them. Note 14890 * that there should be no other retained enablings at this time: 14891 * the only retained enabling should be the anonymous 14892 * enabling. 14893 */ 14894 if (dtrace_anon.dta_enabling != NULL) { 14895 ASSERT(dtrace_retained == dtrace_anon.dta_enabling); 14896 14897 dtrace_enabling_provide(NULL); 14898 state = dtrace_anon.dta_state; 14899 14900 /* 14901 * We couldn't hold cpu_lock across the above call to 14902 * dtrace_enabling_provide(), but we must hold it to actually 14903 * enable the probes. We have to drop all of our locks, pick 14904 * up cpu_lock, and regain our locks before matching the 14905 * retained anonymous enabling.
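 * The reacquisition below mirrors the ordering used at the top of
 * dtrace_attach():
 *
 *	mutex_enter(&cpu_lock);
 *	mutex_enter(&dtrace_provider_lock);
 *	mutex_enter(&dtrace_lock);
 *
 * cpu_lock is acquired before the DTrace locks in this file, which
 * is why it cannot simply be picked up here while the other two are
 * still held.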
14906 */ 14907 mutex_exit(&dtrace_lock); 14908 mutex_exit(&dtrace_provider_lock); 14909 14910 mutex_enter(&cpu_lock); 14911 mutex_enter(&dtrace_provider_lock); 14912 mutex_enter(&dtrace_lock); 14913 14914 if ((enab = dtrace_anon.dta_enabling) != NULL) 14915 (void) dtrace_enabling_match(enab, NULL); 14916 14917 mutex_exit(&cpu_lock); 14918 } 14919 14920 mutex_exit(&dtrace_lock); 14921 mutex_exit(&dtrace_provider_lock); 14922 14923 if (state != NULL) { 14924 /* 14925 * If we created any anonymous state, set it going now. 14926 */ 14927 (void) dtrace_state_go(state, &dtrace_anon.dta_beganon); 14928 } 14929 14930 return (DDI_SUCCESS); 14931 } 14932 14933 /*ARGSUSED*/ 14934 static int 14935 dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 14936 { 14937 dtrace_state_t *state; 14938 uint32_t priv; 14939 uid_t uid; 14940 zoneid_t zoneid; 14941 14942 if (getminor(*devp) == DTRACEMNRN_HELPER) 14943 return (0); 14944 14945 /* 14946 * If this wasn't an open with the "helper" minor, then it must be 14947 * the "dtrace" minor. 14948 */ 14949 if (getminor(*devp) != DTRACEMNRN_DTRACE) 14950 return (ENXIO); 14951 14952 /* 14953 * If no DTRACE_PRIV_* bits are set in the credential, then the 14954 * caller lacks sufficient permission to do anything with DTrace. 14955 */ 14956 dtrace_cred2priv(cred_p, &priv, &uid, &zoneid); 14957 if (priv == DTRACE_PRIV_NONE) 14958 return (EACCES); 14959 14960 /* 14961 * Ask all providers to provide all their probes. 14962 */ 14963 mutex_enter(&dtrace_provider_lock); 14964 dtrace_probe_provide(NULL, NULL); 14965 mutex_exit(&dtrace_provider_lock); 14966 14967 mutex_enter(&cpu_lock); 14968 mutex_enter(&dtrace_lock); 14969 dtrace_opens++; 14970 dtrace_membar_producer(); 14971 14972 /* 14973 * If the kernel debugger is active (that is, if the kernel debugger 14974 * modified text in some way), we won't allow the open. 14975 */ 14976 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { 14977 dtrace_opens--; 14978 mutex_exit(&cpu_lock); 14979 mutex_exit(&dtrace_lock); 14980 return (EBUSY); 14981 } 14982 14983 state = dtrace_state_create(devp, cred_p); 14984 mutex_exit(&cpu_lock); 14985 14986 if (state == NULL) { 14987 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) 14988 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 14989 mutex_exit(&dtrace_lock); 14990 return (EAGAIN); 14991 } 14992 14993 mutex_exit(&dtrace_lock); 14994 14995 return (0); 14996 } 14997 14998 /*ARGSUSED*/ 14999 static int 15000 dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) 15001 { 15002 minor_t minor = getminor(dev); 15003 dtrace_state_t *state; 15004 15005 if (minor == DTRACEMNRN_HELPER) 15006 return (0); 15007 15008 state = ddi_get_soft_state(dtrace_softstate, minor); 15009 15010 mutex_enter(&cpu_lock); 15011 mutex_enter(&dtrace_lock); 15012 15013 if (state->dts_anon) { 15014 /* 15015 * There is anonymous state. Destroy that first. 15016 */ 15017 ASSERT(dtrace_anon.dta_state == NULL); 15018 dtrace_state_destroy(state->dts_anon); 15019 } 15020 15021 dtrace_state_destroy(state); 15022 ASSERT(dtrace_opens > 0); 15023 15024 /* 15025 * Only relinquish control of the kernel debugger interface when there 15026 * are no consumers and no anonymous enablings. 
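 * (dtrace_opens is the consumer count; the matching
 * KDI_DTSET_DTRACE_ACTIVATE call is made in dtrace_open(), above.)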
15027 */ 15028 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) 15029 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 15030 15031 mutex_exit(&dtrace_lock); 15032 mutex_exit(&cpu_lock); 15033 15034 return (0); 15035 } 15036 15037 /*ARGSUSED*/ 15038 static int 15039 dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv) 15040 { 15041 int rval; 15042 dof_helper_t help, *dhp = NULL; 15043 15044 switch (cmd) { 15045 case DTRACEHIOC_ADDDOF: 15046 if (copyin((void *)arg, &help, sizeof (help)) != 0) { 15047 dtrace_dof_error(NULL, "failed to copyin DOF helper"); 15048 return (EFAULT); 15049 } 15050 15051 dhp = &help; 15052 arg = (intptr_t)help.dofhp_dof; 15053 /*FALLTHROUGH*/ 15054 15055 case DTRACEHIOC_ADD: { 15056 dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval); 15057 15058 if (dof == NULL) 15059 return (rval); 15060 15061 mutex_enter(&dtrace_lock); 15062 15063 /* 15064 * dtrace_helper_slurp() takes responsibility for the dof -- 15065 * it may free it now or it may save it and free it later. 15066 */ 15067 if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) { 15068 *rv = rval; 15069 rval = 0; 15070 } else { 15071 rval = EINVAL; 15072 } 15073 15074 mutex_exit(&dtrace_lock); 15075 return (rval); 15076 } 15077 15078 case DTRACEHIOC_REMOVE: { 15079 mutex_enter(&dtrace_lock); 15080 rval = dtrace_helper_destroygen(arg); 15081 mutex_exit(&dtrace_lock); 15082 15083 return (rval); 15084 } 15085 15086 default: 15087 break; 15088 } 15089 15090 return (ENOTTY); 15091 } 15092 15093 /*ARGSUSED*/ 15094 static int 15095 dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) 15096 { 15097 minor_t minor = getminor(dev); 15098 dtrace_state_t *state; 15099 int rval; 15100 15101 if (minor == DTRACEMNRN_HELPER) 15102 return (dtrace_ioctl_helper(cmd, arg, rv)); 15103 15104 state = ddi_get_soft_state(dtrace_softstate, minor); 15105 15106 if (state->dts_anon) { 15107 ASSERT(dtrace_anon.dta_state == NULL); 15108 state = state->dts_anon; 15109 } 15110 15111 switch (cmd) { 15112 case DTRACEIOC_PROVIDER: { 15113 dtrace_providerdesc_t pvd; 15114 dtrace_provider_t *pvp; 15115 15116 if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0) 15117 return (EFAULT); 15118 15119 pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; 15120 mutex_enter(&dtrace_provider_lock); 15121 15122 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { 15123 if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) 15124 break; 15125 } 15126 15127 mutex_exit(&dtrace_provider_lock); 15128 15129 if (pvp == NULL) 15130 return (ESRCH); 15131 15132 bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); 15133 bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); 15134 if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0) 15135 return (EFAULT); 15136 15137 return (0); 15138 } 15139 15140 case DTRACEIOC_EPROBE: { 15141 dtrace_eprobedesc_t epdesc; 15142 dtrace_ecb_t *ecb; 15143 dtrace_action_t *act; 15144 void *buf; 15145 size_t size; 15146 uintptr_t dest; 15147 int nrecs; 15148 15149 if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) 15150 return (EFAULT); 15151 15152 mutex_enter(&dtrace_lock); 15153 15154 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { 15155 mutex_exit(&dtrace_lock); 15156 return (EINVAL); 15157 } 15158 15159 if (ecb->dte_probe == NULL) { 15160 mutex_exit(&dtrace_lock); 15161 return (EINVAL); 15162 } 15163 15164 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; 15165 epdesc.dtepd_uarg = ecb->dte_uarg; 15166 epdesc.dtepd_size = ecb->dte_size; 15167 15168 nrecs = epdesc.dtepd_nrecs; 
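		/*
		 * nrecs now holds the caller-supplied record capacity; the
		 * loop below recounts the ECB's records into dtepd_nrecs.
		 * Aggregating actions and tuple members are skipped both here
		 * and in the copy loop further down, so the count reflects
		 * only the records that will actually be copied out.
		 */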
15169 epdesc.dtepd_nrecs = 0; 15170 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 15171 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) 15172 continue; 15173 15174 epdesc.dtepd_nrecs++; 15175 } 15176 15177 /* 15178 * Now that we have the size, we need to allocate a temporary 15179 * buffer in which to store the complete description. We need 15180 * the temporary buffer to be able to drop dtrace_lock() 15181 * across the copyout(), below. 15182 */ 15183 size = sizeof (dtrace_eprobedesc_t) + 15184 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); 15185 15186 buf = kmem_alloc(size, KM_SLEEP); 15187 dest = (uintptr_t)buf; 15188 15189 bcopy(&epdesc, (void *)dest, sizeof (epdesc)); 15190 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); 15191 15192 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 15193 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) 15194 continue; 15195 15196 if (nrecs-- == 0) 15197 break; 15198 15199 bcopy(&act->dta_rec, (void *)dest, 15200 sizeof (dtrace_recdesc_t)); 15201 dest += sizeof (dtrace_recdesc_t); 15202 } 15203 15204 mutex_exit(&dtrace_lock); 15205 15206 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { 15207 kmem_free(buf, size); 15208 return (EFAULT); 15209 } 15210 15211 kmem_free(buf, size); 15212 return (0); 15213 } 15214 15215 case DTRACEIOC_AGGDESC: { 15216 dtrace_aggdesc_t aggdesc; 15217 dtrace_action_t *act; 15218 dtrace_aggregation_t *agg; 15219 int nrecs; 15220 uint32_t offs; 15221 dtrace_recdesc_t *lrec; 15222 void *buf; 15223 size_t size; 15224 uintptr_t dest; 15225 15226 if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) 15227 return (EFAULT); 15228 15229 mutex_enter(&dtrace_lock); 15230 15231 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { 15232 mutex_exit(&dtrace_lock); 15233 return (EINVAL); 15234 } 15235 15236 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; 15237 15238 nrecs = aggdesc.dtagd_nrecs; 15239 aggdesc.dtagd_nrecs = 0; 15240 15241 offs = agg->dtag_base; 15242 lrec = &agg->dtag_action.dta_rec; 15243 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; 15244 15245 for (act = agg->dtag_first; ; act = act->dta_next) { 15246 ASSERT(act->dta_intuple || 15247 DTRACEACT_ISAGG(act->dta_kind)); 15248 15249 /* 15250 * If this action has a record size of zero, it 15251 * denotes an argument to the aggregating action. 15252 * Because the presence of this record doesn't (or 15253 * shouldn't) affect the way the data is interpreted, 15254 * we don't copy it out to save user-level the 15255 * confusion of dealing with a zero-length record. 15256 */ 15257 if (act->dta_rec.dtrd_size == 0) { 15258 ASSERT(agg->dtag_hasarg); 15259 continue; 15260 } 15261 15262 aggdesc.dtagd_nrecs++; 15263 15264 if (act == &agg->dtag_action) 15265 break; 15266 } 15267 15268 /* 15269 * Now that we have the size, we need to allocate a temporary 15270 * buffer in which to store the complete description. We need 15271 * the temporary buffer to be able to drop dtrace_lock() 15272 * across the copyout(), below. 
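 * (copyout() may take a page fault on the user address and block; we
 * would rather not hold dtrace_lock -- the lock protecting all probe
 * and enabling management -- across such a delay.)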
15273 */ 15274 size = sizeof (dtrace_aggdesc_t) + 15275 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); 15276 15277 buf = kmem_alloc(size, KM_SLEEP); 15278 dest = (uintptr_t)buf; 15279 15280 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); 15281 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); 15282 15283 for (act = agg->dtag_first; ; act = act->dta_next) { 15284 dtrace_recdesc_t rec = act->dta_rec; 15285 15286 /* 15287 * See the comment in the above loop for why we pass 15288 * over zero-length records. 15289 */ 15290 if (rec.dtrd_size == 0) { 15291 ASSERT(agg->dtag_hasarg); 15292 continue; 15293 } 15294 15295 if (nrecs-- == 0) 15296 break; 15297 15298 rec.dtrd_offset -= offs; 15299 bcopy(&rec, (void *)dest, sizeof (rec)); 15300 dest += sizeof (dtrace_recdesc_t); 15301 15302 if (act == &agg->dtag_action) 15303 break; 15304 } 15305 15306 mutex_exit(&dtrace_lock); 15307 15308 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { 15309 kmem_free(buf, size); 15310 return (EFAULT); 15311 } 15312 15313 kmem_free(buf, size); 15314 return (0); 15315 } 15316 15317 case DTRACEIOC_ENABLE: { 15318 dof_hdr_t *dof; 15319 dtrace_enabling_t *enab = NULL; 15320 dtrace_vstate_t *vstate; 15321 int err = 0; 15322 15323 *rv = 0; 15324 15325 /* 15326 * If a NULL argument has been passed, we take this as our 15327 * cue to reevaluate our enablings. 15328 */ 15329 if (arg == NULL) { 15330 dtrace_enabling_matchall(); 15331 15332 return (0); 15333 } 15334 15335 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) 15336 return (rval); 15337 15338 mutex_enter(&cpu_lock); 15339 mutex_enter(&dtrace_lock); 15340 vstate = &state->dts_vstate; 15341 15342 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { 15343 mutex_exit(&dtrace_lock); 15344 mutex_exit(&cpu_lock); 15345 dtrace_dof_destroy(dof); 15346 return (EBUSY); 15347 } 15348 15349 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) { 15350 mutex_exit(&dtrace_lock); 15351 mutex_exit(&cpu_lock); 15352 dtrace_dof_destroy(dof); 15353 return (EINVAL); 15354 } 15355 15356 if ((rval = dtrace_dof_options(dof, state)) != 0) { 15357 dtrace_enabling_destroy(enab); 15358 mutex_exit(&dtrace_lock); 15359 mutex_exit(&cpu_lock); 15360 dtrace_dof_destroy(dof); 15361 return (rval); 15362 } 15363 15364 if ((err = dtrace_enabling_match(enab, rv)) == 0) { 15365 err = dtrace_enabling_retain(enab); 15366 } else { 15367 dtrace_enabling_destroy(enab); 15368 } 15369 15370 mutex_exit(&cpu_lock); 15371 mutex_exit(&dtrace_lock); 15372 dtrace_dof_destroy(dof); 15373 15374 return (err); 15375 } 15376 15377 case DTRACEIOC_REPLICATE: { 15378 dtrace_repldesc_t desc; 15379 dtrace_probedesc_t *match = &desc.dtrpd_match; 15380 dtrace_probedesc_t *create = &desc.dtrpd_create; 15381 int err; 15382 15383 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15384 return (EFAULT); 15385 15386 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 15387 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 15388 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 15389 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 15390 15391 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 15392 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 15393 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 15394 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 15395 15396 mutex_enter(&dtrace_lock); 15397 err = dtrace_enabling_replicate(state, match, create); 15398 mutex_exit(&dtrace_lock); 15399 15400 return (err); 15401 } 15402 15403 case DTRACEIOC_PROBEMATCH: 15404 case DTRACEIOC_PROBES: { 15405 dtrace_probe_t *probe = 
NULL; 15406 dtrace_probedesc_t desc; 15407 dtrace_probekey_t pkey; 15408 dtrace_id_t i; 15409 int m = 0; 15410 uint32_t priv; 15411 uid_t uid; 15412 zoneid_t zoneid; 15413 15414 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15415 return (EFAULT); 15416 15417 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 15418 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 15419 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 15420 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 15421 15422 /* 15423 * Before we attempt to match this probe, we want to give 15424 * all providers the opportunity to provide it. 15425 */ 15426 if (desc.dtpd_id == DTRACE_IDNONE) { 15427 mutex_enter(&dtrace_provider_lock); 15428 dtrace_probe_provide(&desc, NULL); 15429 mutex_exit(&dtrace_provider_lock); 15430 desc.dtpd_id++; 15431 } 15432 15433 if (cmd == DTRACEIOC_PROBEMATCH) { 15434 dtrace_probekey(&desc, &pkey); 15435 pkey.dtpk_id = DTRACE_IDNONE; 15436 } 15437 15438 dtrace_cred2priv(cr, &priv, &uid, &zoneid); 15439 15440 mutex_enter(&dtrace_lock); 15441 15442 if (cmd == DTRACEIOC_PROBEMATCH) { 15443 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { 15444 if ((probe = dtrace_probes[i - 1]) != NULL && 15445 (m = dtrace_match_probe(probe, &pkey, 15446 priv, uid, zoneid)) != 0) 15447 break; 15448 } 15449 15450 if (m < 0) { 15451 mutex_exit(&dtrace_lock); 15452 return (EINVAL); 15453 } 15454 15455 } else { 15456 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { 15457 if ((probe = dtrace_probes[i - 1]) != NULL && 15458 dtrace_match_priv(probe, priv, uid, zoneid)) 15459 break; 15460 } 15461 } 15462 15463 if (probe == NULL) { 15464 mutex_exit(&dtrace_lock); 15465 return (ESRCH); 15466 } 15467 15468 dtrace_probe_description(probe, &desc); 15469 mutex_exit(&dtrace_lock); 15470 15471 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15472 return (EFAULT); 15473 15474 return (0); 15475 } 15476 15477 case DTRACEIOC_PROBEARG: { 15478 dtrace_argdesc_t desc; 15479 dtrace_probe_t *probe; 15480 dtrace_provider_t *prov; 15481 15482 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15483 return (EFAULT); 15484 15485 if (desc.dtargd_id == DTRACE_IDNONE) 15486 return (EINVAL); 15487 15488 if (desc.dtargd_ndx == DTRACE_ARGNONE) 15489 return (EINVAL); 15490 15491 mutex_enter(&dtrace_provider_lock); 15492 mutex_enter(&mod_lock); 15493 mutex_enter(&dtrace_lock); 15494 15495 if (desc.dtargd_id > dtrace_nprobes) { 15496 mutex_exit(&dtrace_lock); 15497 mutex_exit(&mod_lock); 15498 mutex_exit(&dtrace_provider_lock); 15499 return (EINVAL); 15500 } 15501 15502 if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) { 15503 mutex_exit(&dtrace_lock); 15504 mutex_exit(&mod_lock); 15505 mutex_exit(&dtrace_provider_lock); 15506 return (EINVAL); 15507 } 15508 15509 mutex_exit(&dtrace_lock); 15510 15511 prov = probe->dtpr_provider; 15512 15513 if (prov->dtpv_pops.dtps_getargdesc == NULL) { 15514 /* 15515 * There isn't any typed information for this probe. 15516 * Set the argument number to DTRACE_ARGNONE. 
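 * (User level takes DTRACE_ARGNONE in dtargd_ndx to mean that no
 * type information is available; otherwise dtps_getargdesc() fills
 * in the native type, translated type and argument mapping below.)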
15517 */ 15518 desc.dtargd_ndx = DTRACE_ARGNONE; 15519 } else { 15520 desc.dtargd_native[0] = '\0'; 15521 desc.dtargd_xlate[0] = '\0'; 15522 desc.dtargd_mapping = desc.dtargd_ndx; 15523 15524 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, 15525 probe->dtpr_id, probe->dtpr_arg, &desc); 15526 } 15527 15528 mutex_exit(&mod_lock); 15529 mutex_exit(&dtrace_provider_lock); 15530 15531 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15532 return (EFAULT); 15533 15534 return (0); 15535 } 15536 15537 case DTRACEIOC_GO: { 15538 processorid_t cpuid; 15539 rval = dtrace_state_go(state, &cpuid); 15540 15541 if (rval != 0) 15542 return (rval); 15543 15544 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) 15545 return (EFAULT); 15546 15547 return (0); 15548 } 15549 15550 case DTRACEIOC_STOP: { 15551 processorid_t cpuid; 15552 15553 mutex_enter(&dtrace_lock); 15554 rval = dtrace_state_stop(state, &cpuid); 15555 mutex_exit(&dtrace_lock); 15556 15557 if (rval != 0) 15558 return (rval); 15559 15560 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) 15561 return (EFAULT); 15562 15563 return (0); 15564 } 15565 15566 case DTRACEIOC_DOFGET: { 15567 dof_hdr_t hdr, *dof; 15568 uint64_t len; 15569 15570 if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) 15571 return (EFAULT); 15572 15573 mutex_enter(&dtrace_lock); 15574 dof = dtrace_dof_create(state); 15575 mutex_exit(&dtrace_lock); 15576 15577 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); 15578 rval = copyout(dof, (void *)arg, len); 15579 dtrace_dof_destroy(dof); 15580 15581 return (rval == 0 ? 0 : EFAULT); 15582 } 15583 15584 case DTRACEIOC_AGGSNAP: 15585 case DTRACEIOC_BUFSNAP: { 15586 dtrace_bufdesc_t desc; 15587 caddr_t cached; 15588 dtrace_buffer_t *buf; 15589 15590 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15591 return (EFAULT); 15592 15593 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) 15594 return (EINVAL); 15595 15596 mutex_enter(&dtrace_lock); 15597 15598 if (cmd == DTRACEIOC_BUFSNAP) { 15599 buf = &state->dts_buffer[desc.dtbd_cpu]; 15600 } else { 15601 buf = &state->dts_aggbuffer[desc.dtbd_cpu]; 15602 } 15603 15604 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { 15605 size_t sz = buf->dtb_offset; 15606 15607 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { 15608 mutex_exit(&dtrace_lock); 15609 return (EBUSY); 15610 } 15611 15612 /* 15613 * If this buffer has already been consumed, we're 15614 * going to indicate that there's nothing left here 15615 * to consume. 15616 */ 15617 if (buf->dtb_flags & DTRACEBUF_CONSUMED) { 15618 mutex_exit(&dtrace_lock); 15619 15620 desc.dtbd_size = 0; 15621 desc.dtbd_drops = 0; 15622 desc.dtbd_errors = 0; 15623 desc.dtbd_oldest = 0; 15624 sz = sizeof (desc); 15625 15626 if (copyout(&desc, (void *)arg, sz) != 0) 15627 return (EFAULT); 15628 15629 return (0); 15630 } 15631 15632 /* 15633 * If this is a ring buffer that has wrapped, we want 15634 * to copy the whole thing out. 
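 * dtrace_buffer_polish() is expected to zero the dead space between
 * the current write offset and the oldest record so that the buffer
 * can be parsed linearly; dtbd_oldest, set from dtb_xamot_offset
 * below, tells the consumer where the logically first record lives.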
15635 */ 15636 if (buf->dtb_flags & DTRACEBUF_WRAPPED) { 15637 dtrace_buffer_polish(buf); 15638 sz = buf->dtb_size; 15639 } 15640 15641 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { 15642 mutex_exit(&dtrace_lock); 15643 return (EFAULT); 15644 } 15645 15646 desc.dtbd_size = sz; 15647 desc.dtbd_drops = buf->dtb_drops; 15648 desc.dtbd_errors = buf->dtb_errors; 15649 desc.dtbd_oldest = buf->dtb_xamot_offset; 15650 15651 mutex_exit(&dtrace_lock); 15652 15653 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15654 return (EFAULT); 15655 15656 buf->dtb_flags |= DTRACEBUF_CONSUMED; 15657 15658 return (0); 15659 } 15660 15661 if (buf->dtb_tomax == NULL) { 15662 ASSERT(buf->dtb_xamot == NULL); 15663 mutex_exit(&dtrace_lock); 15664 return (ENOENT); 15665 } 15666 15667 cached = buf->dtb_tomax; 15668 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 15669 15670 dtrace_xcall(desc.dtbd_cpu, 15671 (dtrace_xcall_t)dtrace_buffer_switch, buf); 15672 15673 state->dts_errors += buf->dtb_xamot_errors; 15674 15675 /* 15676 * If the buffers did not actually switch, then the cross call 15677 * did not take place -- presumably because the given CPU is 15678 * not in the ready set. If this is the case, we'll return 15679 * ENOENT. 15680 */ 15681 if (buf->dtb_tomax == cached) { 15682 ASSERT(buf->dtb_xamot != cached); 15683 mutex_exit(&dtrace_lock); 15684 return (ENOENT); 15685 } 15686 15687 ASSERT(cached == buf->dtb_xamot); 15688 15689 /* 15690 * We have our snapshot; now copy it out. 15691 */ 15692 if (copyout(buf->dtb_xamot, desc.dtbd_data, 15693 buf->dtb_xamot_offset) != 0) { 15694 mutex_exit(&dtrace_lock); 15695 return (EFAULT); 15696 } 15697 15698 desc.dtbd_size = buf->dtb_xamot_offset; 15699 desc.dtbd_drops = buf->dtb_xamot_drops; 15700 desc.dtbd_errors = buf->dtb_xamot_errors; 15701 desc.dtbd_oldest = 0; 15702 15703 mutex_exit(&dtrace_lock); 15704 15705 /* 15706 * Finally, copy out the buffer description. 15707 */ 15708 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15709 return (EFAULT); 15710 15711 return (0); 15712 } 15713 15714 case DTRACEIOC_CONF: { 15715 dtrace_conf_t conf; 15716 15717 bzero(&conf, sizeof (conf)); 15718 conf.dtc_difversion = DIF_VERSION; 15719 conf.dtc_difintregs = DIF_DIR_NREGS; 15720 conf.dtc_diftupregs = DIF_DTR_NREGS; 15721 conf.dtc_ctfmodel = CTF_MODEL_NATIVE; 15722 15723 if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) 15724 return (EFAULT); 15725 15726 return (0); 15727 } 15728 15729 case DTRACEIOC_STATUS: { 15730 dtrace_status_t stat; 15731 dtrace_dstate_t *dstate; 15732 int i, j; 15733 uint64_t nerrs; 15734 15735 /* 15736 * See the comment in dtrace_state_deadman() for the reason 15737 * for setting dts_laststatus to INT64_MAX before setting 15738 * it to the correct value. 
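 * In short: a 64-bit store need not be atomic, and the deadman reads
 * dts_laststatus without holding dtrace_lock. A torn write could
 * make the timestamp appear ancient, so the field is first parked at
 * INT64_MAX (with a producer barrier) to mark it as in flux; a
 * reader seeing INT64_MAX will never conclude that the consumer has
 * wedged.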
15739 */ 15740 state->dts_laststatus = INT64_MAX; 15741 dtrace_membar_producer(); 15742 state->dts_laststatus = dtrace_gethrtime(); 15743 15744 bzero(&stat, sizeof (stat)); 15745 15746 mutex_enter(&dtrace_lock); 15747 15748 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { 15749 mutex_exit(&dtrace_lock); 15750 return (ENOENT); 15751 } 15752 15753 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) 15754 stat.dtst_exiting = 1; 15755 15756 nerrs = state->dts_errors; 15757 dstate = &state->dts_vstate.dtvs_dynvars; 15758 15759 for (i = 0; i < NCPU; i++) { 15760 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; 15761 15762 stat.dtst_dyndrops += dcpu->dtdsc_drops; 15763 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; 15764 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; 15765 15766 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) 15767 stat.dtst_filled++; 15768 15769 nerrs += state->dts_buffer[i].dtb_errors; 15770 15771 for (j = 0; j < state->dts_nspeculations; j++) { 15772 dtrace_speculation_t *spec; 15773 dtrace_buffer_t *buf; 15774 15775 spec = &state->dts_speculations[j]; 15776 buf = &spec->dtsp_buffer[i]; 15777 stat.dtst_specdrops += buf->dtb_xamot_drops; 15778 } 15779 } 15780 15781 stat.dtst_specdrops_busy = state->dts_speculations_busy; 15782 stat.dtst_specdrops_unavail = state->dts_speculations_unavail; 15783 stat.dtst_stkstroverflows = state->dts_stkstroverflows; 15784 stat.dtst_dblerrors = state->dts_dblerrors; 15785 stat.dtst_killed = 15786 (state->dts_activity == DTRACE_ACTIVITY_KILLED); 15787 stat.dtst_errors = nerrs; 15788 15789 mutex_exit(&dtrace_lock); 15790 15791 if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) 15792 return (EFAULT); 15793 15794 return (0); 15795 } 15796 15797 case DTRACEIOC_FORMAT: { 15798 dtrace_fmtdesc_t fmt; 15799 char *str; 15800 int len; 15801 15802 if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) 15803 return (EFAULT); 15804 15805 mutex_enter(&dtrace_lock); 15806 15807 if (fmt.dtfd_format == 0 || 15808 fmt.dtfd_format > state->dts_nformats) { 15809 mutex_exit(&dtrace_lock); 15810 return (EINVAL); 15811 } 15812 15813 /* 15814 * Format strings are allocated contiguously and they are 15815 * never freed; if a format index is less than the number 15816 * of formats, we can assert that the format map is non-NULL 15817 * and that the format for the specified index is non-NULL. 
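 * Note the two-step protocol this implies: if the caller's
 * dtfd_length is too small, only the descriptor is copied back out
 * (with dtfd_length set to the required size) and the call still
 * succeeds, so the expected usage from user level is roughly:
 *
 *	fmt.dtfd_length = 0;
 *	ioctl(fd, DTRACEIOC_FORMAT, &fmt);	(learn the length)
 *	fmt.dtfd_string = malloc(fmt.dtfd_length);
 *	ioctl(fd, DTRACEIOC_FORMAT, &fmt);	(fetch the string)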
15818 */ 15819 ASSERT(state->dts_formats != NULL); 15820 str = state->dts_formats[fmt.dtfd_format - 1]; 15821 ASSERT(str != NULL); 15822 15823 len = strlen(str) + 1; 15824 15825 if (len > fmt.dtfd_length) { 15826 fmt.dtfd_length = len; 15827 15828 if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { 15829 mutex_exit(&dtrace_lock); 15830 return (EINVAL); 15831 } 15832 } else { 15833 if (copyout(str, fmt.dtfd_string, len) != 0) { 15834 mutex_exit(&dtrace_lock); 15835 return (EINVAL); 15836 } 15837 } 15838 15839 mutex_exit(&dtrace_lock); 15840 return (0); 15841 } 15842 15843 default: 15844 break; 15845 } 15846 15847 return (ENOTTY); 15848 } 15849 15850 /*ARGSUSED*/ 15851 static int 15852 dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 15853 { 15854 dtrace_state_t *state; 15855 15856 switch (cmd) { 15857 case DDI_DETACH: 15858 break; 15859 15860 case DDI_SUSPEND: 15861 return (DDI_SUCCESS); 15862 15863 default: 15864 return (DDI_FAILURE); 15865 } 15866 15867 mutex_enter(&cpu_lock); 15868 mutex_enter(&dtrace_provider_lock); 15869 mutex_enter(&dtrace_lock); 15870 15871 ASSERT(dtrace_opens == 0); 15872 15873 if (dtrace_helpers > 0) { 15874 mutex_exit(&dtrace_provider_lock); 15875 mutex_exit(&dtrace_lock); 15876 mutex_exit(&cpu_lock); 15877 return (DDI_FAILURE); 15878 } 15879 15880 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { 15881 mutex_exit(&dtrace_provider_lock); 15882 mutex_exit(&dtrace_lock); 15883 mutex_exit(&cpu_lock); 15884 return (DDI_FAILURE); 15885 } 15886 15887 dtrace_provider = NULL; 15888 15889 if ((state = dtrace_anon_grab()) != NULL) { 15890 /* 15891 * If there were ECBs on this state, the provider should 15892 * have not been allowed to detach; assert that there is 15893 * none. 15894 */ 15895 ASSERT(state->dts_necbs == 0); 15896 dtrace_state_destroy(state); 15897 15898 /* 15899 * If we're being detached with anonymous state, we need to 15900 * indicate to the kernel debugger that DTrace is now inactive. 
15901 */ 15902 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 15903 } 15904 15905 bzero(&dtrace_anon, sizeof (dtrace_anon_t)); 15906 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); 15907 dtrace_cpu_init = NULL; 15908 dtrace_helpers_cleanup = NULL; 15909 dtrace_helpers_fork = NULL; 15910 dtrace_cpustart_init = NULL; 15911 dtrace_cpustart_fini = NULL; 15912 dtrace_debugger_init = NULL; 15913 dtrace_debugger_fini = NULL; 15914 dtrace_modload = NULL; 15915 dtrace_modunload = NULL; 15916 15917 mutex_exit(&cpu_lock); 15918 15919 if (dtrace_helptrace_enabled) { 15920 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize); 15921 dtrace_helptrace_buffer = NULL; 15922 } 15923 15924 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *)); 15925 dtrace_probes = NULL; 15926 dtrace_nprobes = 0; 15927 15928 dtrace_hash_destroy(dtrace_bymod); 15929 dtrace_hash_destroy(dtrace_byfunc); 15930 dtrace_hash_destroy(dtrace_byname); 15931 dtrace_bymod = NULL; 15932 dtrace_byfunc = NULL; 15933 dtrace_byname = NULL; 15934 15935 kmem_cache_destroy(dtrace_state_cache); 15936 vmem_destroy(dtrace_minor); 15937 vmem_destroy(dtrace_arena); 15938 15939 if (dtrace_toxrange != NULL) { 15940 kmem_free(dtrace_toxrange, 15941 dtrace_toxranges_max * sizeof (dtrace_toxrange_t)); 15942 dtrace_toxrange = NULL; 15943 dtrace_toxranges = 0; 15944 dtrace_toxranges_max = 0; 15945 } 15946 15947 ddi_remove_minor_node(dtrace_devi, NULL); 15948 dtrace_devi = NULL; 15949 15950 ddi_soft_state_fini(&dtrace_softstate); 15951 15952 ASSERT(dtrace_vtime_references == 0); 15953 ASSERT(dtrace_opens == 0); 15954 ASSERT(dtrace_retained == NULL); 15955 15956 mutex_exit(&dtrace_lock); 15957 mutex_exit(&dtrace_provider_lock); 15958 15959 /* 15960 * We don't destroy the task queue until after we have dropped our 15961 * locks (taskq_destroy() may block on running tasks). To prevent 15962 * attempting to do work after we have effectively detached but before 15963 * the task queue has been destroyed, all tasks dispatched via the 15964 * task queue must check that DTrace is still attached before 15965 * performing any operation. 
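 * (dtrace_enabling_matchall(), dispatched from dtrace_module_loaded()
 * above, is an example of such a task.)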
15966 */ 15967 taskq_destroy(dtrace_taskq); 15968 dtrace_taskq = NULL; 15969 15970 return (DDI_SUCCESS); 15971 } 15972 15973 /*ARGSUSED*/ 15974 static int 15975 dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 15976 { 15977 int error; 15978 15979 switch (infocmd) { 15980 case DDI_INFO_DEVT2DEVINFO: 15981 *result = (void *)dtrace_devi; 15982 error = DDI_SUCCESS; 15983 break; 15984 case DDI_INFO_DEVT2INSTANCE: 15985 *result = (void *)0; 15986 error = DDI_SUCCESS; 15987 break; 15988 default: 15989 error = DDI_FAILURE; 15990 } 15991 return (error); 15992 } 15993 15994 static struct cb_ops dtrace_cb_ops = { 15995 dtrace_open, /* open */ 15996 dtrace_close, /* close */ 15997 nulldev, /* strategy */ 15998 nulldev, /* print */ 15999 nodev, /* dump */ 16000 nodev, /* read */ 16001 nodev, /* write */ 16002 dtrace_ioctl, /* ioctl */ 16003 nodev, /* devmap */ 16004 nodev, /* mmap */ 16005 nodev, /* segmap */ 16006 nochpoll, /* poll */ 16007 ddi_prop_op, /* cb_prop_op */ 16008 0, /* streamtab */ 16009 D_NEW | D_MP /* Driver compatibility flag */ 16010 }; 16011 16012 static struct dev_ops dtrace_ops = { 16013 DEVO_REV, /* devo_rev */ 16014 0, /* refcnt */ 16015 dtrace_info, /* get_dev_info */ 16016 nulldev, /* identify */ 16017 nulldev, /* probe */ 16018 dtrace_attach, /* attach */ 16019 dtrace_detach, /* detach */ 16020 nodev, /* reset */ 16021 &dtrace_cb_ops, /* driver operations */ 16022 NULL, /* bus operations */ 16023 nodev, /* dev power */ 16024 ddi_quiesce_not_needed, /* quiesce */ 16025 }; 16026 16027 static struct modldrv modldrv = { 16028 &mod_driverops, /* module type (this is a pseudo driver) */ 16029 "Dynamic Tracing", /* name of module */ 16030 &dtrace_ops, /* driver ops */ 16031 }; 16032 16033 static struct modlinkage modlinkage = { 16034 MODREV_1, 16035 (void *)&modldrv, 16036 NULL 16037 }; 16038 16039 int 16040 _init(void) 16041 { 16042 return (mod_install(&modlinkage)); 16043 } 16044 16045 int 16046 _info(struct modinfo *modinfop) 16047 { 16048 return (mod_info(&modlinkage, modinfop)); 16049 } 16050 16051 int 16052 _fini(void) 16053 { 16054 return (mod_remove(&modlinkage)); 16055 }
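/*
 * Note that _fini() defers entirely to mod_remove(): mod_remove() will
 * invoke dtrace_detach() and fail if the driver cannot detach, and all
 * of the real teardown -- unhooking the module, CPU and debugger
 * callbacks and destroying the hashes, arenas and task queue -- lives
 * in dtrace_detach(), above.
 */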