2917 DTrace in a zone should have limited provider access
--- old/usr/src/uts/common/dtrace/dtrace.c
+++ new/usr/src/uts/common/dtrace/dtrace.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 /*
28 28 * DTrace - Dynamic Tracing for Solaris
29 29 *
30 30 * This is the implementation of the Solaris Dynamic Tracing framework
31 31 * (DTrace). The user-visible interface to DTrace is described at length in
32 32 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace
33 33 * library, the in-kernel DTrace framework, and the DTrace providers are
34 34 * described in the block comments in the <sys/dtrace.h> header file. The
35 35 * internal architecture of DTrace is described in the block comments in the
36 36 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace
37 37 * implementation very much assume mastery of all of these sources; if one has
38 38 * an unanswered question about the implementation, one should consult them
39 39 * first.
40 40 *
41 41 * The functions here are ordered roughly as follows:
42 42 *
43 43 * - Probe context functions
44 44 * - Probe hashing functions
45 45 * - Non-probe context utility functions
46 46 * - Matching functions
47 47 * - Provider-to-Framework API functions
48 48 * - Probe management functions
49 49 * - DIF object functions
50 50 * - Format functions
51 51 * - Predicate functions
52 52 * - ECB functions
53 53 * - Buffer functions
54 54 * - Enabling functions
55 55 * - DOF functions
56 56 * - Anonymous enabling functions
57 57 * - Consumer state functions
58 58 * - Helper functions
59 59 * - Hook functions
60 60 * - Driver cookbook functions
61 61 *
62 62 * Each group of functions begins with a block comment labelled the "DTrace
63 63 * [Group] Functions", allowing one to find each block by searching forward
64 64 * on capital-f functions.
65 65 */
66 66 #include <sys/errno.h>
67 67 #include <sys/stat.h>
68 68 #include <sys/modctl.h>
69 69 #include <sys/conf.h>
70 70 #include <sys/systm.h>
71 71 #include <sys/ddi.h>
72 72 #include <sys/sunddi.h>
73 73 #include <sys/cpuvar.h>
74 74 #include <sys/kmem.h>
75 75 #include <sys/strsubr.h>
76 76 #include <sys/sysmacros.h>
77 77 #include <sys/dtrace_impl.h>
78 78 #include <sys/atomic.h>
79 79 #include <sys/cmn_err.h>
80 80 #include <sys/mutex_impl.h>
81 81 #include <sys/rwlock_impl.h>
82 82 #include <sys/ctf_api.h>
83 83 #include <sys/panic.h>
84 84 #include <sys/priv_impl.h>
85 85 #include <sys/policy.h>
86 86 #include <sys/cred_impl.h>
87 87 #include <sys/procfs_isa.h>
88 88 #include <sys/taskq.h>
89 89 #include <sys/mkdev.h>
90 90 #include <sys/kdi.h>
91 91 #include <sys/zone.h>
92 92 #include <sys/socket.h>
93 93 #include <netinet/in.h>
94 94
95 95 /*
96 96 * DTrace Tunable Variables
97 97 *
98 98 * The following variables may be tuned by adding a line to /etc/system that
99 99 * includes both the name of the DTrace module ("dtrace") and the name of the
100 100 * variable. For example:
101 101 *
102 102 * set dtrace:dtrace_destructive_disallow = 1
103 103 *
104 104 * In general, the only variables that one should be tuning this way are those
105 105 * that affect system-wide DTrace behavior, and for which the default behavior
106 106 * is undesirable. Most of these variables are tunable on a per-consumer
107 107 * basis using DTrace options, and need not be tuned on a system-wide basis.
108 108 * When tuning these variables, avoid pathological values; while some attempt
109 109 * is made to verify the integrity of these variables, they are not considered
110 110 * part of the supported interface to DTrace, and they are therefore not
111 111 * checked comprehensively. Further, these variables should not be tuned
112 112 * dynamically via "mdb -kw" or other means; they should only be tuned via
113 113 * /etc/system.
114 114 */
115 115 int dtrace_destructive_disallow = 0;
116 116 dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
117 117 size_t dtrace_difo_maxsize = (256 * 1024);
118 118 dtrace_optval_t dtrace_dof_maxsize = (256 * 1024);
119 119 size_t dtrace_global_maxsize = (16 * 1024);
120 120 size_t dtrace_actions_max = (16 * 1024);
121 121 size_t dtrace_retain_max = 1024;
122 122 dtrace_optval_t dtrace_helper_actions_max = 1024;
123 123 dtrace_optval_t dtrace_helper_providers_max = 32;
124 124 dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
125 125 size_t dtrace_strsize_default = 256;
126 126 dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */
127 127 dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */
128 128 dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */
129 129 dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */
130 130 dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */
131 131 dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */
132 132 dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */
133 133 dtrace_optval_t dtrace_nspec_default = 1;
134 134 dtrace_optval_t dtrace_specsize_default = 32 * 1024;
135 135 dtrace_optval_t dtrace_stackframes_default = 20;
136 136 dtrace_optval_t dtrace_ustackframes_default = 20;
137 137 dtrace_optval_t dtrace_jstackframes_default = 50;
138 138 dtrace_optval_t dtrace_jstackstrsize_default = 512;
139 139 int dtrace_msgdsize_max = 128;
140 140 hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */
141 141 hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */
142 142 int dtrace_devdepth_max = 32;
143 143 int dtrace_err_verbose;
144 144 hrtime_t dtrace_deadman_interval = NANOSEC;
145 145 hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
146 146 hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
147 147 hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;
148 148
149 149 /*
150 150 * DTrace External Variables
151 151 *
152 152 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
153 153 * available to DTrace consumers via the backtick (`) syntax. One of these,
154 154 * dtrace_zero, is made deliberately so: it is provided as a source of
155 155 * well-known, zero-filled memory. While this variable is not documented,
156 156 * it is used by some translators as an implementation detail.
157 157 */
158 158 const char dtrace_zero[256] = { 0 }; /* zero-filled memory */
159 159
160 160 /*
161 161 * DTrace Internal Variables
162 162 */
163 163 static dev_info_t *dtrace_devi; /* device info */
164 164 static vmem_t *dtrace_arena; /* probe ID arena */
165 165 static vmem_t *dtrace_minor; /* minor number arena */
166 166 static taskq_t *dtrace_taskq; /* task queue */
167 167 static dtrace_probe_t **dtrace_probes; /* array of all probes */
168 168 static int dtrace_nprobes; /* number of probes */
169 169 static dtrace_provider_t *dtrace_provider; /* provider list */
170 170 static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
171 171 static int dtrace_opens; /* number of opens */
172 172 static int dtrace_helpers; /* number of helpers */
173 173 static int dtrace_getf; /* number of unpriv getf()s */
174 174 static void *dtrace_softstate; /* softstate pointer */
175 175 static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
176 176 static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
177 177 static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
178 178 static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */
179 179 static int dtrace_toxranges; /* number of toxic ranges */
180 180 static int dtrace_toxranges_max; /* size of toxic range array */
181 181 static dtrace_anon_t dtrace_anon; /* anonymous enabling */
182 182 static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */
183 183 static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */
184 184 static kthread_t *dtrace_panicked; /* panicking thread */
185 185 static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
186 186 static dtrace_genid_t dtrace_probegen; /* current probe generation */
187 187 static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
188 188 static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
189 189 static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
190 190 static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
191 191 static int dtrace_dynvar_failclean; /* dynvars failed to clean */
192 192
193 193 /*
194 194 * DTrace Locking
195 195 * DTrace is protected by three (relatively coarse-grained) locks:
196 196 *
197 197 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
198 198 * including enabling state, probes, ECBs, consumer state, helper state,
199 199 * etc. Importantly, dtrace_lock is _not_ required when in probe context;
200 200 * probe context is lock-free -- synchronization is handled via the
201 201 * dtrace_sync() cross call mechanism.
202 202 *
203 203 * (2) dtrace_provider_lock is required when manipulating provider state, or
204 204 * when provider state must be held constant.
205 205 *
206 206 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
207 207 * when meta provider state must be held constant.
208 208 *
209 209 * The lock ordering between these three locks is dtrace_meta_lock before
210 210 * dtrace_provider_lock before dtrace_lock. (In particular, there are
211 211 * several places where dtrace_provider_lock is held by the framework as it
212 212 * calls into the providers -- which then call back into the framework,
213 213 * grabbing dtrace_lock.)
214 214 *
215 215 * There are two other locks in the mix: mod_lock and cpu_lock. With respect
216 216 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
217 217 * role as a coarse-grained lock; it is acquired before both of these locks.
218 218 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
219 219 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
220 220 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
221 221 * acquired _between_ dtrace_provider_lock and dtrace_lock.
222 222 */
223 223 static kmutex_t dtrace_lock; /* probe state lock */
224 224 static kmutex_t dtrace_provider_lock; /* provider state lock */
225 225 static kmutex_t dtrace_meta_lock; /* meta-provider state lock */
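
	[Editorial note] The rules above imply a single consistent total order
	when all five locks are needed.  A minimal sketch (not part of the
	source), with the locks released in the conventional reverse order:

		mutex_enter(&dtrace_meta_lock);
		mutex_enter(&cpu_lock);
		mutex_enter(&dtrace_provider_lock);
		mutex_enter(&mod_lock);
		mutex_enter(&dtrace_lock);
		/* ... manipulate framework state ... */
		mutex_exit(&dtrace_lock);
		mutex_exit(&mod_lock);
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_meta_lock);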
226 226
227 227 /*
228 228 * DTrace Provider Variables
229 229 *
230 230 * These are the variables relating to DTrace as a provider (that is, the
231 231 * provider of the BEGIN, END, and ERROR probes).
232 232 */
233 233 static dtrace_pattr_t dtrace_provider_attr = {
234 234 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
235 235 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
236 236 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
237 237 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
238 238 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
239 239 };
240 240
241 241 static void
242 242 dtrace_nullop(void)
243 243 {}
244 244
245 245 static int
246 246 dtrace_enable_nullop(void)
247 247 {
248 248 return (0);
249 249 }
250 250
251 251 static dtrace_pops_t dtrace_provider_ops = {
252 252 (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
253 253 (void (*)(void *, struct modctl *))dtrace_nullop,
254 254 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
255 255 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
256 256 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
257 257 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
258 258 NULL,
259 259 NULL,
260 260 NULL,
261 261 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop
262 262 };
263 263
264 264 static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */
265 265 static dtrace_id_t dtrace_probeid_end; /* special END probe */
266 266 dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
267 267
268 268 /*
269 269 * DTrace Helper Tracing Variables
270 270 */
271 271 uint32_t dtrace_helptrace_next = 0;
272 272 uint32_t dtrace_helptrace_nlocals;
273 273 char *dtrace_helptrace_buffer;
274 274 int dtrace_helptrace_bufsize = 512 * 1024;
275 275
276 276 #ifdef DEBUG
277 277 int dtrace_helptrace_enabled = 1;
278 278 #else
279 279 int dtrace_helptrace_enabled = 0;
280 280 #endif
281 281
282 282 /*
283 283 * DTrace Error Hashing
284 284 *
285 285 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
286 286 * table. This is very useful for checking coverage of tests that are
287 287 * expected to induce DIF or DOF processing errors, and may be useful for
288 288 * debugging problems in the DIF code generator or in DOF generation. The
289 289 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
290 290 */
291 291 #ifdef DEBUG
292 292 static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ];
293 293 static const char *dtrace_errlast;
294 294 static kthread_t *dtrace_errthread;
295 295 static kmutex_t dtrace_errlock;
296 296 #endif
297 297
298 298 /*
299 299 * DTrace Macros and Constants
300 300 *
301 301 * These are various macros that are useful in various spots in the
302 302 * implementation, along with a few random constants that have no meaning
303 303 * outside of the implementation. There is no real structure to this cpp
304 304 * mishmash -- but is there ever?
305 305 */
306 306 #define DTRACE_HASHSTR(hash, probe) \
307 307 dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))
308 308
309 309 #define DTRACE_HASHNEXT(hash, probe) \
310 310 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)
311 311
312 312 #define DTRACE_HASHPREV(hash, probe) \
313 313 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)
314 314
315 315 #define DTRACE_HASHEQ(hash, lhs, rhs) \
316 316 (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
317 317 *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)
318 318
319 319 #define DTRACE_AGGHASHSIZE_SLEW 17
320 320
321 321 #define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3)
322 322
323 323 /*
324 324 * The key for a thread-local variable consists of the lower 61 bits of the
325 325 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
326 326 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
327 327 * equal to a variable identifier. This is necessary (but not sufficient) to
328 328 * assure that global associative arrays never collide with thread-local
329 329 * variables. To guarantee that they cannot collide, we must also define the
330 330 * order for keying dynamic variables. That order is:
331 331 *
332 332 * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
333 333 *
334 334 * Because the variable-key and the tls-key are in orthogonal spaces, there is
335 335 * no way for a global variable key signature to match a thread-local key
336 336 * signature.
337 337 */
338 338 #define DTRACE_TLS_THRKEY(where) { \
339 339 uint_t intr = 0; \
340 340 uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
341 341 for (; actv; actv >>= 1) \
342 342 intr++; \
343 343 ASSERT(intr < (1 << 3)); \
344 344 (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
345 345 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
346 346 }
347 347
348 348 #define DT_BSWAP_8(x) ((x) & 0xff)
349 349 #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
350 350 #define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
351 351 #define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
352 352
353 353 #define DT_MASK_LO 0x00000000FFFFFFFFULL
354 354
355 355 #define DTRACE_STORE(type, tomax, offset, what) \
356 356 *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
357 357
358 358 #ifndef __i386
359 359 #define DTRACE_ALIGNCHECK(addr, size, flags) \
360 360 if (addr & (size - 1)) { \
361 361 *flags |= CPU_DTRACE_BADALIGN; \
362 362 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
363 363 return (0); \
364 364 }
365 365 #else
366 366 #define DTRACE_ALIGNCHECK(addr, size, flags)
367 367 #endif
368 368
369 369 /*
370 370 * Test whether a range of memory starting at testaddr of size testsz falls
371 371 * within the range of memory described by addr, sz. We take care to avoid
372 372 * problems with overflow and underflow of the unsigned quantities, and
373 373 * disallow all negative sizes. Ranges of size 0 are allowed.
374 374 */
375 375 #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
376 376 ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
377 377 (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
378 378 (testaddr) + (testsz) >= (testaddr))
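
	[Editorial note] A user-space sketch (not part of the source) of why
	the third clause is needed: a "negative" testsz can wrap testaddr +
	testsz back into the base range and satisfy the first two clauses, so
	only the overflow check rejects it:

		#include <stdint.h>
		#include <stdio.h>

		#define	DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
			((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
			(testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
			(testaddr) + (testsz) >= (testaddr))

		int
		main(void)
		{
			uintptr_t base = 0x1000;

			/* An ordinary in-range request: accepted. */
			(void) printf("%d\n", DTRACE_INRANGE((uintptr_t)0x1100,
			    (uintptr_t)0x80, base, (uintptr_t)0x200)); /* 1 */

			/*
			 * A "negative" size: testaddr + testsz wraps to
			 * 0x1050, which passes the first two clauses; only
			 * the overflow clause fails.
			 */
			(void) printf("%d\n", DTRACE_INRANGE((uintptr_t)0x1100,
			    (uintptr_t)-0xb0, base, (uintptr_t)0x200)); /* 0 */
			return (0);
		}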
379 379
380 380 /*
381 381 * Test whether alloc_sz bytes will fit in the scratch region. We isolate
382 382 * alloc_sz on the righthand side of the comparison in order to avoid overflow
383 383 * or underflow in the comparison with it. This is simpler than the INRANGE
384 384 * check above, because we know that the dtms_scratch_ptr is valid in the
385 385 * range. Allocations of size zero are allowed.
386 386 */
387 387 #define DTRACE_INSCRATCH(mstate, alloc_sz) \
388 388 ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
389 389 (mstate)->dtms_scratch_ptr >= (alloc_sz))
390 390
391 391 #define DTRACE_LOADFUNC(bits) \
392 392 /*CSTYLED*/ \
393 393 uint##bits##_t \
394 394 dtrace_load##bits(uintptr_t addr) \
395 395 { \
396 396 size_t size = bits / NBBY; \
397 397 /*CSTYLED*/ \
398 398 uint##bits##_t rval; \
399 399 int i; \
400 400 volatile uint16_t *flags = (volatile uint16_t *) \
401 401 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
402 402 \
403 403 DTRACE_ALIGNCHECK(addr, size, flags); \
404 404 \
405 405 for (i = 0; i < dtrace_toxranges; i++) { \
406 406 if (addr >= dtrace_toxrange[i].dtt_limit) \
407 407 continue; \
408 408 \
409 409 if (addr + size <= dtrace_toxrange[i].dtt_base) \
410 410 continue; \
411 411 \
412 412 /* \
413 413 * This address falls within a toxic region; return 0. \
414 414 */ \
415 415 *flags |= CPU_DTRACE_BADADDR; \
416 416 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
417 417 return (0); \
418 418 } \
419 419 \
420 420 *flags |= CPU_DTRACE_NOFAULT; \
421 421 /*CSTYLED*/ \
422 422 rval = *((volatile uint##bits##_t *)addr); \
423 423 *flags &= ~CPU_DTRACE_NOFAULT; \
424 424 \
425 425 return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \
426 426 }
427 427
428 428 #ifdef _LP64
429 429 #define dtrace_loadptr dtrace_load64
430 430 #else
431 431 #define dtrace_loadptr dtrace_load32
432 432 #endif
433 433
434 434 #define DTRACE_DYNHASH_FREE 0
435 435 #define DTRACE_DYNHASH_SINK 1
436 436 #define DTRACE_DYNHASH_VALID 2
437 437
438 438 #define DTRACE_MATCH_FAIL -1
439 439 #define DTRACE_MATCH_NEXT 0
440 440 #define DTRACE_MATCH_DONE 1
441 441 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
442 442 #define DTRACE_STATE_ALIGN 64
443 443
444 444 #define DTRACE_FLAGS2FLT(flags) \
445 445 (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \
446 446 ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \
447 447 ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \
448 448 ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \
449 449 ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \
450 450 ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \
451 451 ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \
452 452 ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \
453 453 ((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK : \
454 454 DTRACEFLT_UNKNOWN)
455 455
456 456 #define DTRACEACT_ISSTRING(act) \
457 457 ((act)->dta_kind == DTRACEACT_DIFEXPR && \
458 458 (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
459 459
460 460 static size_t dtrace_strlen(const char *, size_t);
461 461 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
462 462 static void dtrace_enabling_provide(dtrace_provider_t *);
463 463 static int dtrace_enabling_match(dtrace_enabling_t *, int *);
464 464 static void dtrace_enabling_matchall(void);
465 465 static void dtrace_enabling_reap(void);
466 466 static dtrace_state_t *dtrace_anon_grab(void);
467 467 static uint64_t dtrace_helper(int, dtrace_mstate_t *,
468 468 dtrace_state_t *, uint64_t, uint64_t);
469 469 static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
470 470 static void dtrace_buffer_drop(dtrace_buffer_t *);
471 471 static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
472 472 static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
473 473 dtrace_state_t *, dtrace_mstate_t *);
474 474 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
475 475 dtrace_optval_t);
476 476 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
477 477 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
478 478 static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
479 479 static void dtrace_getf_barrier(void);
480 480
481 481 /*
482 482 * DTrace Probe Context Functions
483 483 *
484 484 * These functions are called from probe context. Because probe context is
485 485 * any context in which C may be called, arbitrary locks may be held,
486 486 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
487 487 * As a result, functions called from probe context may only call other DTrace
488 488 * support functions -- they may not interact at all with the system at large.
489 489 * (Note that the ASSERT macro is made probe-context safe by redefining it in
490 490 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
491 491 * loads are to be performed from probe context, they _must_ be in terms of
492 492 * the safe dtrace_load*() variants.
493 493 *
494 494 * Some functions in this block are not actually called from probe context;
495 495 * for these functions, there will be a comment above the function reading
496 496 * "Note: not called from probe context."
497 497 */
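
	[Editorial note] An illustration of the load rule above: given an
	untrusted, DIF-supplied address addr, probe-context code never
	dereferences the pointer directly, but goes through the fault-tolerant
	loaders instantiated later in this file via DTRACE_LOADFUNC:

		uint8_t c;

		/* Unsafe in probe context -- a bad addr takes a real fault: */
		/* c = *(uint8_t *)addr; */

		/* Safe: a fault sets CPU_DTRACE_FAULT and yields 0 instead. */
		c = dtrace_load8(addr);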
498 498 void
499 499 dtrace_panic(const char *format, ...)
500 500 {
501 501 va_list alist;
502 502
503 503 va_start(alist, format);
504 504 dtrace_vpanic(format, alist);
505 505 va_end(alist);
506 506 }
507 507
508 508 int
509 509 dtrace_assfail(const char *a, const char *f, int l)
510 510 {
511 511 dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);
512 512
513 513 /*
514 514 * We just need something here that even the most clever compiler
515 515 * cannot optimize away.
516 516 */
517 517 return (a[(uintptr_t)f]);
518 518 }
519 519
520 520 /*
521 521 * Atomically increment a specified error counter from probe context.
522 522 */
523 523 static void
524 524 dtrace_error(uint32_t *counter)
525 525 {
526 526 /*
527 527 * Most counters stored to in probe context are per-CPU counters.
528 528 * However, there are some error conditions that are sufficiently
529 529 * arcane that they don't merit per-CPU storage. If these counters
530 530 * are incremented concurrently on different CPUs, scalability will be
531 531 * adversely affected -- but we don't expect them to be white-hot in a
532 532 * correctly constructed enabling...
533 533 */
534 534 uint32_t oval, nval;
535 535
536 536 do {
537 537 oval = *counter;
538 538
539 539 if ((nval = oval + 1) == 0) {
540 540 /*
541 541 * If the counter would wrap, set it to 1 -- assuring
542 542 * that the counter is never zero when we have seen
543 543 * errors. (The counter must be 32-bits because we
544 544 * aren't guaranteed a 64-bit compare&swap operation.)
545 545 * To save this code both the infamy of being fingered
546 546 * by a priggish news story and the indignity of being
547 547 * the target of a neo-puritan witch trial, we're
548 548 * carefully avoiding any colorful description of the
549 549 * likelihood of this condition -- but suffice it to
550 550 * say that it is only slightly more likely than the
551 551 * overflow of predicate cache IDs, as discussed in
552 552 * dtrace_predicate_create().
553 553 */
554 554 nval = 1;
555 555 }
556 556 } while (dtrace_cas32(counter, oval, nval) != oval);
557 557 }
558 558
559 559 /*
560 560 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
561 561 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
562 562 */
563 563 DTRACE_LOADFUNC(8)
564 564 DTRACE_LOADFUNC(16)
565 565 DTRACE_LOADFUNC(32)
566 566 DTRACE_LOADFUNC(64)
567 567
568 568 static int
569 569 dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
570 570 {
571 571 if (dest < mstate->dtms_scratch_base)
572 572 return (0);
573 573
574 574 if (dest + size < dest)
575 575 return (0);
576 576
577 577 if (dest + size > mstate->dtms_scratch_ptr)
578 578 return (0);
579 579
580 580 return (1);
581 581 }
582 582
583 583 static int
584 584 dtrace_canstore_statvar(uint64_t addr, size_t sz,
585 585 dtrace_statvar_t **svars, int nsvars)
586 586 {
587 587 int i;
588 588
589 589 for (i = 0; i < nsvars; i++) {
590 590 dtrace_statvar_t *svar = svars[i];
591 591
592 592 if (svar == NULL || svar->dtsv_size == 0)
593 593 continue;
594 594
595 595 if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size))
596 596 return (1);
597 597 }
598 598
599 599 return (0);
600 600 }
601 601
602 602 /*
603 603 * Check to see if the address is within a memory region to which a store may
604 604 * be issued. This includes the DTrace scratch areas, and any DTrace variable
605 605 * region. The caller of dtrace_canstore() is responsible for performing any
606 606 * alignment checks that are needed before stores are actually executed.
607 607 */
608 608 static int
609 609 dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
610 610 dtrace_vstate_t *vstate)
611 611 {
612 612 /*
613 613 * First, check to see if the address is in scratch space...
614 614 */
615 615 if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
616 616 mstate->dtms_scratch_size))
617 617 return (1);
618 618
619 619 /*
620 620 * Now check to see if it's a dynamic variable. This check will pick
621 621 * up both thread-local variables and any global dynamically-allocated
622 622 * variables.
623 623 */
624 624 if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
625 625 vstate->dtvs_dynvars.dtds_size)) {
626 626 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
627 627 uintptr_t base = (uintptr_t)dstate->dtds_base +
628 628 (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
629 629 uintptr_t chunkoffs;
630 630
631 631 /*
632 632 * Before we assume that we can store here, we need to make
633 633 * sure that it isn't in our metadata -- storing to our
634 634 * dynamic variable metadata would corrupt our state. For
635 635 * the range to not include any dynamic variable metadata,
636 636 * it must:
637 637 *
638 638 * (1) Start above the hash table that is at the base of
639 639 * the dynamic variable space
640 640 *
641 641 * (2) Have a starting chunk offset that is beyond the
642 642 * dtrace_dynvar_t that is at the base of every chunk
643 643 *
644 644 * (3) Not span a chunk boundary
645 645 *
646 646 */
647 647 if (addr < base)
648 648 return (0);
649 649
650 650 chunkoffs = (addr - base) % dstate->dtds_chunksize;
651 651
652 652 if (chunkoffs < sizeof (dtrace_dynvar_t))
653 653 return (0);
654 654
655 655 if (chunkoffs + sz > dstate->dtds_chunksize)
656 656 return (0);
657 657
658 658 return (1);
659 659 }
660 660
661 661 /*
662 662 * Finally, check the static local and global variables. These checks
663 663 * take the longest, so we perform them last.
664 664 */
665 665 if (dtrace_canstore_statvar(addr, sz,
666 666 vstate->dtvs_locals, vstate->dtvs_nlocals))
667 667 return (1);
668 668
669 669 if (dtrace_canstore_statvar(addr, sz,
670 670 vstate->dtvs_globals, vstate->dtvs_nglobals))
671 671 return (1);
672 672
673 673 return (0);
674 674 }
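
	[Editorial note] To picture the geometry that conditions (1) through
	(3) in dtrace_canstore() above protect, a sketch of the dynamic
	variable space layout:

	    dtds_base
	    |
	    v
	    +------------+-------------------+-------------------+--
	    | hash table | chunk 0           | chunk 1           | ...
	    +------------+-------------------+-------------------+--
	                 | dynvar_t |  data  | dynvar_t |  data  |
	                 +----------+--------+----------+--------+

	A store may only land in a "data" region: entirely above the hash
	table (1), at a chunk offset past the dtrace_dynvar_t metadata header
	(2), and without crossing into the following chunk (3).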
675 675
676 676
677 677 /*
678 678 * Convenience routine to check to see if the address is within a memory
679 679 * region in which a load may be issued given the user's privilege level;
680 680 * if not, it sets the appropriate error flags and loads 'addr' into the
681 681 * illegal value slot.
682 682 *
683 683 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
684 684 * appropriate memory access protection.
685 685 */
686 686 static int
687 687 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
688 688 dtrace_vstate_t *vstate)
689 689 {
690 690 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
691 691 file_t *fp;
692 692
693 693 /*
694 694 * If we hold the privilege to read from kernel memory, then
695 695 * everything is readable.
696 696 */
697 697 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
698 698 return (1);
699 699
700 700 /*
701 701 * You can obviously read that which you can store.
702 702 */
703 703 if (dtrace_canstore(addr, sz, mstate, vstate))
704 704 return (1);
705 705
706 706 /*
707 707 * We're allowed to read from our own string table.
708 708 */
709 709 if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
710 710 mstate->dtms_difo->dtdo_strlen))
711 711 return (1);
712 712
713 713 if (vstate->dtvs_state != NULL &&
714 714 dtrace_priv_proc(vstate->dtvs_state, mstate)) {
715 715 proc_t *p;
716 716
717 717 /*
718 718 * When we have privileges to the current process, there are
719 719 * several context-related kernel structures that are safe to
720 720 * read, even absent the privilege to read from kernel memory.
721 721 * These reads are safe because these structures contain only
722 722 * state that (1) we're permitted to read, (2) is harmless or
723 723 * (3) contains pointers to additional kernel state that we're
724 724 * not permitted to read (and as such, do not present an
725 725 * opportunity for privilege escalation). Finally (and
726 726 * critically), because of the nature of their relation with
727 727 * the current thread context, the memory associated with these
728 728 * structures cannot change over the duration of probe context,
729 729 * and it is therefore impossible for this memory to be
730 730 * deallocated and reallocated as something else while it's
731 731 * being operated upon.
732 732 */
733 733 if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
734 734 return (1);
735 735
736 736 if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
737 737 sz, curthread->t_procp, sizeof (proc_t))) {
738 738 return (1);
739 739 }
740 740
741 741 if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
742 742 curthread->t_cred, sizeof (cred_t))) {
743 743 return (1);
744 744 }
745 745
746 746 if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
747 747 &(p->p_pidp->pid_id), sizeof (pid_t))) {
748 748 return (1);
749 749 }
750 750
751 751 if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
752 752 curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
753 753 return (1);
754 754 }
755 755 }
756 756
757 757 if ((fp = mstate->dtms_getf) != NULL) {
758 758 uintptr_t psz = sizeof (void *);
759 759 vnode_t *vp;
760 760 vnodeops_t *op;
761 761
762 762 /*
763 763 * When getf() returns a file_t, the enabling is implicitly
764 764 * granted the (transient) right to read the returned file_t
765 765 * as well as the v_path and v_op->vnop_name of the underlying
766 766 * vnode. These accesses are allowed after a successful
767 767 * getf() because the members that they refer to cannot change
768 768 * once set -- and the barrier logic in the kernel's closef()
769 769 * path assures that the file_t and its referenced vnode_t
770 770 * cannot themselves be stale (that is, it is impossible for
771 771 * either dtms_getf itself or its f_vnode member to reference
772 772 * freed memory).
773 773 */
774 774 if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
775 775 return (1);
776 776
777 777 if ((vp = fp->f_vnode) != NULL) {
778 778 if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
779 779 return (1);
780 780
781 781 if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
782 782 vp->v_path, strlen(vp->v_path) + 1)) {
783 783 return (1);
784 784 }
785 785
786 786 if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
787 787 return (1);
788 788
789 789 if ((op = vp->v_op) != NULL &&
790 790 DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
791 791 return (1);
792 792 }
793 793
794 794 if (op != NULL && op->vnop_name != NULL &&
795 795 DTRACE_INRANGE(addr, sz, op->vnop_name,
796 796 strlen(op->vnop_name) + 1)) {
797 797 return (1);
798 798 }
799 799 }
800 800 }
801 801
802 802 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
803 803 *illval = addr;
804 804 return (0);
805 805 }
806 806
807 807 /*
808 808 * Convenience routine to check to see if a given string is within a memory
809 809 * region in which a load may be issued given the user's privilege level;
810 810 * this exists so that we don't need to issue unnecessary dtrace_strlen()
811 811 * calls in the event that the user has all privileges.
812 812 */
813 813 static int
814 814 dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
815 815 dtrace_vstate_t *vstate)
816 816 {
817 817 size_t strsz;
818 818
819 819 /*
820 820 * If we hold the privilege to read from kernel memory, then
821 821 * everything is readable.
822 822 */
823 823 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
824 824 return (1);
825 825
826 826 strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz);
827 827 if (dtrace_canload(addr, strsz, mstate, vstate))
828 828 return (1);
829 829
830 830 return (0);
831 831 }
832 832
833 833 /*
834 834 * Convenience routine to check to see if a given variable is within a memory
835 835 * region in which a load may be issued given the user's privilege level.
836 836 */
837 837 static int
838 838 dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
839 839 dtrace_vstate_t *vstate)
840 840 {
841 841 size_t sz;
842 842 ASSERT(type->dtdt_flags & DIF_TF_BYREF);
843 843
844 844 /*
845 845 * If we hold the privilege to read from kernel memory, then
846 846 * everything is readable.
847 847 */
848 848 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
849 849 return (1);
850 850
851 851 if (type->dtdt_kind == DIF_TYPE_STRING)
852 852 sz = dtrace_strlen(src,
853 853 vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
854 854 else
855 855 sz = type->dtdt_size;
856 856
857 857 return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
858 858 }
859 859
860 860 /*
861 861 * Compare two strings using safe loads.
862 862 */
863 863 static int
864 864 dtrace_strncmp(char *s1, char *s2, size_t limit)
865 865 {
866 866 uint8_t c1, c2;
867 867 volatile uint16_t *flags;
868 868
869 869 if (s1 == s2 || limit == 0)
870 870 return (0);
871 871
872 872 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
873 873
874 874 do {
875 875 if (s1 == NULL) {
876 876 c1 = '\0';
877 877 } else {
878 878 c1 = dtrace_load8((uintptr_t)s1++);
879 879 }
880 880
881 881 if (s2 == NULL) {
882 882 c2 = '\0';
883 883 } else {
884 884 c2 = dtrace_load8((uintptr_t)s2++);
885 885 }
886 886
887 887 if (c1 != c2)
888 888 return (c1 - c2);
889 889 } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));
890 890
891 891 return (0);
892 892 }
893 893
894 894 /*
895 895 * Compute strlen(s) for a string using safe memory accesses. The additional
896 896 * lim parameter is used to specify a maximum length to ensure completion.
897 897 */
898 898 static size_t
899 899 dtrace_strlen(const char *s, size_t lim)
900 900 {
901 901 uint_t len;
902 902
903 903 for (len = 0; len != lim; len++) {
904 904 if (dtrace_load8((uintptr_t)s++) == '\0')
905 905 break;
906 906 }
907 907
908 908 return (len);
909 909 }
910 910
911 911 /*
912 912 * Check if the range [kaddr, kaddr + size) overlaps a toxic region.
913 913 */
914 914 static int
915 915 dtrace_istoxic(uintptr_t kaddr, size_t size)
916 916 {
917 917 uintptr_t taddr, tsize;
918 918 int i;
919 919
920 920 for (i = 0; i < dtrace_toxranges; i++) {
921 921 taddr = dtrace_toxrange[i].dtt_base;
922 922 tsize = dtrace_toxrange[i].dtt_limit - taddr;
923 923
924 924 if (kaddr - taddr < tsize) {
925 925 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
926 926 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
927 927 return (1);
928 928 }
929 929
930 930 if (taddr - kaddr < size) {
931 931 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
932 932 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
933 933 return (1);
934 934 }
935 935 }
936 936
937 937 return (0);
938 938 }
939 939
940 940 /*
941 941 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe
942 942 * memory specified by the DIF program. The dst is assumed to be safe memory
943 943 * that we can store to directly because it is managed by DTrace. As with
944 944 * standard bcopy, overlapping copies are handled properly.
945 945 */
946 946 static void
947 947 dtrace_bcopy(const void *src, void *dst, size_t len)
948 948 {
949 949 if (len != 0) {
950 950 uint8_t *s1 = dst;
951 951 const uint8_t *s2 = src;
952 952
953 953 if (s1 <= s2) {
954 954 do {
955 955 *s1++ = dtrace_load8((uintptr_t)s2++);
956 956 } while (--len != 0);
957 957 } else {
958 958 s2 += len;
959 959 s1 += len;
960 960
961 961 do {
962 962 *--s1 = dtrace_load8((uintptr_t)--s2);
963 963 } while (--len != 0);
964 964 }
965 965 }
966 966 }
967 967
968 968 /*
969 969 * Copy src to dst using safe memory accesses, up to either the specified
970 970 * length, or the point that a nul byte is encountered. The src is assumed to
971 971 * be unsafe memory specified by the DIF program. The dst is assumed to be
972 972 * safe memory that we can store to directly because it is managed by DTrace.
973 973 * Unlike dtrace_bcopy(), overlapping regions are not handled.
974 974 */
975 975 static void
976 976 dtrace_strcpy(const void *src, void *dst, size_t len)
977 977 {
978 978 if (len != 0) {
979 979 uint8_t *s1 = dst, c;
980 980 const uint8_t *s2 = src;
981 981
982 982 do {
983 983 *s1++ = c = dtrace_load8((uintptr_t)s2++);
984 984 } while (--len != 0 && c != '\0');
985 985 }
986 986 }
987 987
988 988 /*
989 989 * Copy src to dst, deriving the size and type from the specified (BYREF)
990 990 * variable type. The src is assumed to be unsafe memory specified by the DIF
991 991 * program. The dst is assumed to be DTrace variable memory that is of the
992 992 * specified type; we assume that we can store to it directly.
993 993 */
994 994 static void
995 995 dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
996 996 {
997 997 ASSERT(type->dtdt_flags & DIF_TF_BYREF);
998 998
999 999 if (type->dtdt_kind == DIF_TYPE_STRING) {
1000 1000 dtrace_strcpy(src, dst, type->dtdt_size);
1001 1001 } else {
1002 1002 dtrace_bcopy(src, dst, type->dtdt_size);
1003 1003 }
1004 1004 }
1005 1005
1006 1006 /*
1007 1007 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
1008 1008 * unsafe memory specified by the DIF program. The s2 data is assumed to be
1009 1009 * safe memory that we can access directly because it is managed by DTrace.
1010 1010 */
1011 1011 static int
1012 1012 dtrace_bcmp(const void *s1, const void *s2, size_t len)
1013 1013 {
1014 1014 volatile uint16_t *flags;
1015 1015
1016 1016 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
1017 1017
1018 1018 if (s1 == s2)
1019 1019 return (0);
1020 1020
1021 1021 if (s1 == NULL || s2 == NULL)
1022 1022 return (1);
1023 1023
1024 1024 if (s1 != s2 && len != 0) {
1025 1025 const uint8_t *ps1 = s1;
1026 1026 const uint8_t *ps2 = s2;
1027 1027
1028 1028 do {
1029 1029 if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
1030 1030 return (1);
1031 1031 } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
1032 1032 }
1033 1033 return (0);
1034 1034 }
1035 1035
1036 1036 /*
1037 1037 * Zero the specified region using a simple byte-by-byte loop. Note that this
1038 1038 * is for safe DTrace-managed memory only.
1039 1039 */
1040 1040 static void
1041 1041 dtrace_bzero(void *dst, size_t len)
1042 1042 {
1043 1043 uchar_t *cp;
1044 1044
1045 1045 for (cp = dst; len != 0; len--)
1046 1046 *cp++ = 0;
1047 1047 }
1048 1048
1049 1049 static void
1050 1050 dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
1051 1051 {
1052 1052 uint64_t result[2];
1053 1053
1054 1054 result[0] = addend1[0] + addend2[0];
1055 1055 result[1] = addend1[1] + addend2[1] +
1056 1056 (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);
1057 1057
1058 1058 sum[0] = result[0];
1059 1059 sum[1] = result[1];
1060 1060 }
1061 1061
1062 1062 /*
1063 1063 * Shift the 128-bit value in a by b. If b is positive, shift left.
1064 1064 * If b is negative, shift right.
1065 1065 */
1066 1066 static void
1067 1067 dtrace_shift_128(uint64_t *a, int b)
1068 1068 {
1069 1069 uint64_t mask;
1070 1070
1071 1071 if (b == 0)
1072 1072 return;
1073 1073
1074 1074 if (b < 0) {
1075 1075 b = -b;
1076 1076 if (b >= 64) {
1077 1077 a[0] = a[1] >> (b - 64);
1078 1078 a[1] = 0;
1079 1079 } else {
1080 1080 a[0] >>= b;
1081 1081 mask = 1LL << (64 - b);
1082 1082 mask -= 1;
1083 1083 a[0] |= ((a[1] & mask) << (64 - b));
1084 1084 a[1] >>= b;
1085 1085 }
1086 1086 } else {
1087 1087 if (b >= 64) {
1088 1088 a[1] = a[0] << (b - 64);
1089 1089 a[0] = 0;
1090 1090 } else {
1091 1091 a[1] <<= b;
1092 1092 mask = a[0] >> (64 - b);
1093 1093 a[1] |= mask;
1094 1094 a[0] <<= b;
1095 1095 }
1096 1096 }
1097 1097 }
1098 1098
1099 1099 /*
1100 1100 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
1101 1101 * use native multiplication on those, and then re-combine into the
1102 1102 * resulting 128-bit value.
1103 1103 *
1104 1104 * ((hi1 << 32) + lo1) * ((hi2 << 32) + lo2) =
1105 1105 * (hi1 * hi2) << 64 +
1106 1106 * (hi1 * lo2) << 32 +
1107 1107 * (hi2 * lo1) << 32 +
1108 1108 * lo1 * lo2
1109 1109 */
1110 1110 static void
1111 1111 dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
1112 1112 {
1113 1113 uint64_t hi1, hi2, lo1, lo2;
1114 1114 uint64_t tmp[2];
1115 1115
1116 1116 hi1 = factor1 >> 32;
1117 1117 hi2 = factor2 >> 32;
1118 1118
1119 1119 lo1 = factor1 & DT_MASK_LO;
1120 1120 lo2 = factor2 & DT_MASK_LO;
1121 1121
1122 1122 product[0] = lo1 * lo2;
1123 1123 product[1] = hi1 * hi2;
1124 1124
1125 1125 tmp[0] = hi1 * lo2;
1126 1126 tmp[1] = 0;
1127 1127 dtrace_shift_128(tmp, 32);
1128 1128 dtrace_add_128(product, tmp, product);
1129 1129
1130 1130 tmp[0] = hi2 * lo1;
1131 1131 tmp[1] = 0;
1132 1132 dtrace_shift_128(tmp, 32);
1133 1133 dtrace_add_128(product, tmp, product);
1134 1134 }
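
	[Editorial note] A user-space sanity check of the decomposition (not
	part of the source; assumes a compiler with native 128-bit support):
	with factor1 = 2^32 + 3 and factor2 = 2^32 + 5, we have hi1 = hi2 = 1,
	lo1 = 3, lo2 = 5, so the four terms are 1 << 64, 5 << 32, 3 << 32, and
	15 -- i.e. product[1] == 1 and product[0] == 0x80000000f:

		#include <stdint.h>
		#include <stdio.h>

		int
		main(void)
		{
			uint64_t f1 = 0x100000003ULL;	/* 2^32 + 3 */
			uint64_t f2 = 0x100000005ULL;	/* 2^32 + 5 */
			unsigned __int128 p = (unsigned __int128)f1 * f2;

			/* Expect hi=1 lo=80000000f, the sum of the terms. */
			(void) printf("hi=%llx lo=%llx\n",
			    (unsigned long long)(p >> 64),
			    (unsigned long long)(uint64_t)p);
			return (0);
		}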
1135 1135
1136 1136 /*
1137 1137 * This privilege check should be used by actions and subroutines to
1138 1138 * verify that the user credentials of the process that enabled the
1139 1139 * invoking ECB match the target credentials.
1140 1140 */
1141 1141 static int
1142 1142 dtrace_priv_proc_common_user(dtrace_state_t *state)
1143 1143 {
1144 1144 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1145 1145
1146 1146 /*
1147 1147 * We should always have a non-NULL state cred here, since if cred
1148 1148 * is null (anonymous tracing), we fast-path bypass this routine.
1149 1149 */
1150 1150 ASSERT(s_cr != NULL);
1151 1151
1152 1152 if ((cr = CRED()) != NULL &&
1153 1153 s_cr->cr_uid == cr->cr_uid &&
1154 1154 s_cr->cr_uid == cr->cr_ruid &&
1155 1155 s_cr->cr_uid == cr->cr_suid &&
1156 1156 s_cr->cr_gid == cr->cr_gid &&
1157 1157 s_cr->cr_gid == cr->cr_rgid &&
1158 1158 s_cr->cr_gid == cr->cr_sgid)
1159 1159 return (1);
1160 1160
1161 1161 return (0);
1162 1162 }
1163 1163
1164 1164 /*
1165 1165 * This privilege check should be used by actions and subroutines to
1166 1166 * verify that the zone of the process that enabled the invoking ECB
1167 1167 * matches the target credentials.
1168 1168 */
1169 1169 static int
1170 1170 dtrace_priv_proc_common_zone(dtrace_state_t *state)
1171 1171 {
1172 1172 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1173 1173
1174 1174 /*
1175 1175 * We should always have a non-NULL state cred here, since if cred
1176 1176 * is null (anonymous tracing), we fast-path bypass this routine.
1177 1177 */
1178 1178 ASSERT(s_cr != NULL);
1179 1179
1180 1180 if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
1181 1181 return (1);
1182 1182
1183 1183 return (0);
1184 1184 }
1185 1185
1186 1186 /*
1187 1187 * This privilege check should be used by actions and subroutines to
1188 1188 * verify that the process has not setuid or changed credentials.
1189 1189 */
1190 1190 static int
1191 1191 dtrace_priv_proc_common_nocd()
1192 1192 {
1193 1193 proc_t *proc;
1194 1194
1195 1195 if ((proc = ttoproc(curthread)) != NULL &&
1196 1196 !(proc->p_flag & SNOCD))
1197 1197 return (1);
1198 1198
1199 1199 return (0);
1200 1200 }
1201 1201
1202 1202 static int
1203 1203 dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)
1204 1204 {
1205 1205 int action = state->dts_cred.dcr_action;
1206 1206
1207 1207 if (!(mstate->dtms_access & DTRACE_ACCESS_PROC))
1208 1208 goto bad;
1209 1209
1210 1210 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
1211 1211 dtrace_priv_proc_common_zone(state) == 0)
1212 1212 goto bad;
1213 1213
1214 1214 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
1215 1215 dtrace_priv_proc_common_user(state) == 0)
1216 1216 goto bad;
1217 1217
1218 1218 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
1219 1219 dtrace_priv_proc_common_nocd() == 0)
1220 1220 goto bad;
1221 1221
1222 1222 return (1);
1223 1223
1224 1224 bad:
1225 1225 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1226 1226
1227 1227 return (0);
1228 1228 }
1229 1229
1230 1230 static int
1231 1231 dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
1232 1232 {
1233 1233 if (mstate->dtms_access & DTRACE_ACCESS_PROC) {
1234 1234 if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
1235 1235 return (1);
1236 1236
1237 1237 if (dtrace_priv_proc_common_zone(state) &&
1238 1238 dtrace_priv_proc_common_user(state) &&
1239 1239 dtrace_priv_proc_common_nocd())
1240 1240 return (1);
1241 1241 }
1242 1242
1243 1243 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1244 1244
1245 1245 return (0);
1246 1246 }
1247 1247
1248 1248 static int
1249 1249 dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)
1250 1250 {
1251 1251 if ((mstate->dtms_access & DTRACE_ACCESS_PROC) &&
1252 1252 (state->dts_cred.dcr_action & DTRACE_CRA_PROC))
1253 1253 return (1);
1254 1254
1255 1255 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1256 1256
1257 1257 return (0);
1258 1258 }
1259 1259
1260 1260 static int
1261 1261 dtrace_priv_kernel(dtrace_state_t *state)
1262 1262 {
1263 1263 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
1264 1264 return (1);
1265 1265
1266 1266 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1267 1267
1268 1268 return (0);
1269 1269 }
1270 1270
1271 1271 static int
1272 1272 dtrace_priv_kernel_destructive(dtrace_state_t *state)
1273 1273 {
1274 1274 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
1275 1275 return (1);
1276 1276
1277 1277 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1278 1278
1279 1279 return (0);
1280 1280 }
1281 1281
1282 1282 /*
1283 1283 * Determine if the dte_cond of the specified ECB allows for processing of
1284 1284 * the current probe to continue. Note that this routine may allow continued
1285 1285 * processing, but with access(es) stripped from the mstate's dtms_access
1286 1286 * field.
1287 1287 */
1288 1288 static int
1289 1289 dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
1290 1290 dtrace_ecb_t *ecb)
1291 1291 {
1292 1292 dtrace_probe_t *probe = ecb->dte_probe;
1293 1293 dtrace_provider_t *prov = probe->dtpr_provider;
1294 1294 dtrace_pops_t *pops = &prov->dtpv_pops;
1295 1295 int mode = DTRACE_MODE_NOPRIV_DROP;
1296 1296
1297 1297 ASSERT(ecb->dte_cond);
1298 1298
1299 1299 if (pops->dtps_mode != NULL) {
1300 1300 mode = pops->dtps_mode(prov->dtpv_arg,
1301 1301 probe->dtpr_id, probe->dtpr_arg);
1302 1302
1303 - ASSERT((mode & DTRACE_MODE_USER) ||
1304 - (mode & DTRACE_MODE_KERNEL));
1305 - ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
1306 - (mode & DTRACE_MODE_NOPRIV_DROP));
1303 + ASSERT(mode & (DTRACE_MODE_USER | DTRACE_MODE_KERNEL));
1304 + ASSERT(mode & (DTRACE_MODE_NOPRIV_RESTRICT |
1305 + DTRACE_MODE_NOPRIV_DROP));
1307 1306 }
1308 1307
1309 1308 /*
1310 1309 * If the dte_cond bits indicate that this consumer is only allowed to
1311 - * see user-mode firings of this probe, call the provider's dtps_mode()
1312 - * entry point to check that the probe was fired while in a user
1313 - * context. If that's not the case, use the policy specified by the
1314 - * provider to determine if we drop the probe or merely restrict
1315 - * operation.
1310 + * see user-mode firings of this probe, check that the probe was fired
1311 + * while in a user context. If that's not the case, use the policy
1312 + * specified by the provider to determine if we drop the probe or
1313 + * merely restrict operation.
1316 1314 */
1317 1315 if (ecb->dte_cond & DTRACE_COND_USERMODE) {
1318 1316 ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
1319 1317
1320 1318 if (!(mode & DTRACE_MODE_USER)) {
1321 1319 if (mode & DTRACE_MODE_NOPRIV_DROP)
1322 1320 return (0);
1323 1321
1324 1322 mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1325 1323 }
1326 1324 }
1327 1325
1328 1326 /*
1329 1327 * This is more subtle than it looks. We have to be absolutely certain
1330 1328 * that CRED() isn't going to change out from under us so it's only
1331 1329 * legit to examine that structure if we're in constrained situations.
1332 1330 * Currently, the only time we'll do this check is if a non-super-user
1332 1330 * has enabled the profile or syscall providers -- providers that
1333 1331 * has enabled the profile or syscall providers -- providers that
1334 1332 * allow visibility of all processes. For the profile case, the check
1335 1333 * above will ensure that we're examining a user context.
1336 1334 */
1337 1335 if (ecb->dte_cond & DTRACE_COND_OWNER) {
1338 1336 cred_t *cr;
1339 1337 cred_t *s_cr = state->dts_cred.dcr_cred;
1340 1338 proc_t *proc;
1341 1339
1342 1340 ASSERT(s_cr != NULL);
1343 1341
1344 1342 if ((cr = CRED()) == NULL ||
1345 1343 s_cr->cr_uid != cr->cr_uid ||
1346 1344 s_cr->cr_uid != cr->cr_ruid ||
1347 1345 s_cr->cr_uid != cr->cr_suid ||
1348 1346 s_cr->cr_gid != cr->cr_gid ||
1349 1347 s_cr->cr_gid != cr->cr_rgid ||
1350 1348 s_cr->cr_gid != cr->cr_sgid ||
1351 1349 (proc = ttoproc(curthread)) == NULL ||
1352 1350 (proc->p_flag & SNOCD)) {
1353 1351 if (mode & DTRACE_MODE_NOPRIV_DROP)
1354 1352 return (0);
1355 1353
1356 1354 mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
1357 1355 }
1358 1356 }
1359 1357
1360 1358 /*
1361 1359 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
1362 1360 * in our zone, check to see if our mode policy is to restrict rather
1363 1361 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
1364 1362 * and DTRACE_ACCESS_ARGS.
1365 1363 */
1366 1364 if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
1367 1365 cred_t *cr;
1368 1366 cred_t *s_cr = state->dts_cred.dcr_cred;
1369 1367
1370 1368 ASSERT(s_cr != NULL);
1371 1369
1372 1370 if ((cr = CRED()) == NULL ||
1373 1371 s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
1374 1372 if (mode & DTRACE_MODE_NOPRIV_DROP)
1375 1373 return (0);
1376 1374
1377 1375 mstate->dtms_access &=
1378 1376 ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
1379 1377 }
1380 1378 }
1381 1379
1380 + /*
1381 + * By merits of being in this code path at all, we have limited
1382 + * privileges. If the provider has indicated that limited privileges
1383 + * are to denote restricted operation, strip off the ability to access
1384 + * arguments.
1385 + */
1386 + if (mode & DTRACE_MODE_LIMITEDPRIV_RESTRICT)
1387 + mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1388 +
1382 1389 return (1);
1383 1390 }
1384 1391
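
	[Editorial note] For context, a hypothetical provider-side sketch (the
	function and its name are invented; only the signature and the mode
	flags come from this file) of a dtps_mode entry point cooperating with
	the checks above:

		static int
		xyz_mode(void *arg, dtrace_id_t id, void *parg)
		{
			/*
			 * Per the ASSERTs in dtrace_priv_probe(), a mode
			 * must include USER or KERNEL, and NOPRIV_RESTRICT
			 * or NOPRIV_DROP.  A provider that wants
			 * limited-privilege (e.g. in-zone) consumers to see
			 * restricted rather than dropped firings also sets
			 * LIMITEDPRIV_RESTRICT, which strips
			 * DTRACE_ACCESS_ARGS above.
			 */
			return (DTRACE_MODE_KERNEL |
			    DTRACE_MODE_NOPRIV_RESTRICT |
			    DTRACE_MODE_LIMITEDPRIV_RESTRICT);
		}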
1385 1392 /*
1386 1393 * Note: not called from probe context. This function is called
1387 1394 * asynchronously (and at a regular interval) from outside of probe context to
1388 1395 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
1389 1396 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1390 1397 */
1391 1398 void
1392 1399 dtrace_dynvar_clean(dtrace_dstate_t *dstate)
1393 1400 {
1394 1401 dtrace_dynvar_t *dirty;
1395 1402 dtrace_dstate_percpu_t *dcpu;
1396 1403 dtrace_dynvar_t **rinsep;
1397 1404 int i, j, work = 0;
1398 1405
1399 1406 for (i = 0; i < NCPU; i++) {
1400 1407 dcpu = &dstate->dtds_percpu[i];
1401 1408 rinsep = &dcpu->dtdsc_rinsing;
1402 1409
1403 1410 /*
1404 1411 * If the dirty list is NULL, there is no dirty work to do.
1405 1412 */
1406 1413 if (dcpu->dtdsc_dirty == NULL)
1407 1414 continue;
1408 1415
1409 1416 if (dcpu->dtdsc_rinsing != NULL) {
1410 1417 /*
1411 1418 * If the rinsing list is non-NULL, then it is because
1412 1419 * this CPU was selected to accept another CPU's
1413 1420 * dirty list -- and since that time, dirty buffers
1414 1421 * have accumulated. This is a highly unlikely
1415 1422 * condition, but we choose to ignore the dirty
1416 1423 * buffers -- they'll be picked up in a future cleanse.
1417 1424 */
1418 1425 continue;
1419 1426 }
1420 1427
1421 1428 if (dcpu->dtdsc_clean != NULL) {
1422 1429 /*
1423 1430 * If the clean list is non-NULL, then we're in a
1424 1431 * situation where a CPU has done deallocations (we
1425 1432 * have a non-NULL dirty list) but no allocations (we
1426 1433 * also have a non-NULL clean list). We can't simply
1427 1434 * move the dirty list into the clean list on this
1428 1435 * CPU, yet we also don't want to allow this condition
1429 1436 * to persist, lest a short clean list prevent a
1430 1437 * massive dirty list from being cleaned (which in
1431 1438 * turn could lead to otherwise avoidable dynamic
1432 1439 * drops). To deal with this, we look for some CPU
1433 1440 * with a NULL clean list, NULL dirty list, and NULL
1434 1441 * rinsing list -- and then we borrow this CPU to
1435 1442 * rinse our dirty list.
1436 1443 */
1437 1444 for (j = 0; j < NCPU; j++) {
1438 1445 dtrace_dstate_percpu_t *rinser;
1439 1446
1440 1447 rinser = &dstate->dtds_percpu[j];
1441 1448
1442 1449 if (rinser->dtdsc_rinsing != NULL)
1443 1450 continue;
1444 1451
1445 1452 if (rinser->dtdsc_dirty != NULL)
1446 1453 continue;
1447 1454
1448 1455 if (rinser->dtdsc_clean != NULL)
1449 1456 continue;
1450 1457
1451 1458 rinsep = &rinser->dtdsc_rinsing;
1452 1459 break;
1453 1460 }
1454 1461
1455 1462 if (j == NCPU) {
1456 1463 /*
1457 1464 * We were unable to find another CPU that
1458 1465 * could accept this dirty list -- we are
1459 1466 * therefore unable to clean it now.
1460 1467 */
1461 1468 dtrace_dynvar_failclean++;
1462 1469 continue;
1463 1470 }
1464 1471 }
1465 1472
1466 1473 work = 1;
1467 1474
1468 1475 /*
1469 1476 * Atomically move the dirty list aside.
1470 1477 */
1471 1478 do {
1472 1479 dirty = dcpu->dtdsc_dirty;
1473 1480
1474 1481 /*
1475 1482 * Before we zap the dirty list, set the rinsing list.
1476 1483 * (This allows for a potential assertion in
1477 1484 * dtrace_dynvar(): if a free dynamic variable appears
1478 1485 * on a hash chain, either the dirty list or the
1479 1486 * rinsing list for some CPU must be non-NULL.)
1480 1487 */
1481 1488 *rinsep = dirty;
1482 1489 dtrace_membar_producer();
1483 1490 } while (dtrace_casptr(&dcpu->dtdsc_dirty,
1484 1491 dirty, NULL) != dirty);
1485 1492 }
1486 1493
1487 1494 if (!work) {
1488 1495 /*
1489 1496 * We have no work to do; we can simply return.
1490 1497 */
1491 1498 return;
1492 1499 }
1493 1500
1494 1501 dtrace_sync();
1495 1502
1496 1503 for (i = 0; i < NCPU; i++) {
1497 1504 dcpu = &dstate->dtds_percpu[i];
1498 1505
1499 1506 if (dcpu->dtdsc_rinsing == NULL)
1500 1507 continue;
1501 1508
1502 1509 /*
1503 1510 * We are now guaranteed that no hash chain contains a pointer
1504 1511 * into this dirty list; we can make it clean.
1505 1512 */
1506 1513 ASSERT(dcpu->dtdsc_clean == NULL);
1507 1514 dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
1508 1515 dcpu->dtdsc_rinsing = NULL;
1509 1516 }
1510 1517
1511 1518 /*
1512 1519 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1513 1520 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1514 1521 * This prevents a race whereby a CPU incorrectly decides that
1515 1522 * the state should be something other than DTRACE_DSTATE_CLEAN
1516 1523 * after dtrace_dynvar_clean() has completed.
1517 1524 */
1518 1525 dtrace_sync();
1519 1526
1520 1527 dstate->dtds_state = DTRACE_DSTATE_CLEAN;
1521 1528 }
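
/*
 * A minimal sketch of the detach step above, assuming GCC __atomic
 * builtins in place of dtrace_casptr() and dtrace_membar_producer();
 * example_node_t and both list heads are hypothetical stand-ins, and
 * the function is illustrative rather than part of the build. The list
 * is published on the rinsing side first, then the dirty head is CAS'd
 * to NULL, retrying if a probe freed another variable in the interim.
 */
typedef struct example_node { struct example_node *next; } example_node_t;

static example_node_t *
example_detach_dirty(example_node_t *volatile *dirtyp,
    example_node_t *volatile *rinsep)
{
	example_node_t *dirty;

	do {
		dirty = *dirtyp;
		*rinsep = dirty;	/* publish before detaching */
		__atomic_thread_fence(__ATOMIC_RELEASE);
	} while (!__atomic_compare_exchange_n(dirtyp, &dirty, NULL,
	    0, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE));

	return (dirty);
}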
1522 1529
1523 1530 /*
1524 1531  * Depending on the value of the op parameter, this function looks up,
1525 1532 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
1526 1533 * allocation is requested, this function will return a pointer to a
1527 1534 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
1528 1535 * variable can be allocated. If NULL is returned, the appropriate counter
1529 1536 * will be incremented.
1530 1537 */
1531 1538 dtrace_dynvar_t *
1532 1539 dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
1533 1540 dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
1534 1541 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
1535 1542 {
1536 1543 uint64_t hashval = DTRACE_DYNHASH_VALID;
1537 1544 dtrace_dynhash_t *hash = dstate->dtds_hash;
1538 1545 dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
1539 1546 processorid_t me = CPU->cpu_id, cpu = me;
1540 1547 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
1541 1548 size_t bucket, ksize;
1542 1549 size_t chunksize = dstate->dtds_chunksize;
1543 1550 uintptr_t kdata, lock, nstate;
1544 1551 uint_t i;
1545 1552
1546 1553 ASSERT(nkeys != 0);
1547 1554
1548 1555 /*
1549 1556 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
1550 1557 * algorithm. For the by-value portions, we perform the algorithm in
1551 1558 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
1552 1559 * bit, and seems to have only a minute effect on distribution. For
1553 1560 * the by-reference data, we perform "One-at-a-time" iterating (safely)
1554 1561 * over each referenced byte. It's painful to do this, but it's much
1555 1562 * better than pathological hash distribution. The efficacy of the
1556 1563 * hashing algorithm (and a comparison with other algorithms) may be
1557 1564 * found by running the ::dtrace_dynstat MDB dcmd.
1558 1565 */
1559 1566 for (i = 0; i < nkeys; i++) {
1560 1567 if (key[i].dttk_size == 0) {
1561 1568 uint64_t val = key[i].dttk_value;
1562 1569
1563 1570 hashval += (val >> 48) & 0xffff;
1564 1571 hashval += (hashval << 10);
1565 1572 hashval ^= (hashval >> 6);
1566 1573
1567 1574 hashval += (val >> 32) & 0xffff;
1568 1575 hashval += (hashval << 10);
1569 1576 hashval ^= (hashval >> 6);
1570 1577
1571 1578 hashval += (val >> 16) & 0xffff;
1572 1579 hashval += (hashval << 10);
1573 1580 hashval ^= (hashval >> 6);
1574 1581
1575 1582 hashval += val & 0xffff;
1576 1583 hashval += (hashval << 10);
1577 1584 hashval ^= (hashval >> 6);
1578 1585 } else {
1579 1586 /*
1580 1587 * This is incredibly painful, but it beats the hell
1581 1588 * out of the alternative.
1582 1589 */
1583 1590 uint64_t j, size = key[i].dttk_size;
1584 1591 uintptr_t base = (uintptr_t)key[i].dttk_value;
1585 1592
1586 1593 if (!dtrace_canload(base, size, mstate, vstate))
1587 1594 break;
1588 1595
1589 1596 for (j = 0; j < size; j++) {
1590 1597 hashval += dtrace_load8(base + j);
1591 1598 hashval += (hashval << 10);
1592 1599 hashval ^= (hashval >> 6);
1593 1600 }
1594 1601 }
1595 1602 }
1596 1603
1597 1604 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
1598 1605 return (NULL);
1599 1606
1600 1607 hashval += (hashval << 3);
1601 1608 hashval ^= (hashval >> 11);
1602 1609 hashval += (hashval << 15);
1603 1610
1604 1611 /*
1605 1612 * There is a remote chance (ideally, 1 in 2^31) that our hashval
1606 1613 * comes out to be one of our two sentinel hash values. If this
1607 1614 * actually happens, we set the hashval to be a value known to be a
1608 1615 * non-sentinel value.
1609 1616 */
1610 1617 if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
1611 1618 hashval = DTRACE_DYNHASH_VALID;
1612 1619
1613 1620 /*
1614 1621 * Yes, it's painful to do a divide here. If the cycle count becomes
1615 1622 * important here, tricks can be pulled to reduce it. (However, it's
1616 1623 * critical that hash collisions be kept to an absolute minimum;
1617 1624 * they're much more painful than a divide.) It's better to have a
1618 1625 * solution that generates few collisions and still keeps things
1619 1626 * relatively simple.
1620 1627 */
1621 1628 bucket = hashval % dstate->dtds_hashsize;
1622 1629
1623 1630 if (op == DTRACE_DYNVAR_DEALLOC) {
1624 1631 volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;
1625 1632
1626 1633 for (;;) {
1627 1634 while ((lock = *lockp) & 1)
1628 1635 continue;
1629 1636
1630 1637 if (dtrace_casptr((void *)lockp,
1631 1638 (void *)lock, (void *)(lock + 1)) == (void *)lock)
1632 1639 break;
1633 1640 }
1634 1641
1635 1642 dtrace_membar_producer();
1636 1643 }
1637 1644
1638 1645 top:
1639 1646 prev = NULL;
1640 1647 lock = hash[bucket].dtdh_lock;
1641 1648
1642 1649 dtrace_membar_consumer();
1643 1650
1644 1651 start = hash[bucket].dtdh_chain;
1645 1652 ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
1646 1653 start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
1647 1654 op != DTRACE_DYNVAR_DEALLOC));
1648 1655
1649 1656 for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
1650 1657 dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
1651 1658 dtrace_key_t *dkey = &dtuple->dtt_key[0];
1652 1659
1653 1660 if (dvar->dtdv_hashval != hashval) {
1654 1661 if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
1655 1662 /*
1656 1663 * We've reached the sink, and therefore the
1657 1664 * end of the hash chain; we can kick out of
1658 1665 * the loop knowing that we have seen a valid
1659 1666 * snapshot of state.
1660 1667 */
1661 1668 ASSERT(dvar->dtdv_next == NULL);
1662 1669 ASSERT(dvar == &dtrace_dynhash_sink);
1663 1670 break;
1664 1671 }
1665 1672
1666 1673 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
1667 1674 /*
1668 1675 * We've gone off the rails: somewhere along
1669 1676 * the line, one of the members of this hash
1670 1677 * chain was deleted. Note that we could also
1671 1678 * detect this by simply letting this loop run
1672 1679 * to completion, as we would eventually hit
1673 1680 * the end of the dirty list. However, we
1674 1681 * want to avoid running the length of the
1675 1682 * dirty list unnecessarily (it might be quite
1676 1683 * long), so we catch this as early as
1677 1684 * possible by detecting the hash marker. In
1678 1685 * this case, we simply set dvar to NULL and
1679 1686 * break; the conditional after the loop will
1680 1687 * send us back to top.
1681 1688 */
1682 1689 dvar = NULL;
1683 1690 break;
1684 1691 }
1685 1692
1686 1693 goto next;
1687 1694 }
1688 1695
1689 1696 if (dtuple->dtt_nkeys != nkeys)
1690 1697 goto next;
1691 1698
1692 1699 for (i = 0; i < nkeys; i++, dkey++) {
1693 1700 if (dkey->dttk_size != key[i].dttk_size)
1694 1701 goto next; /* size or type mismatch */
1695 1702
1696 1703 if (dkey->dttk_size != 0) {
1697 1704 if (dtrace_bcmp(
1698 1705 (void *)(uintptr_t)key[i].dttk_value,
1699 1706 (void *)(uintptr_t)dkey->dttk_value,
1700 1707 dkey->dttk_size))
1701 1708 goto next;
1702 1709 } else {
1703 1710 if (dkey->dttk_value != key[i].dttk_value)
1704 1711 goto next;
1705 1712 }
1706 1713 }
1707 1714
1708 1715 if (op != DTRACE_DYNVAR_DEALLOC)
1709 1716 return (dvar);
1710 1717
1711 1718 ASSERT(dvar->dtdv_next == NULL ||
1712 1719 dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);
1713 1720
1714 1721 if (prev != NULL) {
1715 1722 ASSERT(hash[bucket].dtdh_chain != dvar);
1716 1723 ASSERT(start != dvar);
1717 1724 ASSERT(prev->dtdv_next == dvar);
1718 1725 prev->dtdv_next = dvar->dtdv_next;
1719 1726 } else {
1720 1727 if (dtrace_casptr(&hash[bucket].dtdh_chain,
1721 1728 start, dvar->dtdv_next) != start) {
1722 1729 /*
1723 1730 * We have failed to atomically swing the
1724 1731 * hash table head pointer, presumably because
1725 1732 * of a conflicting allocation on another CPU.
1726 1733 * We need to reread the hash chain and try
1727 1734 * again.
1728 1735 */
1729 1736 goto top;
1730 1737 }
1731 1738 }
1732 1739
1733 1740 dtrace_membar_producer();
1734 1741
1735 1742 /*
1736 1743 * Now set the hash value to indicate that it's free.
1737 1744 */
1738 1745 ASSERT(hash[bucket].dtdh_chain != dvar);
1739 1746 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
1740 1747
1741 1748 dtrace_membar_producer();
1742 1749
1743 1750 /*
1744 1751 * Set the next pointer to point at the dirty list, and
1745 1752 * atomically swing the dirty pointer to the newly freed dvar.
1746 1753 */
1747 1754 do {
1748 1755 next = dcpu->dtdsc_dirty;
1749 1756 dvar->dtdv_next = next;
1750 1757 } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);
1751 1758
1752 1759 /*
1753 1760 * Finally, unlock this hash bucket.
1754 1761 */
1755 1762 ASSERT(hash[bucket].dtdh_lock == lock);
1756 1763 ASSERT(lock & 1);
1757 1764 hash[bucket].dtdh_lock++;
1758 1765
1759 1766 return (NULL);
1760 1767 next:
1761 1768 prev = dvar;
1762 1769 continue;
1763 1770 }
1764 1771
1765 1772 if (dvar == NULL) {
1766 1773 /*
1767 1774 * If dvar is NULL, it is because we went off the rails:
1768 1775 * one of the elements that we traversed in the hash chain
1769 1776 * was deleted while we were traversing it. In this case,
1770 1777 * we assert that we aren't doing a dealloc (deallocs lock
1771 1778 * the hash bucket to prevent themselves from racing with
1772 1779 * one another), and retry the hash chain traversal.
1773 1780 */
1774 1781 ASSERT(op != DTRACE_DYNVAR_DEALLOC);
1775 1782 goto top;
1776 1783 }
1777 1784
1778 1785 if (op != DTRACE_DYNVAR_ALLOC) {
1779 1786 /*
1780 1787 * If we are not to allocate a new variable, we want to
1781 1788 * return NULL now. Before we return, check that the value
1782 1789 * of the lock word hasn't changed. If it has, we may have
1783 1790 * seen an inconsistent snapshot.
1784 1791 */
1785 1792 if (op == DTRACE_DYNVAR_NOALLOC) {
1786 1793 if (hash[bucket].dtdh_lock != lock)
1787 1794 goto top;
1788 1795 } else {
1789 1796 ASSERT(op == DTRACE_DYNVAR_DEALLOC);
1790 1797 ASSERT(hash[bucket].dtdh_lock == lock);
1791 1798 ASSERT(lock & 1);
1792 1799 hash[bucket].dtdh_lock++;
1793 1800 }
1794 1801
1795 1802 return (NULL);
1796 1803 }
1797 1804
1798 1805 /*
1799 1806 * We need to allocate a new dynamic variable. The size we need is the
1800 1807 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
1801 1808 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
1802 1809 * the size of any referred-to data (dsize). We then round the final
1803 1810 * size up to the chunksize for allocation.
1804 1811 */
1805 1812 for (ksize = 0, i = 0; i < nkeys; i++)
1806 1813 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
1807 1814
1808 1815 /*
1809 1816 * This should be pretty much impossible, but could happen if, say,
1810 1817 * strange DIF specified the tuple. Ideally, this should be an
1811 1818 * assertion and not an error condition -- but that requires that the
1812 1819 * chunksize calculation in dtrace_difo_chunksize() be absolutely
1813 1820 * bullet-proof. (That is, it must not be able to be fooled by
1814 1821 * malicious DIF.) Given the lack of backwards branches in DIF,
1815 1822 * solving this would presumably not amount to solving the Halting
1816 1823 * Problem -- but it still seems awfully hard.
1817 1824 */
1818 1825 if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
1819 1826 ksize + dsize > chunksize) {
1820 1827 dcpu->dtdsc_drops++;
1821 1828 return (NULL);
1822 1829 }
1823 1830
1824 1831 nstate = DTRACE_DSTATE_EMPTY;
1825 1832
1826 1833 do {
1827 1834 retry:
1828 1835 free = dcpu->dtdsc_free;
1829 1836
1830 1837 if (free == NULL) {
1831 1838 dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
1832 1839 void *rval;
1833 1840
1834 1841 if (clean == NULL) {
1835 1842 /*
1836 1843 * We're out of dynamic variable space on
1837 1844 * this CPU. Unless we have tried all CPUs,
1838 1845 * we'll try to allocate from a different
1839 1846 * CPU.
1840 1847 */
1841 1848 switch (dstate->dtds_state) {
1842 1849 case DTRACE_DSTATE_CLEAN: {
1843 1850 void *sp = &dstate->dtds_state;
1844 1851
1845 1852 if (++cpu >= NCPU)
1846 1853 cpu = 0;
1847 1854
1848 1855 if (dcpu->dtdsc_dirty != NULL &&
1849 1856 nstate == DTRACE_DSTATE_EMPTY)
1850 1857 nstate = DTRACE_DSTATE_DIRTY;
1851 1858
1852 1859 if (dcpu->dtdsc_rinsing != NULL)
1853 1860 nstate = DTRACE_DSTATE_RINSING;
1854 1861
1855 1862 dcpu = &dstate->dtds_percpu[cpu];
1856 1863
1857 1864 if (cpu != me)
1858 1865 goto retry;
1859 1866
1860 1867 (void) dtrace_cas32(sp,
1861 1868 DTRACE_DSTATE_CLEAN, nstate);
1862 1869
1863 1870 /*
1864 1871 * To increment the correct bean
1865 1872 * counter, take another lap.
1866 1873 */
1867 1874 goto retry;
1868 1875 }
1869 1876
1870 1877 case DTRACE_DSTATE_DIRTY:
1871 1878 dcpu->dtdsc_dirty_drops++;
1872 1879 break;
1873 1880
1874 1881 case DTRACE_DSTATE_RINSING:
1875 1882 dcpu->dtdsc_rinsing_drops++;
1876 1883 break;
1877 1884
1878 1885 case DTRACE_DSTATE_EMPTY:
1879 1886 dcpu->dtdsc_drops++;
1880 1887 break;
1881 1888 }
1882 1889
1883 1890 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
1884 1891 return (NULL);
1885 1892 }
1886 1893
1887 1894 /*
1888 1895 * The clean list appears to be non-empty. We want to
1889 1896 * move the clean list to the free list; we start by
1890 1897 * moving the clean pointer aside.
1891 1898 */
1892 1899 if (dtrace_casptr(&dcpu->dtdsc_clean,
1893 1900 clean, NULL) != clean) {
1894 1901 /*
1895 1902 * We are in one of two situations:
1896 1903 *
1897 1904 * (a) The clean list was switched to the
1898 1905 * free list by another CPU.
1899 1906 *
1900 1907 * (b) The clean list was added to by the
1901 1908 * cleansing cyclic.
1902 1909 *
1903 1910 * In either of these situations, we can
1904 1911 * just reattempt the free list allocation.
1905 1912 */
1906 1913 goto retry;
1907 1914 }
1908 1915
1909 1916 ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);
1910 1917
1911 1918 /*
1912 1919 * Now we'll move the clean list to our free list.
1913 1920 * It's impossible for this to fail: the only way
1914 1921 * the free list can be updated is through this
1915 1922 * code path, and only one CPU can own the clean list.
1916 1923 * Thus, it would only be possible for this to fail if
1917 1924 * this code were racing with dtrace_dynvar_clean().
1918 1925 * (That is, if dtrace_dynvar_clean() updated the clean
1919 1926 * list, and we ended up racing to update the free
1920 1927 * list.) This race is prevented by the dtrace_sync()
1921 1928 * in dtrace_dynvar_clean() -- which flushes the
1922 1929 * owners of the clean lists out before resetting
1923 1930 * the clean lists.
1924 1931 */
1925 1932 dcpu = &dstate->dtds_percpu[me];
1926 1933 rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
1927 1934 ASSERT(rval == NULL);
1928 1935 goto retry;
1929 1936 }
1930 1937
1931 1938 dvar = free;
1932 1939 new_free = dvar->dtdv_next;
1933 1940 } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);
1934 1941
1935 1942 /*
1936 1943 * We have now allocated a new chunk. We copy the tuple keys into the
1937 1944 * tuple array and copy any referenced key data into the data space
1938 1945 * following the tuple array. As we do this, we relocate dttk_value
1939 1946 * in the final tuple to point to the key data address in the chunk.
1940 1947 */
1941 1948 kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
1942 1949 dvar->dtdv_data = (void *)(kdata + ksize);
1943 1950 dvar->dtdv_tuple.dtt_nkeys = nkeys;
1944 1951
1945 1952 for (i = 0; i < nkeys; i++) {
1946 1953 dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
1947 1954 size_t kesize = key[i].dttk_size;
1948 1955
1949 1956 if (kesize != 0) {
1950 1957 dtrace_bcopy(
1951 1958 (const void *)(uintptr_t)key[i].dttk_value,
1952 1959 (void *)kdata, kesize);
1953 1960 dkey->dttk_value = kdata;
1954 1961 kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
1955 1962 } else {
1956 1963 dkey->dttk_value = key[i].dttk_value;
1957 1964 }
1958 1965
1959 1966 dkey->dttk_size = kesize;
1960 1967 }
1961 1968
1962 1969 ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
1963 1970 dvar->dtdv_hashval = hashval;
1964 1971 dvar->dtdv_next = start;
1965 1972
1966 1973 if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
1967 1974 return (dvar);
1968 1975
1969 1976 /*
1970 1977 * The cas has failed. Either another CPU is adding an element to
1971 1978 * this hash chain, or another CPU is deleting an element from this
1972 1979 * hash chain. The simplest way to deal with both of these cases
1973 1980 * (though not necessarily the most efficient) is to free our
1974 1981 * allocated block and tail-call ourselves. Note that the free is
1975 1982 * to the dirty list and _not_ to the free list. This is to prevent
1976 1983 * races with allocators, above.
1977 1984 */
1978 1985 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
1979 1986
1980 1987 dtrace_membar_producer();
1981 1988
1982 1989 do {
1983 1990 free = dcpu->dtdsc_dirty;
1984 1991 dvar->dtdv_next = free;
1985 1992 } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);
1986 1993
1987 1994 return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
1988 1995 }
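
/*
 * The hashing above is Bob Jenkins' "one-at-a-time" hash, with a
 * 16-bit-chunk variant for by-value keys. A self-contained sketch of
 * the canonical byte-at-a-time form follows (illustrative, not part of
 * the build); the in-kernel code differs in that it seeds the hash
 * with DTRACE_DYNHASH_VALID and loads bytes via dtrace_load8() so that
 * faults on untrusted addresses are caught rather than taken.
 */
static uint64_t
example_oat_hash(const unsigned char *data, size_t len)
{
	uint64_t hashval = 0;
	size_t i;

	for (i = 0; i < len; i++) {
		hashval += data[i];
		hashval += (hashval << 10);
		hashval ^= (hashval >> 6);
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	return (hashval);
}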
1989 1996
1990 1997 /*ARGSUSED*/
1991 1998 static void
1992 1999 dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
1993 2000 {
1994 2001 if ((int64_t)nval < (int64_t)*oval)
1995 2002 *oval = nval;
1996 2003 }
1997 2004
1998 2005 /*ARGSUSED*/
1999 2006 static void
2000 2007 dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
2001 2008 {
2002 2009 if ((int64_t)nval > (int64_t)*oval)
2003 2010 *oval = nval;
2004 2011 }
2005 2012
2006 2013 static void
2007 2014 dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
2008 2015 {
2009 2016 int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
2010 2017 int64_t val = (int64_t)nval;
2011 2018
2012 2019 if (val < 0) {
2013 2020 for (i = 0; i < zero; i++) {
2014 2021 if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
2015 2022 quanta[i] += incr;
2016 2023 return;
2017 2024 }
2018 2025 }
2019 2026 } else {
2020 2027 for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
2021 2028 if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
2022 2029 quanta[i - 1] += incr;
2023 2030 return;
2024 2031 }
2025 2032 }
2026 2033
2027 2034 quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
2028 2035 return;
2029 2036 }
2030 2037
2031 2038 ASSERT(0);
2032 2039 }
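
/*
 * For a non-negative value, the scan above files the value under the
 * largest power of two that does not exceed it (with zero in its own
 * bucket); negative values are mirrored on the other side of the zero
 * bucket. A sketch of that bucket computation for the non-negative
 * side (illustrative, not part of the build):
 */
static uint64_t
example_quantize_bucketval(uint64_t val)
{
	uint64_t label = 1;

	if (val == 0)
		return (0);

	/*
	 * Double the label until the next doubling would overflow or
	 * exceed the value; e.g. values 8 through 15 all map to 8.
	 */
	while ((label << 1) != 0 && (label << 1) <= val)
		label <<= 1;

	return (label);
}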
2033 2040
2034 2041 static void
2035 2042 dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
2036 2043 {
2037 2044 uint64_t arg = *lquanta++;
2038 2045 int32_t base = DTRACE_LQUANTIZE_BASE(arg);
2039 2046 uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
2040 2047 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
2041 2048 int32_t val = (int32_t)nval, level;
2042 2049
2043 2050 ASSERT(step != 0);
2044 2051 ASSERT(levels != 0);
2045 2052
2046 2053 if (val < base) {
2047 2054 /*
2048 2055 * This is an underflow.
2049 2056 */
2050 2057 lquanta[0] += incr;
2051 2058 return;
2052 2059 }
2053 2060
2054 2061 level = (val - base) / step;
2055 2062
2056 2063 if (level < levels) {
2057 2064 lquanta[level + 1] += incr;
2058 2065 return;
2059 2066 }
2060 2067
2061 2068 /*
2062 2069 * This is an overflow.
2063 2070 */
2064 2071 lquanta[levels + 1] += incr;
2065 2072 }
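
/*
 * The packed argument word aside, the bucket math above is: one
 * underflow bucket, 'levels' linear buckets of width 'step' starting
 * at 'base', and one overflow bucket -- levels + 2 buckets in all.
 * For example, with base = 0, step = 10 and levels = 10, the value 37
 * maps to level (37 - 0) / 10 = 3 and thus to lquanta[4], the [30, 40)
 * bucket. The same math with the argument word already decoded
 * (illustrative, not part of the build):
 */
static void
example_lquantize(uint64_t *buckets /* levels + 2 entries */,
    int32_t base, uint16_t step, uint16_t levels, int32_t val)
{
	int32_t level;

	if (val < base) {
		buckets[0]++;		/* underflow */
		return;
	}

	level = (val - base) / step;
	buckets[level < levels ? level + 1 : levels + 1]++;
}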
2066 2073
2067 2074 static int
2068 2075 dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
2069 2076 uint16_t high, uint16_t nsteps, int64_t value)
2070 2077 {
2071 2078 int64_t this = 1, last, next;
2072 2079 int base = 1, order;
2073 2080
2074 2081 ASSERT(factor <= nsteps);
2075 2082 ASSERT(nsteps % factor == 0);
2076 2083
2077 2084 for (order = 0; order < low; order++)
2078 2085 this *= factor;
2079 2086
2080 2087 /*
2081 2088 * If our value is less than our factor taken to the power of the
2082 2089 * low order of magnitude, it goes into the zeroth bucket.
2083 2090 */
2084 2091 if (value < (last = this))
2085 2092 return (0);
2086 2093
2087 2094 for (this *= factor; order <= high; order++) {
2088 2095 int nbuckets = this > nsteps ? nsteps : this;
2089 2096
2090 2097 if ((next = this * factor) < this) {
2091 2098 /*
2092 2099 * We should not generally get log/linear quantizations
2093 2100 		 * with a high magnitude that allows 64 bits to
2094 2101 * overflow, but we nonetheless protect against this
2095 2102 * by explicitly checking for overflow, and clamping
2096 2103 * our value accordingly.
2097 2104 */
2098 2105 value = this - 1;
2099 2106 }
2100 2107
2101 2108 if (value < this) {
2102 2109 /*
2103 2110 * If our value lies within this order of magnitude,
2104 2111 * determine its position by taking the offset within
2105 2112 * the order of magnitude, dividing by the bucket
2106 2113 * width, and adding to our (accumulated) base.
2107 2114 */
2108 2115 return (base + (value - last) / (this / nbuckets));
2109 2116 }
2110 2117
2111 2118 base += nbuckets - (nbuckets / factor);
2112 2119 last = this;
2113 2120 this = next;
2114 2121 }
2115 2122
2116 2123 /*
2117 2124 * Our value is greater than or equal to our factor taken to the
2118 2125 * power of one plus the high magnitude -- return the top bucket.
2119 2126 */
2120 2127 return (base);
2121 2128 }
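
/*
 * A worked example with hypothetical parameters: factor = 10, low = 0,
 * high = 2 and nsteps = 10 yield the following bucket map from the
 * function above:
 *
 *	bucket 0	value < 1
 *	buckets 1-9	[1, 10), width 1
 *	buckets 10-18	[10, 100), width 10
 *	buckets 19-27	[100, 1000), width 100
 *	bucket 28	value >= 1000
 *
 * For instance, the value 250 lies in [100, 1000) at offset
 * (250 - 100) / 100 = 1 past bucket 19, so it maps to bucket 20.
 */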
2122 2129
2123 2130 static void
2124 2131 dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
2125 2132 {
2126 2133 uint64_t arg = *llquanta++;
2127 2134 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
2128 2135 uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
2129 2136 uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
2130 2137 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
2131 2138
2132 2139 llquanta[dtrace_aggregate_llquantize_bucket(factor,
2133 2140 low, high, nsteps, nval)] += incr;
2134 2141 }
2135 2142
2136 2143 /*ARGSUSED*/
2137 2144 static void
2138 2145 dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
2139 2146 {
2140 2147 data[0]++;
2141 2148 data[1] += nval;
2142 2149 }
2143 2150
2144 2151 /*ARGSUSED*/
2145 2152 static void
2146 2153 dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
2147 2154 {
2148 2155 int64_t snval = (int64_t)nval;
2149 2156 uint64_t tmp[2];
2150 2157
2151 2158 data[0]++;
2152 2159 data[1] += nval;
2153 2160
2154 2161 /*
2155 2162 * What we want to say here is:
2156 2163 *
2157 2164 * data[2] += nval * nval;
2158 2165 *
2159 2166 * But given that nval is 64-bit, we could easily overflow, so
2160 2167 * we do this as 128-bit arithmetic.
2161 2168 */
2162 2169 if (snval < 0)
2163 2170 snval = -snval;
2164 2171
2165 2172 dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
2166 2173 dtrace_add_128(data + 2, tmp, data + 2);
2167 2174 }
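
/*
 * Here data[0] is the count, data[1] the running sum, and data[2..3] a
 * 128-bit running sum of squares (low word first), from which the
 * consumer can later derive the standard deviation. A sketch of the
 * 128-bit helpers in that low-word-first representation (illustrative,
 * not part of the build; the representation and the use of GCC's
 * __uint128_t are assumptions here):
 */
static void
example_add_128(const uint64_t *a, const uint64_t *b, uint64_t *sum)
{
	uint64_t lo = a[0] + b[0];

	sum[1] = a[1] + b[1] + (lo < a[0]);	/* carry out of low word */
	sum[0] = lo;
}

static void
example_multiply_128(uint64_t x, uint64_t y, uint64_t *prod)
{
	__uint128_t p = (__uint128_t)x * y;

	prod[0] = (uint64_t)p;
	prod[1] = (uint64_t)(p >> 64);
}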
2168 2175
2169 2176 /*ARGSUSED*/
2170 2177 static void
2171 2178 dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
2172 2179 {
2173 2180 *oval = *oval + 1;
2174 2181 }
2175 2182
2176 2183 /*ARGSUSED*/
2177 2184 static void
2178 2185 dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
2179 2186 {
2180 2187 *oval += nval;
2181 2188 }
2182 2189
2183 2190 /*
2184 2191 * Aggregate given the tuple in the principal data buffer, and the aggregating
2185 2192 * action denoted by the specified dtrace_aggregation_t. The aggregation
2186 2193 * buffer is specified as the buf parameter. This routine does not return
2187 2194 * failure; if there is no space in the aggregation buffer, the data will be
2188 2195 * dropped, and a corresponding counter incremented.
2189 2196 */
2190 2197 static void
2191 2198 dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
2192 2199 intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
2193 2200 {
2194 2201 dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
2195 2202 uint32_t i, ndx, size, fsize;
2196 2203 uint32_t align = sizeof (uint64_t) - 1;
2197 2204 dtrace_aggbuffer_t *agb;
2198 2205 dtrace_aggkey_t *key;
2199 2206 uint32_t hashval = 0, limit, isstr;
2200 2207 caddr_t tomax, data, kdata;
2201 2208 dtrace_actkind_t action;
2202 2209 dtrace_action_t *act;
2203 2210 uintptr_t offs;
2204 2211
2205 2212 if (buf == NULL)
2206 2213 return;
2207 2214
2208 2215 if (!agg->dtag_hasarg) {
2209 2216 /*
2210 2217 * Currently, only quantize() and lquantize() take additional
2211 2218 * arguments, and they have the same semantics: an increment
2212 2219 * value that defaults to 1 when not present. If additional
2213 2220 * aggregating actions take arguments, the setting of the
2214 2221 * default argument value will presumably have to become more
2215 2222 * sophisticated...
2216 2223 */
2217 2224 arg = 1;
2218 2225 }
2219 2226
2220 2227 action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
2221 2228 size = rec->dtrd_offset - agg->dtag_base;
2222 2229 fsize = size + rec->dtrd_size;
2223 2230
2224 2231 ASSERT(dbuf->dtb_tomax != NULL);
2225 2232 data = dbuf->dtb_tomax + offset + agg->dtag_base;
2226 2233
2227 2234 if ((tomax = buf->dtb_tomax) == NULL) {
2228 2235 dtrace_buffer_drop(buf);
2229 2236 return;
2230 2237 }
2231 2238
2232 2239 /*
2233 2240 * The metastructure is always at the bottom of the buffer.
2234 2241 */
2235 2242 agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
2236 2243 sizeof (dtrace_aggbuffer_t));
2237 2244
2238 2245 if (buf->dtb_offset == 0) {
2239 2246 /*
2240 2247 * We just kludge up approximately 1/8th of the size to be
2241 2248 * buckets. If this guess ends up being routinely
2242 2249 * off-the-mark, we may need to dynamically readjust this
2243 2250 * based on past performance.
2244 2251 */
2245 2252 uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);
2246 2253
2247 2254 if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
2248 2255 (uintptr_t)tomax || hashsize == 0) {
2249 2256 /*
2250 2257 * We've been given a ludicrously small buffer;
2251 2258 * increment our drop count and leave.
2252 2259 */
2253 2260 dtrace_buffer_drop(buf);
2254 2261 return;
2255 2262 }
2256 2263
2257 2264 /*
2258 2265 		 * And now, a pathetic attempt to get an odd (or
2259 2266 * perchance, a prime) hash size for better hash distribution.
2260 2267 */
2261 2268 if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
2262 2269 hashsize -= DTRACE_AGGHASHSIZE_SLEW;
2263 2270
2264 2271 agb->dtagb_hashsize = hashsize;
2265 2272 agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
2266 2273 agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
2267 2274 agb->dtagb_free = (uintptr_t)agb->dtagb_hash;
2268 2275
2269 2276 for (i = 0; i < agb->dtagb_hashsize; i++)
2270 2277 agb->dtagb_hash[i] = NULL;
2271 2278 }
2272 2279
2273 2280 ASSERT(agg->dtag_first != NULL);
2274 2281 ASSERT(agg->dtag_first->dta_intuple);
2275 2282
2276 2283 /*
2277 2284 * Calculate the hash value based on the key. Note that we _don't_
2278 2285 * include the aggid in the hashing (but we will store it as part of
2279 2286 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time"
2280 2287 * algorithm: a simple, quick algorithm that has no known funnels, and
2281 2288 * gets good distribution in practice. The efficacy of the hashing
2282 2289 * algorithm (and a comparison with other algorithms) may be found by
2283 2290 * running the ::dtrace_aggstat MDB dcmd.
2284 2291 */
2285 2292 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
2286 2293 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2287 2294 limit = i + act->dta_rec.dtrd_size;
2288 2295 ASSERT(limit <= size);
2289 2296 isstr = DTRACEACT_ISSTRING(act);
2290 2297
2291 2298 for (; i < limit; i++) {
2292 2299 hashval += data[i];
2293 2300 hashval += (hashval << 10);
2294 2301 hashval ^= (hashval >> 6);
2295 2302
2296 2303 if (isstr && data[i] == '\0')
2297 2304 break;
2298 2305 }
2299 2306 }
2300 2307
2301 2308 hashval += (hashval << 3);
2302 2309 hashval ^= (hashval >> 11);
2303 2310 hashval += (hashval << 15);
2304 2311
2305 2312 /*
2306 2313 * Yes, the divide here is expensive -- but it's generally the least
2307 2314 * of the performance issues given the amount of data that we iterate
2308 2315 * over to compute hash values, compare data, etc.
2309 2316 */
2310 2317 ndx = hashval % agb->dtagb_hashsize;
2311 2318
2312 2319 for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
2313 2320 ASSERT((caddr_t)key >= tomax);
2314 2321 ASSERT((caddr_t)key < tomax + buf->dtb_size);
2315 2322
2316 2323 if (hashval != key->dtak_hashval || key->dtak_size != size)
2317 2324 continue;
2318 2325
2319 2326 kdata = key->dtak_data;
2320 2327 ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);
2321 2328
2322 2329 for (act = agg->dtag_first; act->dta_intuple;
2323 2330 act = act->dta_next) {
2324 2331 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2325 2332 limit = i + act->dta_rec.dtrd_size;
2326 2333 ASSERT(limit <= size);
2327 2334 isstr = DTRACEACT_ISSTRING(act);
2328 2335
2329 2336 for (; i < limit; i++) {
2330 2337 if (kdata[i] != data[i])
2331 2338 goto next;
2332 2339
2333 2340 if (isstr && data[i] == '\0')
2334 2341 break;
2335 2342 }
2336 2343 }
2337 2344
2338 2345 if (action != key->dtak_action) {
2339 2346 /*
2340 2347 * We are aggregating on the same value in the same
2341 2348 * aggregation with two different aggregating actions.
2342 2349 * (This should have been picked up in the compiler,
2343 2350 * so we may be dealing with errant or devious DIF.)
2344 2351 * This is an error condition; we indicate as much,
2345 2352 * and return.
2346 2353 */
2347 2354 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
2348 2355 return;
2349 2356 }
2350 2357
2351 2358 /*
2352 2359 * This is a hit: we need to apply the aggregator to
2353 2360 * the value at this key.
2354 2361 */
2355 2362 agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
2356 2363 return;
2357 2364 next:
2358 2365 continue;
2359 2366 }
2360 2367
2361 2368 /*
2362 2369 * We didn't find it. We need to allocate some zero-filled space,
2363 2370 * link it into the hash table appropriately, and apply the aggregator
2364 2371 * to the (zero-filled) value.
2365 2372 */
2366 2373 offs = buf->dtb_offset;
2367 2374 while (offs & (align - 1))
2368 2375 offs += sizeof (uint32_t);
2369 2376
2370 2377 /*
2371 2378 * If we don't have enough room to both allocate a new key _and_
2372 2379 * its associated data, increment the drop count and return.
2373 2380 */
2374 2381 if ((uintptr_t)tomax + offs + fsize >
2375 2382 agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
2376 2383 dtrace_buffer_drop(buf);
2377 2384 return;
2378 2385 }
2379 2386
2380 2387 /*CONSTCOND*/
2381 2388 ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
2382 2389 key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
2383 2390 agb->dtagb_free -= sizeof (dtrace_aggkey_t);
2384 2391
2385 2392 key->dtak_data = kdata = tomax + offs;
2386 2393 buf->dtb_offset = offs + fsize;
2387 2394
2388 2395 /*
2389 2396 * Now copy the data across.
2390 2397 */
2391 2398 *((dtrace_aggid_t *)kdata) = agg->dtag_id;
2392 2399
2393 2400 for (i = sizeof (dtrace_aggid_t); i < size; i++)
2394 2401 kdata[i] = data[i];
2395 2402
2396 2403 /*
2397 2404 * Because strings are not zeroed out by default, we need to iterate
2398 2405 * looking for actions that store strings, and we need to explicitly
2399 2406 * pad these strings out with zeroes.
2400 2407 */
2401 2408 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
2402 2409 int nul;
2403 2410
2404 2411 if (!DTRACEACT_ISSTRING(act))
2405 2412 continue;
2406 2413
2407 2414 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2408 2415 limit = i + act->dta_rec.dtrd_size;
2409 2416 ASSERT(limit <= size);
2410 2417
2411 2418 for (nul = 0; i < limit; i++) {
2412 2419 if (nul) {
2413 2420 kdata[i] = '\0';
2414 2421 continue;
2415 2422 }
2416 2423
2417 2424 if (data[i] != '\0')
2418 2425 continue;
2419 2426
2420 2427 nul = 1;
2421 2428 }
2422 2429 }
2423 2430
2424 2431 for (i = size; i < fsize; i++)
2425 2432 kdata[i] = 0;
2426 2433
2427 2434 key->dtak_hashval = hashval;
2428 2435 key->dtak_size = size;
2429 2436 key->dtak_action = action;
2430 2437 key->dtak_next = agb->dtagb_hash[ndx];
2431 2438 agb->dtagb_hash[ndx] = key;
2432 2439
2433 2440 /*
2434 2441 * Finally, apply the aggregator.
2435 2442 */
2436 2443 *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
2437 2444 agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
2438 2445 }
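
/*
 * The resulting buffer layout, sketched informally (not normative):
 * key/data records are bump-allocated upward from dtb_tomax, while
 * dtrace_aggkey_t headers are carved downward from dtagb_free, below
 * the hash chains and the dtrace_aggbuffer_t metastructure at the top;
 * when the two regions would meet, the enabling takes a drop.
 *
 *	dtb_tomax                                             + dtb_size
 *	+--------------------+...........+----------+--------+--------+
 *	| records (grow ->)  |   free    | (<- grow)|  hash  |  agg   |
 *	| aggid, keys, value |           |  aggkeys | chains | buffer |
 *	+--------------------+...........+----------+--------+--------+
 */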
2439 2446
2440 2447 /*
2441 2448 * Given consumer state, this routine finds a speculation in the INACTIVE
2442 2449 * state and transitions it into the ACTIVE state. If there is no speculation
2443 2450  * in the INACTIVE state, 0 is returned and the busy or unavail counter
2444 2451  * is bumped; it is up to the caller to take any further action.
2445 2452 */
2446 2453 static int
2447 2454 dtrace_speculation(dtrace_state_t *state)
2448 2455 {
2449 2456 int i = 0;
2450 2457 dtrace_speculation_state_t current;
2451 2458 uint32_t *stat = &state->dts_speculations_unavail, count;
2452 2459
2453 2460 while (i < state->dts_nspeculations) {
2454 2461 dtrace_speculation_t *spec = &state->dts_speculations[i];
2455 2462
2456 2463 current = spec->dtsp_state;
2457 2464
2458 2465 if (current != DTRACESPEC_INACTIVE) {
2459 2466 if (current == DTRACESPEC_COMMITTINGMANY ||
2460 2467 current == DTRACESPEC_COMMITTING ||
2461 2468 current == DTRACESPEC_DISCARDING)
2462 2469 stat = &state->dts_speculations_busy;
2463 2470 i++;
2464 2471 continue;
2465 2472 }
2466 2473
2467 2474 if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2468 2475 current, DTRACESPEC_ACTIVE) == current)
2469 2476 return (i + 1);
2470 2477 }
2471 2478
2472 2479 /*
2473 2480 * We couldn't find a speculation. If we found as much as a single
2474 2481 	 * busy speculation buffer, we'll count this failure as "busy"
2475 2482 * instead of "unavail".
2476 2483 */
2477 2484 do {
2478 2485 count = *stat;
2479 2486 } while (dtrace_cas32(stat, count, count + 1) != count);
2480 2487
2481 2488 return (0);
2482 2489 }
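
/*
 * Because this runs in probe context, the failure counter cannot be
 * protected by a lock; the bump is instead built from a 32-bit CAS. A
 * sketch of that idiom, assuming GCC builtins in place of
 * dtrace_cas32() (illustrative, not part of the build):
 */
static void
example_cas_increment(volatile uint32_t *counter)
{
	uint32_t old;

	do {
		old = *counter;
	} while (!__atomic_compare_exchange_n(counter, &old, old + 1,
	    0, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}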
2483 2490
2484 2491 /*
2485 2492 * This routine commits an active speculation. If the specified speculation
2486 2493 * is not in a valid state to perform a commit(), this routine will silently do
2487 2494 * nothing. The state of the specified speculation is transitioned according
2488 2495  * to the state transition diagram outlined in <sys/dtrace_impl.h>.
2489 2496 */
2490 2497 static void
2491 2498 dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
2492 2499 dtrace_specid_t which)
2493 2500 {
2494 2501 dtrace_speculation_t *spec;
2495 2502 dtrace_buffer_t *src, *dest;
2496 2503 uintptr_t daddr, saddr, dlimit;
2497 2504 dtrace_speculation_state_t current, new;
2498 2505 intptr_t offs;
2499 2506
2500 2507 if (which == 0)
2501 2508 return;
2502 2509
2503 2510 if (which > state->dts_nspeculations) {
2504 2511 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2505 2512 return;
2506 2513 }
2507 2514
2508 2515 spec = &state->dts_speculations[which - 1];
2509 2516 src = &spec->dtsp_buffer[cpu];
2510 2517 dest = &state->dts_buffer[cpu];
2511 2518
2512 2519 do {
2513 2520 current = spec->dtsp_state;
2514 2521
2515 2522 if (current == DTRACESPEC_COMMITTINGMANY)
2516 2523 break;
2517 2524
2518 2525 switch (current) {
2519 2526 case DTRACESPEC_INACTIVE:
2520 2527 case DTRACESPEC_DISCARDING:
2521 2528 return;
2522 2529
2523 2530 case DTRACESPEC_COMMITTING:
2524 2531 /*
2525 2532 * This is only possible if we are (a) commit()'ing
2526 2533 * without having done a prior speculate() on this CPU
2527 2534 * and (b) racing with another commit() on a different
2528 2535 * CPU. There's nothing to do -- we just assert that
2529 2536 * our offset is 0.
2530 2537 */
2531 2538 ASSERT(src->dtb_offset == 0);
2532 2539 return;
2533 2540
2534 2541 case DTRACESPEC_ACTIVE:
2535 2542 new = DTRACESPEC_COMMITTING;
2536 2543 break;
2537 2544
2538 2545 case DTRACESPEC_ACTIVEONE:
2539 2546 /*
2540 2547 * This speculation is active on one CPU. If our
2541 2548 * buffer offset is non-zero, we know that the one CPU
2542 2549 * must be us. Otherwise, we are committing on a
2543 2550 * different CPU from the speculate(), and we must
2544 2551 * rely on being asynchronously cleaned.
2545 2552 */
2546 2553 if (src->dtb_offset != 0) {
2547 2554 new = DTRACESPEC_COMMITTING;
2548 2555 break;
2549 2556 }
2550 2557 /*FALLTHROUGH*/
2551 2558
2552 2559 case DTRACESPEC_ACTIVEMANY:
2553 2560 new = DTRACESPEC_COMMITTINGMANY;
2554 2561 break;
2555 2562
2556 2563 default:
2557 2564 ASSERT(0);
2558 2565 }
2559 2566 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2560 2567 current, new) != current);
2561 2568
2562 2569 /*
2563 2570 * We have set the state to indicate that we are committing this
2564 2571 * speculation. Now reserve the necessary space in the destination
2565 2572 * buffer.
2566 2573 */
2567 2574 if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
2568 2575 sizeof (uint64_t), state, NULL)) < 0) {
2569 2576 dtrace_buffer_drop(dest);
2570 2577 goto out;
2571 2578 }
2572 2579
2573 2580 /*
2574 2581 * We have the space; copy the buffer across. (Note that this is a
2575 2582 	 * highly suboptimal bcopy(); in the unlikely event that this becomes
2576 2583 * a serious performance issue, a high-performance DTrace-specific
2577 2584 * bcopy() should obviously be invented.)
2578 2585 */
2579 2586 daddr = (uintptr_t)dest->dtb_tomax + offs;
2580 2587 dlimit = daddr + src->dtb_offset;
2581 2588 saddr = (uintptr_t)src->dtb_tomax;
2582 2589
2583 2590 /*
2584 2591 * First, the aligned portion.
2585 2592 */
2586 2593 while (dlimit - daddr >= sizeof (uint64_t)) {
2587 2594 *((uint64_t *)daddr) = *((uint64_t *)saddr);
2588 2595
2589 2596 daddr += sizeof (uint64_t);
2590 2597 saddr += sizeof (uint64_t);
2591 2598 }
2592 2599
2593 2600 /*
2594 2601 * Now any left-over bit...
2595 2602 */
2596 2603 while (dlimit - daddr)
2597 2604 *((uint8_t *)daddr++) = *((uint8_t *)saddr++);
2598 2605
2599 2606 /*
2600 2607 * Finally, commit the reserved space in the destination buffer.
2601 2608 */
2602 2609 dest->dtb_offset = offs + src->dtb_offset;
2603 2610
2604 2611 out:
2605 2612 /*
2606 2613 * If we're lucky enough to be the only active CPU on this speculation
2607 2614 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
2608 2615 */
2609 2616 if (current == DTRACESPEC_ACTIVE ||
2610 2617 (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
2611 2618 uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
2612 2619 DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);
2613 2620
2614 2621 ASSERT(rval == DTRACESPEC_COMMITTING);
2615 2622 }
2616 2623
2617 2624 src->dtb_offset = 0;
2618 2625 src->dtb_xamot_drops += src->dtb_drops;
2619 2626 src->dtb_drops = 0;
2620 2627 }
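
/*
 * The copy loop above -- 64-bit words for the aligned bulk, bytes for
 * the tail -- as a standalone sketch (illustrative, not part of the
 * build). It assumes, as the caller above arranges, that both source
 * and destination start 8-byte aligned:
 */
static void
example_word_copy(uintptr_t daddr, uintptr_t saddr, size_t len)
{
	uintptr_t dlimit = daddr + len;

	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);
		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);
}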
2621 2628
2622 2629 /*
2623 2630 * This routine discards an active speculation. If the specified speculation
2624 2631 * is not in a valid state to perform a discard(), this routine will silently
2625 2632 * do nothing. The state of the specified speculation is transitioned
2626 2633  * according to the state transition diagram outlined in <sys/dtrace_impl.h>.
2627 2634 */
2628 2635 static void
2629 2636 dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
2630 2637 dtrace_specid_t which)
2631 2638 {
2632 2639 dtrace_speculation_t *spec;
2633 2640 dtrace_speculation_state_t current, new;
2634 2641 dtrace_buffer_t *buf;
2635 2642
2636 2643 if (which == 0)
2637 2644 return;
2638 2645
2639 2646 if (which > state->dts_nspeculations) {
2640 2647 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2641 2648 return;
2642 2649 }
2643 2650
2644 2651 spec = &state->dts_speculations[which - 1];
2645 2652 buf = &spec->dtsp_buffer[cpu];
2646 2653
2647 2654 do {
2648 2655 current = spec->dtsp_state;
2649 2656
2650 2657 switch (current) {
2651 2658 case DTRACESPEC_INACTIVE:
2652 2659 case DTRACESPEC_COMMITTINGMANY:
2653 2660 case DTRACESPEC_COMMITTING:
2654 2661 case DTRACESPEC_DISCARDING:
2655 2662 return;
2656 2663
2657 2664 case DTRACESPEC_ACTIVE:
2658 2665 case DTRACESPEC_ACTIVEMANY:
2659 2666 new = DTRACESPEC_DISCARDING;
2660 2667 break;
2661 2668
2662 2669 case DTRACESPEC_ACTIVEONE:
2663 2670 if (buf->dtb_offset != 0) {
2664 2671 new = DTRACESPEC_INACTIVE;
2665 2672 } else {
2666 2673 new = DTRACESPEC_DISCARDING;
2667 2674 }
2668 2675 break;
2669 2676
2670 2677 default:
2671 2678 ASSERT(0);
2672 2679 }
2673 2680 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2674 2681 current, new) != current);
2675 2682
2676 2683 buf->dtb_offset = 0;
2677 2684 buf->dtb_drops = 0;
2678 2685 }
2679 2686
2680 2687 /*
2681 2688 * Note: not called from probe context. This function is called
2682 2689 * asynchronously from cross call context to clean any speculations that are
2683 2690 * in the COMMITTINGMANY or DISCARDING states. These speculations may not be
2684 2691 * transitioned back to the INACTIVE state until all CPUs have cleaned the
2685 2692 * speculation.
2686 2693 */
2687 2694 static void
2688 2695 dtrace_speculation_clean_here(dtrace_state_t *state)
2689 2696 {
2690 2697 dtrace_icookie_t cookie;
2691 2698 processorid_t cpu = CPU->cpu_id;
2692 2699 dtrace_buffer_t *dest = &state->dts_buffer[cpu];
2693 2700 dtrace_specid_t i;
2694 2701
2695 2702 cookie = dtrace_interrupt_disable();
2696 2703
2697 2704 if (dest->dtb_tomax == NULL) {
2698 2705 dtrace_interrupt_enable(cookie);
2699 2706 return;
2700 2707 }
2701 2708
2702 2709 for (i = 0; i < state->dts_nspeculations; i++) {
2703 2710 dtrace_speculation_t *spec = &state->dts_speculations[i];
2704 2711 dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];
2705 2712
2706 2713 if (src->dtb_tomax == NULL)
2707 2714 continue;
2708 2715
2709 2716 if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
2710 2717 src->dtb_offset = 0;
2711 2718 continue;
2712 2719 }
2713 2720
2714 2721 if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
2715 2722 continue;
2716 2723
2717 2724 if (src->dtb_offset == 0)
2718 2725 continue;
2719 2726
2720 2727 dtrace_speculation_commit(state, cpu, i + 1);
2721 2728 }
2722 2729
2723 2730 dtrace_interrupt_enable(cookie);
2724 2731 }
2725 2732
2726 2733 /*
2727 2734 * Note: not called from probe context. This function is called
2728 2735 * asynchronously (and at a regular interval) to clean any speculations that
2729 2736 * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there
2730 2737 * is work to be done, it cross calls all CPUs to perform that work;
2731 2738 * COMMITMANY and DISCARDING speculations may not be transitioned back to the
2732 2739  * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
2733 2740  * the INACTIVE state until they have been cleaned by all CPUs.
2733 2740 */
2734 2741 static void
2735 2742 dtrace_speculation_clean(dtrace_state_t *state)
2736 2743 {
2737 2744 int work = 0, rv;
2738 2745 dtrace_specid_t i;
2739 2746
2740 2747 for (i = 0; i < state->dts_nspeculations; i++) {
2741 2748 dtrace_speculation_t *spec = &state->dts_speculations[i];
2742 2749
2743 2750 ASSERT(!spec->dtsp_cleaning);
2744 2751
2745 2752 if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
2746 2753 spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
2747 2754 continue;
2748 2755
2749 2756 work++;
2750 2757 spec->dtsp_cleaning = 1;
2751 2758 }
2752 2759
2753 2760 if (!work)
2754 2761 return;
2755 2762
2756 2763 dtrace_xcall(DTRACE_CPUALL,
2757 2764 (dtrace_xcall_t)dtrace_speculation_clean_here, state);
2758 2765
2759 2766 /*
2760 2767 * We now know that all CPUs have committed or discarded their
2761 2768 * speculation buffers, as appropriate. We can now set the state
2762 2769 * to inactive.
2763 2770 */
2764 2771 for (i = 0; i < state->dts_nspeculations; i++) {
2765 2772 dtrace_speculation_t *spec = &state->dts_speculations[i];
2766 2773 dtrace_speculation_state_t current, new;
2767 2774
2768 2775 if (!spec->dtsp_cleaning)
2769 2776 continue;
2770 2777
2771 2778 current = spec->dtsp_state;
2772 2779 ASSERT(current == DTRACESPEC_DISCARDING ||
2773 2780 current == DTRACESPEC_COMMITTINGMANY);
2774 2781
2775 2782 new = DTRACESPEC_INACTIVE;
2776 2783
2777 2784 rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
2778 2785 ASSERT(rv == current);
2779 2786 spec->dtsp_cleaning = 0;
2780 2787 }
2781 2788 }
2782 2789
2783 2790 /*
2784 2791 * Called as part of a speculate() to get the speculative buffer associated
2785 2792 * with a given speculation. Returns NULL if the specified speculation is not
2786 2793 * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and
2787 2794 * the active CPU is not the specified CPU -- the speculation will be
2788 2795 * atomically transitioned into the ACTIVEMANY state.
2789 2796 */
2790 2797 static dtrace_buffer_t *
2791 2798 dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
2792 2799 dtrace_specid_t which)
2793 2800 {
2794 2801 dtrace_speculation_t *spec;
2795 2802 dtrace_speculation_state_t current, new;
2796 2803 dtrace_buffer_t *buf;
2797 2804
2798 2805 if (which == 0)
2799 2806 return (NULL);
2800 2807
2801 2808 if (which > state->dts_nspeculations) {
2802 2809 cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2803 2810 return (NULL);
2804 2811 }
2805 2812
2806 2813 spec = &state->dts_speculations[which - 1];
2807 2814 buf = &spec->dtsp_buffer[cpuid];
2808 2815
2809 2816 do {
2810 2817 current = spec->dtsp_state;
2811 2818
2812 2819 switch (current) {
2813 2820 case DTRACESPEC_INACTIVE:
2814 2821 case DTRACESPEC_COMMITTINGMANY:
2815 2822 case DTRACESPEC_DISCARDING:
2816 2823 return (NULL);
2817 2824
2818 2825 case DTRACESPEC_COMMITTING:
2819 2826 ASSERT(buf->dtb_offset == 0);
2820 2827 return (NULL);
2821 2828
2822 2829 case DTRACESPEC_ACTIVEONE:
2823 2830 /*
2824 2831 * This speculation is currently active on one CPU.
2825 2832 * Check the offset in the buffer; if it's non-zero,
2826 2833 * that CPU must be us (and we leave the state alone).
2827 2834 * If it's zero, assume that we're starting on a new
2828 2835 * CPU -- and change the state to indicate that the
2829 2836 * speculation is active on more than one CPU.
2830 2837 */
2831 2838 if (buf->dtb_offset != 0)
2832 2839 return (buf);
2833 2840
2834 2841 new = DTRACESPEC_ACTIVEMANY;
2835 2842 break;
2836 2843
2837 2844 case DTRACESPEC_ACTIVEMANY:
2838 2845 return (buf);
2839 2846
2840 2847 case DTRACESPEC_ACTIVE:
2841 2848 new = DTRACESPEC_ACTIVEONE;
2842 2849 break;
2843 2850
2844 2851 default:
2845 2852 ASSERT(0);
2846 2853 }
2847 2854 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2848 2855 current, new) != current);
2849 2856
2850 2857 ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
2851 2858 return (buf);
2852 2859 }
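
/*
 * Taken together, these routines walk the speculation state machine.
 * In outline (informal; see <sys/dtrace_impl.h> for the authoritative
 * transition diagram):
 *
 *	INACTIVE    --speculation()-->           ACTIVE
 *	ACTIVE      --speculate()-->             ACTIVEONE
 *	ACTIVEONE   --speculate(), new CPU-->    ACTIVEMANY
 *	ACTIVE      --commit()-->                COMMITTING --> INACTIVE
 *	ACTIVEONE   --commit(), same CPU-->      COMMITTING --> INACTIVE
 *	ACTIVEONE   --commit(), other CPU-->     COMMITTINGMANY
 *	ACTIVEMANY  --commit()-->                COMMITTINGMANY
 *	ACTIVEONE   --discard(), same CPU-->     INACTIVE
 *	ACTIVE, ACTIVEONE (other CPU), ACTIVEMANY --discard()--> DISCARDING
 *	COMMITTINGMANY, DISCARDING --cleaned on all CPUs--> INACTIVE
 */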
2853 2860
2854 2861 /*
2855 2862 * Return a string. In the event that the user lacks the privilege to access
2856 2863 * arbitrary kernel memory, we copy the string out to scratch memory so that we
2857 2864 * don't fail access checking.
2858 2865 *
2859 2866 * dtrace_dif_variable() uses this routine as a helper for various
2860 2867 * builtin values such as 'execname' and 'probefunc.'
2861 2868 */
2862 2869 uintptr_t
2863 2870 dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
2864 2871 dtrace_mstate_t *mstate)
2865 2872 {
2866 2873 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
2867 2874 uintptr_t ret;
2868 2875 size_t strsz;
2869 2876
2870 2877 /*
2871 2878 * The easy case: this probe is allowed to read all of memory, so
2872 2879 * we can just return this as a vanilla pointer.
2873 2880 */
2874 2881 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
2875 2882 return (addr);
2876 2883
2877 2884 /*
2878 2885 * This is the tougher case: we copy the string in question from
2879 2886 * kernel memory into scratch memory and return it that way: this
2880 2887 * ensures that we won't trip up when access checking tests the
2881 2888 * BYREF return value.
2882 2889 */
2883 2890 strsz = dtrace_strlen((char *)addr, size) + 1;
2884 2891
2885 2892 if (mstate->dtms_scratch_ptr + strsz >
2886 2893 mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
2887 2894 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
2888 2895 return (NULL);
2889 2896 }
2890 2897
2891 2898 dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
2892 2899 strsz);
2893 2900 ret = mstate->dtms_scratch_ptr;
2894 2901 mstate->dtms_scratch_ptr += strsz;
2895 2902 return (ret);
2896 2903 }
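
/*
 * The scratch copy above is a simple bump allocation: place the string
 * at the current scratch pointer, bounded by the end of the scratch
 * region, and advance the pointer past it. The same pattern in
 * user-land terms (illustrative, not part of the build; the
 * example_scratch_t descriptor is a hypothetical stand-in for the
 * mstate fields, and <string.h> strnlen()/memcpy() stand in for the
 * DTrace-safe routines):
 */
typedef struct example_scratch {
	char *base;	/* start of scratch region */
	char *ptr;	/* current allocation point */
	size_t size;	/* total region size */
} example_scratch_t;

static char *
example_scratch_strcpy(example_scratch_t *s, const char *str, size_t maxlen)
{
	size_t len = strnlen(str, maxlen);
	char *ret;

	if (s->ptr + len + 1 > s->base + s->size)
		return (NULL);		/* out of scratch: caller drops */

	(void) memcpy(s->ptr, str, len);
	s->ptr[len] = '\0';		/* terminate even if truncated */
	ret = s->ptr;
	s->ptr += len + 1;
	return (ret);
}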
2897 2904
2898 2905 /*
2899 2906 * This function implements the DIF emulator's variable lookups. The emulator
2900 2907  * passes a reserved variable identifier and an optional built-in array index.
2901 2908 */
2902 2909 static uint64_t
2903 2910 dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
2904 2911 uint64_t ndx)
2905 2912 {
2906 2913 /*
2907 2914 * If we're accessing one of the uncached arguments, we'll turn this
2908 2915 * into a reference in the args array.
2909 2916 */
2910 2917 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
2911 2918 ndx = v - DIF_VAR_ARG0;
2912 2919 v = DIF_VAR_ARGS;
2913 2920 }
2914 2921
2915 2922 switch (v) {
2916 2923 case DIF_VAR_ARGS:
2917 2924 if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) {
2918 2925 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |=
2919 2926 CPU_DTRACE_KPRIV;
2920 2927 return (0);
2921 2928 }
2922 2929
2923 2930 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
2924 2931 if (ndx >= sizeof (mstate->dtms_arg) /
2925 2932 sizeof (mstate->dtms_arg[0])) {
2926 2933 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
2927 2934 dtrace_provider_t *pv;
2928 2935 uint64_t val;
2929 2936
2930 2937 pv = mstate->dtms_probe->dtpr_provider;
2931 2938 if (pv->dtpv_pops.dtps_getargval != NULL)
2932 2939 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
2933 2940 mstate->dtms_probe->dtpr_id,
2934 2941 mstate->dtms_probe->dtpr_arg, ndx, aframes);
2935 2942 else
2936 2943 val = dtrace_getarg(ndx, aframes);
2937 2944
2938 2945 /*
2939 2946 * This is regrettably required to keep the compiler
2940 2947 * from tail-optimizing the call to dtrace_getarg().
2941 2948 * The condition always evaluates to true, but the
2942 2949 * compiler has no way of figuring that out a priori.
2943 2950 * (None of this would be necessary if the compiler
2944 2951 * could be relied upon to _always_ tail-optimize
2945 2952 * the call to dtrace_getarg() -- but it can't.)
2946 2953 */
2947 2954 if (mstate->dtms_probe != NULL)
2948 2955 return (val);
2949 2956
2950 2957 ASSERT(0);
2951 2958 }
2952 2959
2953 2960 return (mstate->dtms_arg[ndx]);
2954 2961
2955 2962 case DIF_VAR_UREGS: {
2956 2963 klwp_t *lwp;
2957 2964
2958 2965 if (!dtrace_priv_proc(state, mstate))
2959 2966 return (0);
2960 2967
2961 2968 if ((lwp = curthread->t_lwp) == NULL) {
2962 2969 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
2963 2970 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL;
2964 2971 return (0);
2965 2972 }
2966 2973
2967 2974 return (dtrace_getreg(lwp->lwp_regs, ndx));
2968 2975 }
2969 2976
2970 2977 case DIF_VAR_VMREGS: {
2971 2978 uint64_t rval;
2972 2979
2973 2980 if (!dtrace_priv_kernel(state))
2974 2981 return (0);
2975 2982
2976 2983 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
2977 2984
2978 2985 rval = dtrace_getvmreg(ndx,
2979 2986 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);
2980 2987
2981 2988 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
2982 2989
2983 2990 return (rval);
2984 2991 }
2985 2992
2986 2993 case DIF_VAR_CURTHREAD:
2987 2994 if (!dtrace_priv_proc(state, mstate))
2988 2995 return (0);
2989 2996 return ((uint64_t)(uintptr_t)curthread);
2990 2997
2991 2998 case DIF_VAR_TIMESTAMP:
2992 2999 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
2993 3000 mstate->dtms_timestamp = dtrace_gethrtime();
2994 3001 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
2995 3002 }
2996 3003 return (mstate->dtms_timestamp);
2997 3004
2998 3005 case DIF_VAR_VTIMESTAMP:
2999 3006 ASSERT(dtrace_vtime_references != 0);
3000 3007 return (curthread->t_dtrace_vtime);
3001 3008
3002 3009 case DIF_VAR_WALLTIMESTAMP:
3003 3010 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
3004 3011 mstate->dtms_walltimestamp = dtrace_gethrestime();
3005 3012 mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
3006 3013 }
3007 3014 return (mstate->dtms_walltimestamp);
3008 3015
3009 3016 case DIF_VAR_IPL:
3010 3017 if (!dtrace_priv_kernel(state))
3011 3018 return (0);
3012 3019 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
3013 3020 mstate->dtms_ipl = dtrace_getipl();
3014 3021 mstate->dtms_present |= DTRACE_MSTATE_IPL;
3015 3022 }
3016 3023 return (mstate->dtms_ipl);
3017 3024
3018 3025 case DIF_VAR_EPID:
3019 3026 ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
3020 3027 return (mstate->dtms_epid);
3021 3028
3022 3029 case DIF_VAR_ID:
3023 3030 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3024 3031 return (mstate->dtms_probe->dtpr_id);
3025 3032
3026 3033 case DIF_VAR_STACKDEPTH:
3027 3034 if (!dtrace_priv_kernel(state))
3028 3035 return (0);
3029 3036 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
3030 3037 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
3031 3038
3032 3039 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
3033 3040 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
3034 3041 }
3035 3042 return (mstate->dtms_stackdepth);
3036 3043
3037 3044 case DIF_VAR_USTACKDEPTH:
3038 3045 if (!dtrace_priv_proc(state, mstate))
3039 3046 return (0);
3040 3047 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
3041 3048 /*
3042 3049 * See comment in DIF_VAR_PID.
3043 3050 */
3044 3051 if (DTRACE_ANCHORED(mstate->dtms_probe) &&
3045 3052 CPU_ON_INTR(CPU)) {
3046 3053 mstate->dtms_ustackdepth = 0;
3047 3054 } else {
3048 3055 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3049 3056 mstate->dtms_ustackdepth =
3050 3057 dtrace_getustackdepth();
3051 3058 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3052 3059 }
3053 3060 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
3054 3061 }
3055 3062 return (mstate->dtms_ustackdepth);
3056 3063
3057 3064 case DIF_VAR_CALLER:
3058 3065 if (!dtrace_priv_kernel(state))
3059 3066 return (0);
3060 3067 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
3061 3068 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
3062 3069
3063 3070 if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
3064 3071 /*
3065 3072 * If this is an unanchored probe, we are
3066 3073 * required to go through the slow path:
3067 3074 * dtrace_caller() only guarantees correct
3068 3075 * results for anchored probes.
3069 3076 */
3070 3077 pc_t caller[2];
3071 3078
3072 3079 dtrace_getpcstack(caller, 2, aframes,
3073 3080 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
3074 3081 mstate->dtms_caller = caller[1];
3075 3082 } else if ((mstate->dtms_caller =
3076 3083 dtrace_caller(aframes)) == -1) {
3077 3084 /*
3078 3085 * We have failed to do this the quick way;
3079 3086 * we must resort to the slower approach of
3080 3087 * calling dtrace_getpcstack().
3081 3088 */
3082 3089 pc_t caller;
3083 3090
3084 3091 dtrace_getpcstack(&caller, 1, aframes, NULL);
3085 3092 mstate->dtms_caller = caller;
3086 3093 }
3087 3094
3088 3095 mstate->dtms_present |= DTRACE_MSTATE_CALLER;
3089 3096 }
3090 3097 return (mstate->dtms_caller);
3091 3098
3092 3099 case DIF_VAR_UCALLER:
3093 3100 if (!dtrace_priv_proc(state, mstate))
3094 3101 return (0);
3095 3102
3096 3103 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
3097 3104 uint64_t ustack[3];
3098 3105
3099 3106 /*
3100 3107 * dtrace_getupcstack() fills in the first uint64_t
3101 3108 * with the current PID. The second uint64_t will
3102 3109 * be the program counter at user-level. The third
3103 3110 * uint64_t will contain the caller, which is what
3104 3111 * we're after.
3105 3112 */
3106 3113 ustack[2] = NULL;
3107 3114 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3108 3115 dtrace_getupcstack(ustack, 3);
3109 3116 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3110 3117 mstate->dtms_ucaller = ustack[2];
3111 3118 mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
3112 3119 }
3113 3120
3114 3121 return (mstate->dtms_ucaller);
3115 3122
3116 3123 case DIF_VAR_PROBEPROV:
3117 3124 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3118 3125 return (dtrace_dif_varstr(
3119 3126 (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
3120 3127 state, mstate));
3121 3128
3122 3129 case DIF_VAR_PROBEMOD:
3123 3130 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3124 3131 return (dtrace_dif_varstr(
3125 3132 (uintptr_t)mstate->dtms_probe->dtpr_mod,
3126 3133 state, mstate));
3127 3134
3128 3135 case DIF_VAR_PROBEFUNC:
3129 3136 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3130 3137 return (dtrace_dif_varstr(
3131 3138 (uintptr_t)mstate->dtms_probe->dtpr_func,
3132 3139 state, mstate));
3133 3140
3134 3141 case DIF_VAR_PROBENAME:
3135 3142 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3136 3143 return (dtrace_dif_varstr(
3137 3144 (uintptr_t)mstate->dtms_probe->dtpr_name,
3138 3145 state, mstate));
3139 3146
3140 3147 case DIF_VAR_PID:
3141 3148 if (!dtrace_priv_proc(state, mstate))
3142 3149 return (0);
3143 3150
3144 3151 /*
3145 3152 * Note that we are assuming that an unanchored probe is
3146 3153 * always due to a high-level interrupt. (And we're assuming
3147 3154	 * that there is only a single high-level interrupt.)
3148 3155 */
3149 3156 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3150 3157 return (pid0.pid_id);
3151 3158
3152 3159 /*
3153 3160 * It is always safe to dereference one's own t_procp pointer:
3154 3161 * it always points to a valid, allocated proc structure.
3155 3162 * Further, it is always safe to dereference the p_pidp member
3156 3163	 * of one's own proc structure. (These are truisms because
3157 3164 * threads and processes don't clean up their own state --
3158 3165 * they leave that task to whomever reaps them.)
3159 3166 */
3160 3167 return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
3161 3168
3162 3169 case DIF_VAR_PPID:
3163 3170 if (!dtrace_priv_proc(state, mstate))
3164 3171 return (0);
3165 3172
3166 3173 /*
3167 3174 * See comment in DIF_VAR_PID.
3168 3175 */
3169 3176 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3170 3177 return (pid0.pid_id);
3171 3178
3172 3179 /*
3173 3180 * It is always safe to dereference one's own t_procp pointer:
3174 3181 * it always points to a valid, allocated proc structure.
3175 3182 * (This is true because threads don't clean up their own
3176 3183 * state -- they leave that task to whomever reaps them.)
3177 3184 */
3178 3185 return ((uint64_t)curthread->t_procp->p_ppid);
3179 3186
3180 3187 case DIF_VAR_TID:
3181 3188 /*
3182 3189 * See comment in DIF_VAR_PID.
3183 3190 */
3184 3191 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3185 3192 return (0);
3186 3193
3187 3194 return ((uint64_t)curthread->t_tid);
3188 3195
3189 3196 case DIF_VAR_EXECNAME:
3190 3197 if (!dtrace_priv_proc(state, mstate))
3191 3198 return (0);
3192 3199
3193 3200 /*
3194 3201 * See comment in DIF_VAR_PID.
3195 3202 */
3196 3203 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3197 3204 return ((uint64_t)(uintptr_t)p0.p_user.u_comm);
3198 3205
3199 3206 /*
3200 3207 * It is always safe to dereference one's own t_procp pointer:
3201 3208 * it always points to a valid, allocated proc structure.
3202 3209 * (This is true because threads don't clean up their own
3203 3210 * state -- they leave that task to whomever reaps them.)
3204 3211 */
3205 3212 return (dtrace_dif_varstr(
3206 3213 (uintptr_t)curthread->t_procp->p_user.u_comm,
3207 3214 state, mstate));
3208 3215
3209 3216 case DIF_VAR_ZONENAME:
3210 3217 if (!dtrace_priv_proc(state, mstate))
3211 3218 return (0);
3212 3219
3213 3220 /*
3214 3221 * See comment in DIF_VAR_PID.
3215 3222 */
3216 3223 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3217 3224 return ((uint64_t)(uintptr_t)p0.p_zone->zone_name);
3218 3225
3219 3226 /*
3220 3227 * It is always safe to dereference one's own t_procp pointer:
3221 3228 * it always points to a valid, allocated proc structure.
3222 3229 * (This is true because threads don't clean up their own
3223 3230 * state -- they leave that task to whomever reaps them.)
3224 3231 */
3225 3232 return (dtrace_dif_varstr(
3226 3233 (uintptr_t)curthread->t_procp->p_zone->zone_name,
3227 3234 state, mstate));
3228 3235
3229 3236 case DIF_VAR_UID:
3230 3237 if (!dtrace_priv_proc(state, mstate))
3231 3238 return (0);
3232 3239
3233 3240 /*
3234 3241 * See comment in DIF_VAR_PID.
3235 3242 */
3236 3243 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3237 3244 return ((uint64_t)p0.p_cred->cr_uid);
3238 3245
3239 3246 /*
3240 3247 * It is always safe to dereference one's own t_procp pointer:
3241 3248 * it always points to a valid, allocated proc structure.
3242 3249 * (This is true because threads don't clean up their own
3243 3250 * state -- they leave that task to whomever reaps them.)
3244 3251 *
3245 3252 * Additionally, it is safe to dereference one's own process
3246 3253 * credential, since this is never NULL after process birth.
3247 3254 */
3248 3255 return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
3249 3256
3250 3257 case DIF_VAR_GID:
3251 3258 if (!dtrace_priv_proc(state, mstate))
3252 3259 return (0);
3253 3260
3254 3261 /*
3255 3262 * See comment in DIF_VAR_PID.
3256 3263 */
3257 3264 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3258 3265 return ((uint64_t)p0.p_cred->cr_gid);
3259 3266
3260 3267 /*
3261 3268 * It is always safe to dereference one's own t_procp pointer:
3262 3269 * it always points to a valid, allocated proc structure.
3263 3270 * (This is true because threads don't clean up their own
3264 3271 * state -- they leave that task to whomever reaps them.)
3265 3272 *
3266 3273 * Additionally, it is safe to dereference one's own process
3267 3274 * credential, since this is never NULL after process birth.
3268 3275 */
3269 3276 return ((uint64_t)curthread->t_procp->p_cred->cr_gid);
3270 3277
3271 3278 case DIF_VAR_ERRNO: {
3272 3279 klwp_t *lwp;
3273 3280 if (!dtrace_priv_proc(state, mstate))
3274 3281 return (0);
3275 3282
3276 3283 /*
3277 3284 * See comment in DIF_VAR_PID.
3278 3285 */
3279 3286 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3280 3287 return (0);
3281 3288
3282 3289 /*
3283 3290 * It is always safe to dereference one's own t_lwp pointer in
3284 3291 * the event that this pointer is non-NULL. (This is true
3285 3292 * because threads and lwps don't clean up their own state --
3286 3293 * they leave that task to whomever reaps them.)
3287 3294 */
3288 3295 if ((lwp = curthread->t_lwp) == NULL)
3289 3296 return (0);
3290 3297
3291 3298 return ((uint64_t)lwp->lwp_errno);
3292 3299 }
3293 3300 default:
3294 3301 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3295 3302 return (0);
3296 3303 }
3297 3304 }
3298 3305
3299 3306 /*
3300 3307 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
3301 3308 * Notice that we don't bother validating the proper number of arguments or
3302 3309  * their types in the tuple stack.  This isn't needed: all argument
3303 3310  * interpretation is safe thanks to our load safety -- the worst that can
3304 3311 * happen is that a bogus program can obtain bogus results.
3305 3312 */
3306 3313 static void
3307 3314 dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
3308 3315 dtrace_key_t *tupregs, int nargs,
3309 3316 dtrace_mstate_t *mstate, dtrace_state_t *state)
3310 3317 {
3311 3318 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
3312 3319 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
3313 3320 dtrace_vstate_t *vstate = &state->dts_vstate;
3314 3321
3315 3322 union {
3316 3323 mutex_impl_t mi;
3317 3324 uint64_t mx;
3318 3325 } m;
3319 3326
3320 3327 union {
3321 3328 krwlock_t ri;
3322 3329 uintptr_t rw;
3323 3330 } r;
3324 3331
3325 3332 switch (subr) {
3326 3333 case DIF_SUBR_RAND:
3327 3334 regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
3328 3335 break;
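/*
 * [Editor's sketch, not part of this change] rand() above is a single
 * linear-congruential step seeded by the current hrtime: x' = (x * a + c)
 * mod m, with a = 2416, c = 374441 and m = 1771875 taken from the case
 * above.  A minimal user-level rendering of the same recurrence (the seed
 * parameter here is hypothetical):
 */
#include <stdint.h>

static uint64_t
lcg_next(uint64_t seed)
{
	/* one LCG step -- statistically weak randomness, by design */
	return ((seed * 2416 + 374441) % 1771875);
}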
3329 3336
3330 3337 case DIF_SUBR_MUTEX_OWNED:
3331 3338 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3332 3339 mstate, vstate)) {
3333 3340 regs[rd] = NULL;
3334 3341 break;
3335 3342 }
3336 3343
3337 3344 m.mx = dtrace_load64(tupregs[0].dttk_value);
3338 3345 if (MUTEX_TYPE_ADAPTIVE(&m.mi))
3339 3346 regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
3340 3347 else
3341 3348 regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
3342 3349 break;
3343 3350
3344 3351 case DIF_SUBR_MUTEX_OWNER:
3345 3352 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3346 3353 mstate, vstate)) {
3347 3354 regs[rd] = NULL;
3348 3355 break;
3349 3356 }
3350 3357
3351 3358 m.mx = dtrace_load64(tupregs[0].dttk_value);
3352 3359 if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
3353 3360 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
3354 3361 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
3355 3362 else
3356 3363 regs[rd] = 0;
3357 3364 break;
3358 3365
3359 3366 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
3360 3367 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3361 3368 mstate, vstate)) {
3362 3369 regs[rd] = NULL;
3363 3370 break;
3364 3371 }
3365 3372
3366 3373 m.mx = dtrace_load64(tupregs[0].dttk_value);
3367 3374 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
3368 3375 break;
3369 3376
3370 3377 case DIF_SUBR_MUTEX_TYPE_SPIN:
3371 3378 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3372 3379 mstate, vstate)) {
3373 3380 regs[rd] = NULL;
3374 3381 break;
3375 3382 }
3376 3383
3377 3384 m.mx = dtrace_load64(tupregs[0].dttk_value);
3378 3385 regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
3379 3386 break;
3380 3387
3381 3388 case DIF_SUBR_RW_READ_HELD: {
3382 3389 uintptr_t tmp;
3383 3390
3384 3391 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
3385 3392 mstate, vstate)) {
3386 3393 regs[rd] = NULL;
3387 3394 break;
3388 3395 }
3389 3396
3390 3397 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3391 3398 regs[rd] = _RW_READ_HELD(&r.ri, tmp);
3392 3399 break;
3393 3400 }
3394 3401
3395 3402 case DIF_SUBR_RW_WRITE_HELD:
3396 3403 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
3397 3404 mstate, vstate)) {
3398 3405 regs[rd] = NULL;
3399 3406 break;
3400 3407 }
3401 3408
3402 3409 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3403 3410 regs[rd] = _RW_WRITE_HELD(&r.ri);
3404 3411 break;
3405 3412
3406 3413 case DIF_SUBR_RW_ISWRITER:
3407 3414 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
3408 3415 mstate, vstate)) {
3409 3416 regs[rd] = NULL;
3410 3417 break;
3411 3418 }
3412 3419
3413 3420 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3414 3421 regs[rd] = _RW_ISWRITER(&r.ri);
3415 3422 break;
3416 3423
3417 3424 case DIF_SUBR_BCOPY: {
3418 3425 /*
3419 3426 * We need to be sure that the destination is in the scratch
3420 3427 * region -- no other region is allowed.
3421 3428 */
3422 3429 uintptr_t src = tupregs[0].dttk_value;
3423 3430 uintptr_t dest = tupregs[1].dttk_value;
3424 3431 size_t size = tupregs[2].dttk_value;
3425 3432
3426 3433 if (!dtrace_inscratch(dest, size, mstate)) {
3427 3434 *flags |= CPU_DTRACE_BADADDR;
3428 3435 *illval = regs[rd];
3429 3436 break;
3430 3437 }
3431 3438
3432 3439 if (!dtrace_canload(src, size, mstate, vstate)) {
3433 3440 regs[rd] = NULL;
3434 3441 break;
3435 3442 }
3436 3443
3437 3444 dtrace_bcopy((void *)src, (void *)dest, size);
3438 3445 break;
3439 3446 }
3440 3447
3441 3448 case DIF_SUBR_ALLOCA:
3442 3449 case DIF_SUBR_COPYIN: {
3443 3450 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
3444 3451 uint64_t size =
3445 3452 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
3446 3453 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;
3447 3454
3448 3455 /*
3449 3456 * This action doesn't require any credential checks since
3450 3457 * probes will not activate in user contexts to which the
3451 3458 * enabling user does not have permissions.
3452 3459 */
3453 3460
3454 3461 /*
3455 3462 * Rounding up the user allocation size could have overflowed
3456 3463 * a large, bogus allocation (like -1ULL) to 0.
3457 3464 */
3458 3465 if (scratch_size < size ||
3459 3466 !DTRACE_INSCRATCH(mstate, scratch_size)) {
3460 3467 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3461 3468 regs[rd] = NULL;
3462 3469 break;
3463 3470 }
3464 3471
3465 3472 if (subr == DIF_SUBR_COPYIN) {
3466 3473 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3467 3474 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
3468 3475 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3469 3476 }
3470 3477
3471 3478 mstate->dtms_scratch_ptr += scratch_size;
3472 3479 regs[rd] = dest;
3473 3480 break;
3474 3481 }
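/*
 * [Editor's sketch, not part of this change] The scratch_size < size test
 * above is the entire overflow defense: if rounding wrapped a huge request
 * (such as -1ULL) around, the computed total comes out smaller than the
 * request itself.  A standalone rendering of the check (align is assumed
 * to be a power of two, as P2ROUNDUP() requires):
 */
#include <stdint.h>

static int
scratch_alloc_overflowed(uint64_t scratch_ptr, uint64_t size, uint64_t align)
{
	uint64_t dest = (scratch_ptr + (align - 1)) & ~(align - 1);
	uint64_t scratch_size = (dest - scratch_ptr) + size;

	/* wrap-around makes the sum smaller than one of its addends */
	return (scratch_size < size);
}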
3475 3482
3476 3483 case DIF_SUBR_COPYINTO: {
3477 3484 uint64_t size = tupregs[1].dttk_value;
3478 3485 uintptr_t dest = tupregs[2].dttk_value;
3479 3486
3480 3487 /*
3481 3488 * This action doesn't require any credential checks since
3482 3489 * probes will not activate in user contexts to which the
3483 3490 * enabling user does not have permissions.
3484 3491 */
3485 3492 if (!dtrace_inscratch(dest, size, mstate)) {
3486 3493 *flags |= CPU_DTRACE_BADADDR;
3487 3494 *illval = regs[rd];
3488 3495 break;
3489 3496 }
3490 3497
3491 3498 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3492 3499 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
3493 3500 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3494 3501 break;
3495 3502 }
3496 3503
3497 3504 case DIF_SUBR_COPYINSTR: {
3498 3505 uintptr_t dest = mstate->dtms_scratch_ptr;
3499 3506 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3500 3507
3501 3508 if (nargs > 1 && tupregs[1].dttk_value < size)
3502 3509 size = tupregs[1].dttk_value + 1;
3503 3510
3504 3511 /*
3505 3512 * This action doesn't require any credential checks since
3506 3513 * probes will not activate in user contexts to which the
3507 3514 * enabling user does not have permissions.
3508 3515 */
3509 3516 if (!DTRACE_INSCRATCH(mstate, size)) {
3510 3517 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3511 3518 regs[rd] = NULL;
3512 3519 break;
3513 3520 }
3514 3521
3515 3522 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3516 3523 dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
3517 3524 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3518 3525
3519 3526 ((char *)dest)[size - 1] = '\0';
3520 3527 mstate->dtms_scratch_ptr += size;
3521 3528 regs[rd] = dest;
3522 3529 break;
3523 3530 }
3524 3531
3525 3532 case DIF_SUBR_MSGSIZE:
3526 3533 case DIF_SUBR_MSGDSIZE: {
3527 3534 uintptr_t baddr = tupregs[0].dttk_value, daddr;
3528 3535 uintptr_t wptr, rptr;
3529 3536 size_t count = 0;
3530 3537 int cont = 0;
3531 3538
3532 3539 while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
3533 3540
3534 3541 if (!dtrace_canload(baddr, sizeof (mblk_t), mstate,
3535 3542 vstate)) {
3536 3543 regs[rd] = NULL;
3537 3544 break;
3538 3545 }
3539 3546
3540 3547 wptr = dtrace_loadptr(baddr +
3541 3548 offsetof(mblk_t, b_wptr));
3542 3549
3543 3550 rptr = dtrace_loadptr(baddr +
3544 3551 offsetof(mblk_t, b_rptr));
3545 3552
3546 3553 if (wptr < rptr) {
3547 3554 *flags |= CPU_DTRACE_BADADDR;
3548 3555 *illval = tupregs[0].dttk_value;
3549 3556 break;
3550 3557 }
3551 3558
3552 3559 daddr = dtrace_loadptr(baddr +
3553 3560 offsetof(mblk_t, b_datap));
3554 3561
3555 3562 baddr = dtrace_loadptr(baddr +
3556 3563 offsetof(mblk_t, b_cont));
3557 3564
3558 3565 /*
3559 3566			 * We want to guard against denial-of-service here,
3560 3567 * so we're only going to search the list for
3561 3568 * dtrace_msgdsize_max mblks.
3562 3569 */
3563 3570 if (cont++ > dtrace_msgdsize_max) {
3564 3571 *flags |= CPU_DTRACE_ILLOP;
3565 3572 break;
3566 3573 }
3567 3574
3568 3575 if (subr == DIF_SUBR_MSGDSIZE) {
3569 3576 if (dtrace_load8(daddr +
3570 3577 offsetof(dblk_t, db_type)) != M_DATA)
3571 3578 continue;
3572 3579 }
3573 3580
3574 3581 count += wptr - rptr;
3575 3582 }
3576 3583
3577 3584 if (!(*flags & CPU_DTRACE_FAULT))
3578 3585 regs[rd] = count;
3579 3586
3580 3587 break;
3581 3588 }
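/*
 * [Editor's sketch, not part of this change] The loop above sums
 * b_wptr - b_rptr across the b_cont chain but refuses to follow more than
 * dtrace_msgdsize_max links, so a corrupt or circular chain cannot be
 * walked forever.  The same bounded walk on a stand-in message type
 * (struct mb and max_hops are hypothetical, not the kernel's):
 */
#include <stddef.h>

struct mb {
	unsigned char *rptr, *wptr;	/* read and write pointers */
	struct mb *cont;		/* next block in the message */
};

static size_t
chain_size(const struct mb *m, int max_hops)
{
	size_t count = 0;
	int hops = 0;

	for (; m != NULL; m = m->cont) {
		if (hops++ > max_hops)
			return (0);	/* give up, as the ILLOP above does */

		count += (size_t)(m->wptr - m->rptr);
	}

	return (count);
}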
3582 3589
3583 3590 case DIF_SUBR_PROGENYOF: {
3584 3591 pid_t pid = tupregs[0].dttk_value;
3585 3592 proc_t *p;
3586 3593 int rval = 0;
3587 3594
3588 3595 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3589 3596
3590 3597 for (p = curthread->t_procp; p != NULL; p = p->p_parent) {
3591 3598 if (p->p_pidp->pid_id == pid) {
3592 3599 rval = 1;
3593 3600 break;
3594 3601 }
3595 3602 }
3596 3603
3597 3604 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3598 3605
3599 3606 regs[rd] = rval;
3600 3607 break;
3601 3608 }
3602 3609
3603 3610 case DIF_SUBR_SPECULATION:
3604 3611 regs[rd] = dtrace_speculation(state);
3605 3612 break;
3606 3613
3607 3614 case DIF_SUBR_COPYOUT: {
3608 3615 uintptr_t kaddr = tupregs[0].dttk_value;
3609 3616 uintptr_t uaddr = tupregs[1].dttk_value;
3610 3617 uint64_t size = tupregs[2].dttk_value;
3611 3618
3612 3619 if (!dtrace_destructive_disallow &&
3613 3620 dtrace_priv_proc_control(state, mstate) &&
3614 3621 !dtrace_istoxic(kaddr, size)) {
3615 3622 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3616 3623 dtrace_copyout(kaddr, uaddr, size, flags);
3617 3624 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3618 3625 }
3619 3626 break;
3620 3627 }
3621 3628
3622 3629 case DIF_SUBR_COPYOUTSTR: {
3623 3630 uintptr_t kaddr = tupregs[0].dttk_value;
3624 3631 uintptr_t uaddr = tupregs[1].dttk_value;
3625 3632 uint64_t size = tupregs[2].dttk_value;
3626 3633
3627 3634 if (!dtrace_destructive_disallow &&
3628 3635 dtrace_priv_proc_control(state, mstate) &&
3629 3636 !dtrace_istoxic(kaddr, size)) {
3630 3637 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3631 3638 dtrace_copyoutstr(kaddr, uaddr, size, flags);
3632 3639 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3633 3640 }
3634 3641 break;
3635 3642 }
3636 3643
3637 3644 case DIF_SUBR_STRLEN: {
3638 3645 size_t sz;
3639 3646 uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
3640 3647 sz = dtrace_strlen((char *)addr,
3641 3648 state->dts_options[DTRACEOPT_STRSIZE]);
3642 3649
3643 3650 if (!dtrace_canload(addr, sz + 1, mstate, vstate)) {
3644 3651 regs[rd] = NULL;
3645 3652 break;
3646 3653 }
3647 3654
3648 3655 regs[rd] = sz;
3649 3656
3650 3657 break;
3651 3658 }
3652 3659
3653 3660 case DIF_SUBR_STRCHR:
3654 3661 case DIF_SUBR_STRRCHR: {
3655 3662 /*
3656 3663 * We're going to iterate over the string looking for the
3657 3664 * specified character. We will iterate until we have reached
3658 3665 * the string length or we have found the character. If this
3659 3666 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
3660 3667 * of the specified character instead of the first.
3661 3668 */
3662 3669 uintptr_t saddr = tupregs[0].dttk_value;
3663 3670 uintptr_t addr = tupregs[0].dttk_value;
3664 3671 uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE];
3665 3672 char c, target = (char)tupregs[1].dttk_value;
3666 3673
3667 3674 for (regs[rd] = NULL; addr < limit; addr++) {
3668 3675 if ((c = dtrace_load8(addr)) == target) {
3669 3676 regs[rd] = addr;
3670 3677
3671 3678 if (subr == DIF_SUBR_STRCHR)
3672 3679 break;
3673 3680 }
3674 3681
3675 3682 if (c == '\0')
3676 3683 break;
3677 3684 }
3678 3685
3679 3686 if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) {
3680 3687 regs[rd] = NULL;
3681 3688 break;
3682 3689 }
3683 3690
3684 3691 break;
3685 3692 }
3686 3693
3687 3694 case DIF_SUBR_STRSTR:
3688 3695 case DIF_SUBR_INDEX:
3689 3696 case DIF_SUBR_RINDEX: {
3690 3697 /*
3691 3698 * We're going to iterate over the string looking for the
3692 3699 * specified string. We will iterate until we have reached
3693 3700 * the string length or we have found the string. (Yes, this
3694 3701 * is done in the most naive way possible -- but considering
3695 3702 * that the string we're searching for is likely to be
3696 3703 * relatively short, the complexity of Rabin-Karp or similar
3697 3704 * hardly seems merited.)
3698 3705 */
3699 3706 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
3700 3707 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
3701 3708 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3702 3709 size_t len = dtrace_strlen(addr, size);
3703 3710 size_t sublen = dtrace_strlen(substr, size);
3704 3711 char *limit = addr + len, *orig = addr;
3705 3712 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
3706 3713 int inc = 1;
3707 3714
3708 3715 regs[rd] = notfound;
3709 3716
3710 3717 if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
3711 3718 regs[rd] = NULL;
3712 3719 break;
3713 3720 }
3714 3721
3715 3722 if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
3716 3723 vstate)) {
3717 3724 regs[rd] = NULL;
3718 3725 break;
3719 3726 }
3720 3727
3721 3728 /*
3722 3729 * strstr() and index()/rindex() have similar semantics if
3723 3730 * both strings are the empty string: strstr() returns a
3724 3731 * pointer to the (empty) string, and index() and rindex()
3725 3732 * both return index 0 (regardless of any position argument).
3726 3733 */
3727 3734 if (sublen == 0 && len == 0) {
3728 3735 if (subr == DIF_SUBR_STRSTR)
3729 3736 regs[rd] = (uintptr_t)addr;
3730 3737 else
3731 3738 regs[rd] = 0;
3732 3739 break;
3733 3740 }
3734 3741
3735 3742 if (subr != DIF_SUBR_STRSTR) {
3736 3743 if (subr == DIF_SUBR_RINDEX) {
3737 3744 limit = orig - 1;
3738 3745 addr += len;
3739 3746 inc = -1;
3740 3747 }
3741 3748
3742 3749 /*
3743 3750 * Both index() and rindex() take an optional position
3744 3751 * argument that denotes the starting position.
3745 3752 */
3746 3753 if (nargs == 3) {
3747 3754 int64_t pos = (int64_t)tupregs[2].dttk_value;
3748 3755
3749 3756 /*
3750 3757 * If the position argument to index() is
3751 3758 * negative, Perl implicitly clamps it at
3752 3759 * zero. This semantic is a little surprising
3753 3760 * given the special meaning of negative
3754 3761 * positions to similar Perl functions like
3755 3762 * substr(), but it appears to reflect a
3756 3763 * notion that index() can start from a
3757 3764 * negative index and increment its way up to
3758 3765 * the string. Given this notion, Perl's
3759 3766 * rindex() is at least self-consistent in
3760 3767 * that it implicitly clamps positions greater
3761 3768 * than the string length to be the string
3762 3769 * length. Where Perl completely loses
3763 3770 * coherence, however, is when the specified
3764 3771 * substring is the empty string (""). In
3765 3772 * this case, even if the position is
3766 3773 * negative, rindex() returns 0 -- and even if
3767 3774 * the position is greater than the length,
3768 3775 * index() returns the string length. These
3769 3776 * semantics violate the notion that index()
3770 3777 * should never return a value less than the
3771 3778 * specified position and that rindex() should
3772 3779 * never return a value greater than the
3773 3780 * specified position. (One assumes that
3774 3781 * these semantics are artifacts of Perl's
3775 3782 * implementation and not the results of
3776 3783 * deliberate design -- it beggars belief that
3777 3784 * even Larry Wall could desire such oddness.)
3778 3785 * While in the abstract one would wish for
3779 3786 * consistent position semantics across
3780 3787 * substr(), index() and rindex() -- or at the
3781 3788 * very least self-consistent position
3782 3789 * semantics for index() and rindex() -- we
3783 3790 * instead opt to keep with the extant Perl
3784 3791 * semantics, in all their broken glory. (Do
3785 3792 * we have more desire to maintain Perl's
3786 3793 * semantics than Perl does? Probably.)
3787 3794 */
3788 3795 if (subr == DIF_SUBR_RINDEX) {
3789 3796 if (pos < 0) {
3790 3797 if (sublen == 0)
3791 3798 regs[rd] = 0;
3792 3799 break;
3793 3800 }
3794 3801
3795 3802 if (pos > len)
3796 3803 pos = len;
3797 3804 } else {
3798 3805 if (pos < 0)
3799 3806 pos = 0;
3800 3807
3801 3808 if (pos >= len) {
3802 3809 if (sublen == 0)
3803 3810 regs[rd] = len;
3804 3811 break;
3805 3812 }
3806 3813 }
3807 3814
3808 3815 addr = orig + pos;
3809 3816 }
3810 3817 }
3811 3818
3812 3819 for (regs[rd] = notfound; addr != limit; addr += inc) {
3813 3820 if (dtrace_strncmp(addr, substr, sublen) == 0) {
3814 3821 if (subr != DIF_SUBR_STRSTR) {
3815 3822 /*
3816 3823 * As D index() and rindex() are
3817 3824 * modeled on Perl (and not on awk),
3818 3825 * we return a zero-based (and not a
3819 3826 * one-based) index. (For you Perl
3820 3827 * weenies: no, we're not going to add
3821 3828 * $[ -- and shouldn't you be at a con
3822 3829 * or something?)
3823 3830 */
3824 3831 regs[rd] = (uintptr_t)(addr - orig);
3825 3832 break;
3826 3833 }
3827 3834
3828 3835 ASSERT(subr == DIF_SUBR_STRSTR);
3829 3836 regs[rd] = (uintptr_t)addr;
3830 3837 break;
3831 3838 }
3832 3839 }
3833 3840
3834 3841 break;
3835 3842 }
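/*
 * [Editor's sketch, not part of this change] The Perl-compatible position
 * clamping described above, reduced to a standalone index() on plain C
 * strings (d_index() is a hypothetical name; rindex() mirrors it with the
 * scan limits reversed):
 */
#include <string.h>

static long
d_index(const char *s, const char *sub, long pos)
{
	size_t len = strlen(s), sublen = strlen(sub);

	if (pos < 0)
		pos = 0;	/* negative positions clamp to zero */

	if ((size_t)pos >= len)
		return (sublen == 0 ? (long)len : -1);

	for (; (size_t)pos + sublen <= len; pos++) {
		if (strncmp(s + pos, sub, sublen) == 0)
			return (pos);	/* zero-based, as in D */
	}

	return (-1);	/* the emulator's "notfound" */
}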
3836 3843
3837 3844 case DIF_SUBR_STRTOK: {
3838 3845 uintptr_t addr = tupregs[0].dttk_value;
3839 3846 uintptr_t tokaddr = tupregs[1].dttk_value;
3840 3847 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3841 3848 uintptr_t limit, toklimit = tokaddr + size;
3842 3849 uint8_t c, tokmap[32]; /* 256 / 8 */
3843 3850 char *dest = (char *)mstate->dtms_scratch_ptr;
3844 3851 int i;
3845 3852
3846 3853 /*
3847 3854 * Check both the token buffer and (later) the input buffer,
3848 3855 * since both could be non-scratch addresses.
3849 3856 */
3850 3857 if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) {
3851 3858 regs[rd] = NULL;
3852 3859 break;
3853 3860 }
3854 3861
3855 3862 if (!DTRACE_INSCRATCH(mstate, size)) {
3856 3863 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3857 3864 regs[rd] = NULL;
3858 3865 break;
3859 3866 }
3860 3867
3861 3868 if (addr == NULL) {
3862 3869 /*
3863 3870 * If the address specified is NULL, we use our saved
3864 3871 * strtok pointer from the mstate. Note that this
3865 3872 * means that the saved strtok pointer is _only_
3866 3873 * valid within multiple enablings of the same probe --
3867 3874 * it behaves like an implicit clause-local variable.
3868 3875 */
3869 3876 addr = mstate->dtms_strtok;
3870 3877 } else {
3871 3878 /*
3872 3879			 * If the user-specified address is non-NULL we must
3873 3880			 * access-check it.  This is the only time we have
3874 3881			 * a chance to do so, since this address may reside
3875 3882			 * in the string table of this clause -- future calls
3876 3883 * (when we fetch addr from mstate->dtms_strtok)
3877 3884 * would fail this access check.
3878 3885 */
3879 3886 if (!dtrace_strcanload(addr, size, mstate, vstate)) {
3880 3887 regs[rd] = NULL;
3881 3888 break;
3882 3889 }
3883 3890 }
3884 3891
3885 3892 /*
3886 3893 * First, zero the token map, and then process the token
3887 3894 * string -- setting a bit in the map for every character
3888 3895 * found in the token string.
3889 3896 */
3890 3897 for (i = 0; i < sizeof (tokmap); i++)
3891 3898 tokmap[i] = 0;
3892 3899
3893 3900 for (; tokaddr < toklimit; tokaddr++) {
3894 3901 if ((c = dtrace_load8(tokaddr)) == '\0')
3895 3902 break;
3896 3903
3897 3904 ASSERT((c >> 3) < sizeof (tokmap));
3898 3905 tokmap[c >> 3] |= (1 << (c & 0x7));
3899 3906 }
3900 3907
3901 3908 for (limit = addr + size; addr < limit; addr++) {
3902 3909 /*
3903 3910 * We're looking for a character that is _not_ contained
3904 3911 * in the token string.
3905 3912 */
3906 3913 if ((c = dtrace_load8(addr)) == '\0')
3907 3914 break;
3908 3915
3909 3916 if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
3910 3917 break;
3911 3918 }
3912 3919
3913 3920 if (c == '\0') {
3914 3921 /*
3915 3922 * We reached the end of the string without finding
3916 3923 * any character that was not in the token string.
3917 3924 * We return NULL in this case, and we set the saved
3918 3925 * address to NULL as well.
3919 3926 */
3920 3927 regs[rd] = NULL;
3921 3928 mstate->dtms_strtok = NULL;
3922 3929 break;
3923 3930 }
3924 3931
3925 3932 /*
3926 3933 * From here on, we're copying into the destination string.
3927 3934 */
3928 3935 for (i = 0; addr < limit && i < size - 1; addr++) {
3929 3936 if ((c = dtrace_load8(addr)) == '\0')
3930 3937 break;
3931 3938
3932 3939 if (tokmap[c >> 3] & (1 << (c & 0x7)))
3933 3940 break;
3934 3941
3935 3942 ASSERT(i < size);
3936 3943 dest[i++] = c;
3937 3944 }
3938 3945
3939 3946 ASSERT(i < size);
3940 3947 dest[i] = '\0';
3941 3948 regs[rd] = (uintptr_t)dest;
3942 3949 mstate->dtms_scratch_ptr += size;
3943 3950 mstate->dtms_strtok = addr;
3944 3951 break;
3945 3952 }
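/*
 * [Editor's sketch, not part of this change] The token map above is a
 * 256-bit bitmap -- one bit per possible byte value -- so each delimiter
 * test costs one shift and one mask.  The same set-membership trick in
 * isolation:
 */
#include <stdint.h>

static uint8_t tokmap[32];	/* 256 bits / 8 */

static void
tokmap_set(uint8_t c)
{
	tokmap[c >> 3] |= (uint8_t)(1 << (c & 0x7));
}

static int
tokmap_test(uint8_t c)
{
	return ((tokmap[c >> 3] >> (c & 0x7)) & 1);
}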
3946 3953
3947 3954 case DIF_SUBR_SUBSTR: {
3948 3955 uintptr_t s = tupregs[0].dttk_value;
3949 3956 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3950 3957 char *d = (char *)mstate->dtms_scratch_ptr;
3951 3958 int64_t index = (int64_t)tupregs[1].dttk_value;
3952 3959 int64_t remaining = (int64_t)tupregs[2].dttk_value;
3953 3960 size_t len = dtrace_strlen((char *)s, size);
3954 3961 int64_t i;
3955 3962
3956 3963 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
3957 3964 regs[rd] = NULL;
3958 3965 break;
3959 3966 }
3960 3967
3961 3968 if (!DTRACE_INSCRATCH(mstate, size)) {
3962 3969 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3963 3970 regs[rd] = NULL;
3964 3971 break;
3965 3972 }
3966 3973
3967 3974 if (nargs <= 2)
3968 3975 remaining = (int64_t)size;
3969 3976
3970 3977 if (index < 0) {
3971 3978 index += len;
3972 3979
3973 3980 if (index < 0 && index + remaining > 0) {
3974 3981 remaining += index;
3975 3982 index = 0;
3976 3983 }
3977 3984 }
3978 3985
3979 3986 if (index >= len || index < 0) {
3980 3987 remaining = 0;
3981 3988 } else if (remaining < 0) {
3982 3989 remaining += len - index;
3983 3990 } else if (index + remaining > size) {
3984 3991 remaining = size - index;
3985 3992 }
3986 3993
3987 3994 for (i = 0; i < remaining; i++) {
3988 3995 if ((d[i] = dtrace_load8(s + index + i)) == '\0')
3989 3996 break;
3990 3997 }
3991 3998
3992 3999 d[i] = '\0';
3993 4000
3994 4001 mstate->dtms_scratch_ptr += size;
3995 4002 regs[rd] = (uintptr_t)d;
3996 4003 break;
3997 4004 }
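/*
 * [Editor's sketch, not part of this change] The index/remaining
 * normalization above, isolated: a negative index counts back from the end
 * of the string, a negative remaining means "through the end", and both
 * are clamped so the copy loop can never run past the string or the
 * scratch buffer.  len and size are plain parameters here, not emulator
 * state:
 */
#include <stdint.h>

static void
substr_clamp(int64_t len, int64_t size, int64_t *index, int64_t *remaining)
{
	if (*index < 0) {
		*index += len;

		if (*index < 0 && *index + *remaining > 0) {
			*remaining += *index;
			*index = 0;
		}
	}

	if (*index >= len || *index < 0)
		*remaining = 0;
	else if (*remaining < 0)
		*remaining += len - *index;
	else if (*index + *remaining > size)
		*remaining = size - *index;
}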
3998 4005
3999 4006 case DIF_SUBR_TOUPPER:
4000 4007 case DIF_SUBR_TOLOWER: {
4001 4008 uintptr_t s = tupregs[0].dttk_value;
4002 4009 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4003 4010 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4004 4011 size_t len = dtrace_strlen((char *)s, size);
4005 4012 char lower, upper, convert;
4006 4013 int64_t i;
4007 4014
4008 4015 if (subr == DIF_SUBR_TOUPPER) {
4009 4016 lower = 'a';
4010 4017 upper = 'z';
4011 4018 convert = 'A';
4012 4019 } else {
4013 4020 lower = 'A';
4014 4021 upper = 'Z';
4015 4022 convert = 'a';
4016 4023 }
4017 4024
4018 4025 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
4019 4026 regs[rd] = NULL;
4020 4027 break;
4021 4028 }
4022 4029
4023 4030 if (!DTRACE_INSCRATCH(mstate, size)) {
4024 4031 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4025 4032 regs[rd] = NULL;
4026 4033 break;
4027 4034 }
4028 4035
4029 4036 for (i = 0; i < size - 1; i++) {
4030 4037 if ((c = dtrace_load8(s + i)) == '\0')
4031 4038 break;
4032 4039
4033 4040 if (c >= lower && c <= upper)
4034 4041 c = convert + (c - lower);
4035 4042
4036 4043 dest[i] = c;
4037 4044 }
4038 4045
4039 4046 ASSERT(i < size);
4040 4047 dest[i] = '\0';
4041 4048 regs[rd] = (uintptr_t)dest;
4042 4049 mstate->dtms_scratch_ptr += size;
4043 4050 break;
4044 4051 }
4045 4052
4046 4053 case DIF_SUBR_GETMAJOR:
4047 4054 #ifdef _LP64
4048 4055 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
4049 4056 #else
4050 4057 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ;
4051 4058 #endif
4052 4059 break;
4053 4060
4054 4061 case DIF_SUBR_GETMINOR:
4055 4062 #ifdef _LP64
4056 4063 regs[rd] = tupregs[0].dttk_value & MAXMIN64;
4057 4064 #else
4058 4065 regs[rd] = tupregs[0].dttk_value & MAXMIN;
4059 4066 #endif
4060 4067 break;
4061 4068
4062 4069 case DIF_SUBR_DDI_PATHNAME: {
4063 4070 /*
4064 4071 * This one is a galactic mess. We are going to roughly
4065 4072 * emulate ddi_pathname(), but it's made more complicated
4066 4073 * by the fact that we (a) want to include the minor name and
4067 4074 * (b) must proceed iteratively instead of recursively.
4068 4075 */
4069 4076 uintptr_t dest = mstate->dtms_scratch_ptr;
4070 4077 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4071 4078 char *start = (char *)dest, *end = start + size - 1;
4072 4079 uintptr_t daddr = tupregs[0].dttk_value;
4073 4080 int64_t minor = (int64_t)tupregs[1].dttk_value;
4074 4081 char *s;
4075 4082 int i, len, depth = 0;
4076 4083
4077 4084 /*
4078 4085 * Due to all the pointer jumping we do and context we must
4079 4086 * rely upon, we just mandate that the user must have kernel
4080 4087 * read privileges to use this routine.
4081 4088 */
4082 4089 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) {
4083 4090 *flags |= CPU_DTRACE_KPRIV;
4084 4091 *illval = daddr;
4085 4092 regs[rd] = NULL;
4086 4093 }
4087 4094
4088 4095 if (!DTRACE_INSCRATCH(mstate, size)) {
4089 4096 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4090 4097 regs[rd] = NULL;
4091 4098 break;
4092 4099 }
4093 4100
4094 4101 *end = '\0';
4095 4102
4096 4103 /*
4097 4104 * We want to have a name for the minor. In order to do this,
4098 4105 * we need to walk the minor list from the devinfo. We want
4099 4106 * to be sure that we don't infinitely walk a circular list,
4100 4107 * so we check for circularity by sending a scout pointer
4101 4108 * ahead two elements for every element that we iterate over;
4102 4109 * if the list is circular, these will ultimately point to the
4103 4110 * same element. You may recognize this little trick as the
4104 4111 * answer to a stupid interview question -- one that always
4105 4112 * seems to be asked by those who had to have it laboriously
4106 4113 * explained to them, and who can't even concisely describe
4107 4114 * the conditions under which one would be forced to resort to
4108 4115 * this technique. Needless to say, those conditions are
4109 4116 * found here -- and probably only here. Is this the only use
4110 4117 * of this infamous trick in shipping, production code? If it
4111 4118 * isn't, it probably should be...
4112 4119 */
4113 4120 if (minor != -1) {
4114 4121 uintptr_t maddr = dtrace_loadptr(daddr +
4115 4122 offsetof(struct dev_info, devi_minor));
4116 4123
4117 4124 uintptr_t next = offsetof(struct ddi_minor_data, next);
4118 4125 uintptr_t name = offsetof(struct ddi_minor_data,
4119 4126 d_minor) + offsetof(struct ddi_minor, name);
4120 4127 uintptr_t dev = offsetof(struct ddi_minor_data,
4121 4128 d_minor) + offsetof(struct ddi_minor, dev);
4122 4129 uintptr_t scout;
4123 4130
4124 4131 if (maddr != NULL)
4125 4132 scout = dtrace_loadptr(maddr + next);
4126 4133
4127 4134 while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
4128 4135 uint64_t m;
4129 4136 #ifdef _LP64
4130 4137 m = dtrace_load64(maddr + dev) & MAXMIN64;
4131 4138 #else
4132 4139 m = dtrace_load32(maddr + dev) & MAXMIN;
4133 4140 #endif
4134 4141 if (m != minor) {
4135 4142 maddr = dtrace_loadptr(maddr + next);
4136 4143
4137 4144 if (scout == NULL)
4138 4145 continue;
4139 4146
4140 4147 scout = dtrace_loadptr(scout + next);
4141 4148
4142 4149 if (scout == NULL)
4143 4150 continue;
4144 4151
4145 4152 scout = dtrace_loadptr(scout + next);
4146 4153
4147 4154 if (scout == NULL)
4148 4155 continue;
4149 4156
4150 4157 if (scout == maddr) {
4151 4158 *flags |= CPU_DTRACE_ILLOP;
4152 4159 break;
4153 4160 }
4154 4161
4155 4162 continue;
4156 4163 }
4157 4164
4158 4165 /*
4159 4166 * We have the minor data. Now we need to
4160 4167 * copy the minor's name into the end of the
4161 4168 * pathname.
4162 4169 */
4163 4170 s = (char *)dtrace_loadptr(maddr + name);
4164 4171 len = dtrace_strlen(s, size);
4165 4172
4166 4173 if (*flags & CPU_DTRACE_FAULT)
4167 4174 break;
4168 4175
4169 4176 if (len != 0) {
4170 4177 if ((end -= (len + 1)) < start)
4171 4178 break;
4172 4179
4173 4180 *end = ':';
4174 4181 }
4175 4182
4176 4183 for (i = 1; i <= len; i++)
4177 4184 end[i] = dtrace_load8((uintptr_t)s++);
4178 4185 break;
4179 4186 }
4180 4187 }
4181 4188
4182 4189 while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
4183 4190 ddi_node_state_t devi_state;
4184 4191
4185 4192 devi_state = dtrace_load32(daddr +
4186 4193 offsetof(struct dev_info, devi_node_state));
4187 4194
4188 4195 if (*flags & CPU_DTRACE_FAULT)
4189 4196 break;
4190 4197
4191 4198 if (devi_state >= DS_INITIALIZED) {
4192 4199 s = (char *)dtrace_loadptr(daddr +
4193 4200 offsetof(struct dev_info, devi_addr));
4194 4201 len = dtrace_strlen(s, size);
4195 4202
4196 4203 if (*flags & CPU_DTRACE_FAULT)
4197 4204 break;
4198 4205
4199 4206 if (len != 0) {
4200 4207 if ((end -= (len + 1)) < start)
4201 4208 break;
4202 4209
4203 4210 *end = '@';
4204 4211 }
4205 4212
4206 4213 for (i = 1; i <= len; i++)
4207 4214 end[i] = dtrace_load8((uintptr_t)s++);
4208 4215 }
4209 4216
4210 4217 /*
4211 4218 * Now for the node name...
4212 4219 */
4213 4220 s = (char *)dtrace_loadptr(daddr +
4214 4221 offsetof(struct dev_info, devi_node_name));
4215 4222
4216 4223 daddr = dtrace_loadptr(daddr +
4217 4224 offsetof(struct dev_info, devi_parent));
4218 4225
4219 4226 /*
4220 4227 * If our parent is NULL (that is, if we're the root
4221 4228 * node), we're going to use the special path
4222 4229 * "devices".
4223 4230 */
4224 4231 if (daddr == NULL)
4225 4232 s = "devices";
4226 4233
4227 4234 len = dtrace_strlen(s, size);
4228 4235 if (*flags & CPU_DTRACE_FAULT)
4229 4236 break;
4230 4237
4231 4238 if ((end -= (len + 1)) < start)
4232 4239 break;
4233 4240
4234 4241 for (i = 1; i <= len; i++)
4235 4242 end[i] = dtrace_load8((uintptr_t)s++);
4236 4243 *end = '/';
4237 4244
4238 4245 if (depth++ > dtrace_devdepth_max) {
4239 4246 *flags |= CPU_DTRACE_ILLOP;
4240 4247 break;
4241 4248 }
4242 4249 }
4243 4250
4244 4251 if (end < start)
4245 4252 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4246 4253
4247 4254 if (daddr == NULL) {
4248 4255 regs[rd] = (uintptr_t)end;
4249 4256 mstate->dtms_scratch_ptr += size;
4250 4257 }
4251 4258
4252 4259 break;
4253 4260 }
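/*
 * [Editor's sketch, not part of this change] The "scout" above is the
 * classic two-speed cycle check (Floyd's tortoise and hare): one pointer
 * advances a single link per element visited, the other two; on a circular
 * list the two must eventually coincide.  On a stand-in node type:
 */
struct node {
	struct node *next;
};

static int
list_is_circular(const struct node *head)
{
	const struct node *slow = head, *fast = head;

	while (fast != NULL && fast->next != NULL) {
		slow = slow->next;		/* one step */
		fast = fast->next->next;	/* two steps */

		if (slow == fast)
			return (1);	/* the pointers met: a cycle */
	}

	return (0);	/* ran off the end: no cycle */
}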
4254 4261
4255 4262 case DIF_SUBR_STRJOIN: {
4256 4263 char *d = (char *)mstate->dtms_scratch_ptr;
4257 4264 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4258 4265 uintptr_t s1 = tupregs[0].dttk_value;
4259 4266 uintptr_t s2 = tupregs[1].dttk_value;
4260 4267 int i = 0;
4261 4268
4262 4269 if (!dtrace_strcanload(s1, size, mstate, vstate) ||
4263 4270 !dtrace_strcanload(s2, size, mstate, vstate)) {
4264 4271 regs[rd] = NULL;
4265 4272 break;
4266 4273 }
4267 4274
4268 4275 if (!DTRACE_INSCRATCH(mstate, size)) {
4269 4276 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4270 4277 regs[rd] = NULL;
4271 4278 break;
4272 4279 }
4273 4280
4274 4281 for (;;) {
4275 4282 if (i >= size) {
4276 4283 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4277 4284 regs[rd] = NULL;
4278 4285 break;
4279 4286 }
4280 4287
4281 4288 if ((d[i++] = dtrace_load8(s1++)) == '\0') {
4282 4289 i--;
4283 4290 break;
4284 4291 }
4285 4292 }
4286 4293
4287 4294 for (;;) {
4288 4295 if (i >= size) {
4289 4296 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4290 4297 regs[rd] = NULL;
4291 4298 break;
4292 4299 }
4293 4300
4294 4301 if ((d[i++] = dtrace_load8(s2++)) == '\0')
4295 4302 break;
4296 4303 }
4297 4304
4298 4305 if (i < size) {
4299 4306 mstate->dtms_scratch_ptr += i;
4300 4307 regs[rd] = (uintptr_t)d;
4301 4308 }
4302 4309
4303 4310 break;
4304 4311 }
4305 4312
4306 4313 case DIF_SUBR_LLTOSTR: {
4307 4314 int64_t i = (int64_t)tupregs[0].dttk_value;
4308 4315 uint64_t val, digit;
4309 4316 uint64_t size = 65; /* enough room for 2^64 in binary */
4310 4317 char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
4311 4318 int base = 10;
4312 4319
4313 4320 if (nargs > 1) {
4314 4321 if ((base = tupregs[1].dttk_value) <= 1 ||
4315 4322 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
4316 4323 *flags |= CPU_DTRACE_ILLOP;
4317 4324 break;
4318 4325 }
4319 4326 }
4320 4327
4321 4328 val = (base == 10 && i < 0) ? i * -1 : i;
4322 4329
4323 4330 if (!DTRACE_INSCRATCH(mstate, size)) {
4324 4331 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4325 4332 regs[rd] = NULL;
4326 4333 break;
4327 4334 }
4328 4335
4329 4336 for (*end-- = '\0'; val; val /= base) {
4330 4337 if ((digit = val % base) <= '9' - '0') {
4331 4338 *end-- = '0' + digit;
4332 4339 } else {
4333 4340 *end-- = 'a' + (digit - ('9' - '0') - 1);
4334 4341 }
4335 4342 }
4336 4343
4337 4344 if (i == 0 && base == 16)
4338 4345 *end-- = '0';
4339 4346
4340 4347 if (base == 16)
4341 4348 *end-- = 'x';
4342 4349
4343 4350 if (i == 0 || base == 8 || base == 16)
4344 4351 *end-- = '0';
4345 4352
4346 4353 if (i < 0 && base == 10)
4347 4354 *end-- = '-';
4348 4355
4349 4356 regs[rd] = (uintptr_t)end + 1;
4350 4357 mstate->dtms_scratch_ptr += size;
4351 4358 break;
4352 4359 }
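/*
 * [Editor's sketch, not part of this change] lltostr() above fills scratch
 * from the end, peeling one digit per division, which is why it returns
 * end + 1 rather than the start of the buffer.  The same backward fill for
 * base 10 (buf/bufsz are hypothetical; a bufsz of 21 covers any 64-bit
 * value plus the terminator):
 */
#include <stdint.h>
#include <stddef.h>

static char *
u64_to_dec(uint64_t val, char *buf, size_t bufsz)
{
	char *end = buf + bufsz - 1;

	*end = '\0';

	if (val == 0)
		*--end = '0';

	for (; val != 0; val /= 10)
		*--end = '0' + (char)(val % 10);

	return (end);	/* first digit, somewhere inside buf */
}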
4353 4360
4354 4361 case DIF_SUBR_HTONS:
4355 4362 case DIF_SUBR_NTOHS:
4356 4363 #ifdef _BIG_ENDIAN
4357 4364 regs[rd] = (uint16_t)tupregs[0].dttk_value;
4358 4365 #else
4359 4366 regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
4360 4367 #endif
4361 4368 break;
4362 4369
4363 4370
4364 4371 case DIF_SUBR_HTONL:
4365 4372 case DIF_SUBR_NTOHL:
4366 4373 #ifdef _BIG_ENDIAN
4367 4374 regs[rd] = (uint32_t)tupregs[0].dttk_value;
4368 4375 #else
4369 4376 regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
4370 4377 #endif
4371 4378 break;
4372 4379
4373 4380
4374 4381 case DIF_SUBR_HTONLL:
4375 4382 case DIF_SUBR_NTOHLL:
4376 4383 #ifdef _BIG_ENDIAN
4377 4384 regs[rd] = (uint64_t)tupregs[0].dttk_value;
4378 4385 #else
4379 4386 regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
4380 4387 #endif
4381 4388 break;
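/*
 * [Editor's sketch, not part of this change] On big-endian hosts the hton
 * and ntoh families above are the identity, which is why each pair shares
 * one case; on little-endian hosts they reduce to a byte swap like the one
 * DT_BSWAP_16() is used for here.  A 16-bit rendering:
 */
#include <stdint.h>

static uint16_t
bswap16(uint16_t x)
{
	/* exchange the high and low bytes */
	return ((uint16_t)((x << 8) | (x >> 8)));
}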
4382 4389
4383 4390
4384 4391 case DIF_SUBR_DIRNAME:
4385 4392 case DIF_SUBR_BASENAME: {
4386 4393 char *dest = (char *)mstate->dtms_scratch_ptr;
4387 4394 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4388 4395 uintptr_t src = tupregs[0].dttk_value;
4389 4396 int i, j, len = dtrace_strlen((char *)src, size);
4390 4397 int lastbase = -1, firstbase = -1, lastdir = -1;
4391 4398 int start, end;
4392 4399
4393 4400 if (!dtrace_canload(src, len + 1, mstate, vstate)) {
4394 4401 regs[rd] = NULL;
4395 4402 break;
4396 4403 }
4397 4404
4398 4405 if (!DTRACE_INSCRATCH(mstate, size)) {
4399 4406 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4400 4407 regs[rd] = NULL;
4401 4408 break;
4402 4409 }
4403 4410
4404 4411 /*
4405 4412		 * The basename and dirname of a zero-length string are
4406 4413		 * defined to be ".".
4407 4414 */
4408 4415 if (len == 0) {
4409 4416 len = 1;
4410 4417 src = (uintptr_t)".";
4411 4418 }
4412 4419
4413 4420 /*
4414 4421 * Start from the back of the string, moving back toward the
4415 4422 * front until we see a character that isn't a slash. That
4416 4423 * character is the last character in the basename.
4417 4424 */
4418 4425 for (i = len - 1; i >= 0; i--) {
4419 4426 if (dtrace_load8(src + i) != '/')
4420 4427 break;
4421 4428 }
4422 4429
4423 4430 if (i >= 0)
4424 4431 lastbase = i;
4425 4432
4426 4433 /*
4427 4434 * Starting from the last character in the basename, move
4428 4435 * towards the front until we find a slash. The character
4429 4436 * that we processed immediately before that is the first
4430 4437 * character in the basename.
4431 4438 */
4432 4439 for (; i >= 0; i--) {
4433 4440 if (dtrace_load8(src + i) == '/')
4434 4441 break;
4435 4442 }
4436 4443
4437 4444 if (i >= 0)
4438 4445 firstbase = i + 1;
4439 4446
4440 4447 /*
4441 4448 * Now keep going until we find a non-slash character. That
4442 4449 * character is the last character in the dirname.
4443 4450 */
4444 4451 for (; i >= 0; i--) {
4445 4452 if (dtrace_load8(src + i) != '/')
4446 4453 break;
4447 4454 }
4448 4455
4449 4456 if (i >= 0)
4450 4457 lastdir = i;
4451 4458
4452 4459 ASSERT(!(lastbase == -1 && firstbase != -1));
4453 4460 ASSERT(!(firstbase == -1 && lastdir != -1));
4454 4461
4455 4462 if (lastbase == -1) {
4456 4463 /*
4457 4464 * We didn't find a non-slash character. We know that
4458 4465 * the length is non-zero, so the whole string must be
4459 4466 * slashes. In either the dirname or the basename
4460 4467 * case, we return '/'.
4461 4468 */
4462 4469 ASSERT(firstbase == -1);
4463 4470 firstbase = lastbase = lastdir = 0;
4464 4471 }
4465 4472
4466 4473 if (firstbase == -1) {
4467 4474 /*
4468 4475 * The entire string consists only of a basename
4469 4476 * component. If we're looking for dirname, we need
4470 4477 * to change our string to be just "."; if we're
4471 4478 * looking for a basename, we'll just set the first
4472 4479 * character of the basename to be 0.
4473 4480 */
4474 4481 if (subr == DIF_SUBR_DIRNAME) {
4475 4482 ASSERT(lastdir == -1);
4476 4483 src = (uintptr_t)".";
4477 4484 lastdir = 0;
4478 4485 } else {
4479 4486 firstbase = 0;
4480 4487 }
4481 4488 }
4482 4489
4483 4490 if (subr == DIF_SUBR_DIRNAME) {
4484 4491 if (lastdir == -1) {
4485 4492 /*
4486 4493 * We know that we have a slash in the name --
4487 4494 * or lastdir would be set to 0, above. And
4488 4495 * because lastdir is -1, we know that this
4489 4496 * slash must be the first character. (That
4490 4497 * is, the full string must be of the form
4491 4498 * "/basename".) In this case, the last
4492 4499 * character of the directory name is 0.
4493 4500 */
4494 4501 lastdir = 0;
4495 4502 }
4496 4503
4497 4504 start = 0;
4498 4505 end = lastdir;
4499 4506 } else {
4500 4507 ASSERT(subr == DIF_SUBR_BASENAME);
4501 4508 ASSERT(firstbase != -1 && lastbase != -1);
4502 4509 start = firstbase;
4503 4510 end = lastbase;
4504 4511 }
4505 4512
4506 4513 for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
4507 4514 dest[j] = dtrace_load8(src + i);
4508 4515
4509 4516 dest[j] = '\0';
4510 4517 regs[rd] = (uintptr_t)dest;
4511 4518 mstate->dtms_scratch_ptr += size;
4512 4519 break;
4513 4520 }
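/*
 * [Editor's sketch, not part of this change] The three backward scans
 * above locate, in order: the last non-slash (the end of the basename),
 * the slash before it (so the first basename character), and the last
 * non-slash before that (the end of the dirname).  The same index
 * arithmetic on an ordinary C string (the all-slash and no-slash special
 * cases are patched up afterward, as above):
 */
#include <string.h>

static void
split_path(const char *path, int *firstbase, int *lastbase, int *lastdir)
{
	int i = (int)strlen(path) - 1;

	*firstbase = *lastbase = *lastdir = -1;

	while (i >= 0 && path[i] == '/')
		i--;			/* skip trailing slashes */
	if (i >= 0)
		*lastbase = i;

	while (i >= 0 && path[i] != '/')
		i--;			/* walk back over the basename */
	if (i >= 0)
		*firstbase = i + 1;

	while (i >= 0 && path[i] == '/')
		i--;			/* skip the slashes before it */
	if (i >= 0)
		*lastdir = i;
}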
4514 4521
4515 4522 case DIF_SUBR_GETF: {
4516 4523 uintptr_t fd = tupregs[0].dttk_value;
4517 4524 uf_info_t *finfo = &curthread->t_procp->p_user.u_finfo;
4518 4525 file_t *fp;
4519 4526
4520 4527 if (!dtrace_priv_proc(state, mstate)) {
4521 4528 regs[rd] = NULL;
4522 4529 break;
4523 4530 }
4524 4531
4525 4532 /*
4526 4533 * This is safe because fi_nfiles only increases, and the
4527 4534 * fi_list array is not freed when the array size doubles.
4528 4535 * (See the comment in flist_grow() for details on the
4529 4536 * management of the u_finfo structure.)
4530 4537 */
4531 4538 fp = fd < finfo->fi_nfiles ? finfo->fi_list[fd].uf_file : NULL;
4532 4539
4533 4540 mstate->dtms_getf = fp;
4534 4541 regs[rd] = (uintptr_t)fp;
4535 4542 break;
4536 4543 }
4537 4544
4538 4545 case DIF_SUBR_CLEANPATH: {
4539 4546 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4540 4547 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4541 4548 uintptr_t src = tupregs[0].dttk_value;
4542 4549 int i = 0, j = 0;
4543 4550 zone_t *z;
4544 4551
4545 4552 if (!dtrace_strcanload(src, size, mstate, vstate)) {
4546 4553 regs[rd] = NULL;
4547 4554 break;
4548 4555 }
4549 4556
4550 4557 if (!DTRACE_INSCRATCH(mstate, size)) {
4551 4558 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4552 4559 regs[rd] = NULL;
4553 4560 break;
4554 4561 }
4555 4562
4556 4563 /*
4557 4564 * Move forward, loading each character.
4558 4565 */
4559 4566 do {
4560 4567 c = dtrace_load8(src + i++);
4561 4568 next:
4562 4569 if (j + 5 >= size) /* 5 = strlen("/..c\0") */
4563 4570 break;
4564 4571
4565 4572 if (c != '/') {
4566 4573 dest[j++] = c;
4567 4574 continue;
4568 4575 }
4569 4576
4570 4577 c = dtrace_load8(src + i++);
4571 4578
4572 4579 if (c == '/') {
4573 4580 /*
4574 4581 * We have two slashes -- we can just advance
4575 4582 * to the next character.
4576 4583 */
4577 4584 goto next;
4578 4585 }
4579 4586
4580 4587 if (c != '.') {
4581 4588 /*
4582 4589 * This is not "." and it's not ".." -- we can
4583 4590 * just store the "/" and this character and
4584 4591 * drive on.
4585 4592 */
4586 4593 dest[j++] = '/';
4587 4594 dest[j++] = c;
4588 4595 continue;
4589 4596 }
4590 4597
4591 4598 c = dtrace_load8(src + i++);
4592 4599
4593 4600 if (c == '/') {
4594 4601 /*
4595 4602 * This is a "/./" component. We're not going
4596 4603 * to store anything in the destination buffer;
4597 4604 * we're just going to go to the next component.
4598 4605 */
4599 4606 goto next;
4600 4607 }
4601 4608
4602 4609 if (c != '.') {
4603 4610 /*
4604 4611 * This is not ".." -- we can just store the
4605 4612 * "/." and this character and continue
4606 4613 * processing.
4607 4614 */
4608 4615 dest[j++] = '/';
4609 4616 dest[j++] = '.';
4610 4617 dest[j++] = c;
4611 4618 continue;
4612 4619 }
4613 4620
4614 4621 c = dtrace_load8(src + i++);
4615 4622
4616 4623 if (c != '/' && c != '\0') {
4617 4624 /*
4618 4625 * This is not ".." -- it's "..[mumble]".
4619 4626 * We'll store the "/.." and this character
4620 4627 * and continue processing.
4621 4628 */
4622 4629 dest[j++] = '/';
4623 4630 dest[j++] = '.';
4624 4631 dest[j++] = '.';
4625 4632 dest[j++] = c;
4626 4633 continue;
4627 4634 }
4628 4635
4629 4636 /*
4630 4637 * This is "/../" or "/..\0". We need to back up
4631 4638 * our destination pointer until we find a "/".
4632 4639 */
4633 4640 i--;
4634 4641 while (j != 0 && dest[--j] != '/')
4635 4642 continue;
4636 4643
4637 4644 if (c == '\0')
4638 4645 dest[++j] = '/';
4639 4646 } while (c != '\0');
4640 4647
4641 4648 dest[j] = '\0';
4642 4649
4643 4650 if (mstate->dtms_getf != NULL &&
4644 4651 !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
4645 4652 (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
4646 4653 /*
4647 4654 * If we've done a getf() as a part of this ECB and we
4648 4655 * don't have kernel access (and we're not in the global
4649 4656 * zone), check if the path we cleaned up begins with
4650 4657 * the zone's root path, and trim it off if so. Note
4651 4658 * that this is an output cleanliness issue, not a
4652 4659 * security issue: knowing one's zone root path does
4653 4660 * not enable privilege escalation.
4654 4661 */
4655 4662 if (strstr(dest, z->zone_rootpath) == dest)
4656 4663 dest += strlen(z->zone_rootpath) - 1;
4657 4664 }
4658 4665
4659 4666 regs[rd] = (uintptr_t)dest;
4660 4667 mstate->dtms_scratch_ptr += size;
4661 4668 break;
4662 4669 }
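/*
 * [Editor's sketch, not part of this change] The only state the scan above
 * keeps is the output index j, so handling "/../" amounts to rewinding j
 * to the previous '/' in what has already been emitted.  That backup step
 * in isolation (dest and j as in the loop above):
 */
static int
backup_component(char *dest, int j)
{
	/* step back until j rests on the previous '/' (or on index 0) */
	while (j != 0 && dest[--j] != '/')
		continue;

	return (j);
}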
4663 4670
4664 4671 case DIF_SUBR_INET_NTOA:
4665 4672 case DIF_SUBR_INET_NTOA6:
4666 4673 case DIF_SUBR_INET_NTOP: {
4667 4674 size_t size;
4668 4675 int af, argi, i;
4669 4676 char *base, *end;
4670 4677
4671 4678 if (subr == DIF_SUBR_INET_NTOP) {
4672 4679 af = (int)tupregs[0].dttk_value;
4673 4680 argi = 1;
4674 4681 } else {
4675 4682 af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
4676 4683 argi = 0;
4677 4684 }
4678 4685
4679 4686 if (af == AF_INET) {
4680 4687 ipaddr_t ip4;
4681 4688 uint8_t *ptr8, val;
4682 4689
4683 4690 /*
4684 4691 * Safely load the IPv4 address.
4685 4692 */
4686 4693 ip4 = dtrace_load32(tupregs[argi].dttk_value);
4687 4694
4688 4695 /*
4689 4696			 * Check that an IPv4 string will fit in scratch.
4690 4697 */
4691 4698 size = INET_ADDRSTRLEN;
4692 4699 if (!DTRACE_INSCRATCH(mstate, size)) {
4693 4700 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4694 4701 regs[rd] = NULL;
4695 4702 break;
4696 4703 }
4697 4704 base = (char *)mstate->dtms_scratch_ptr;
4698 4705 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4699 4706
4700 4707 /*
4701 4708 * Stringify as a dotted decimal quad.
4702 4709 */
4703 4710 *end-- = '\0';
4704 4711 ptr8 = (uint8_t *)&ip4;
4705 4712 for (i = 3; i >= 0; i--) {
4706 4713 val = ptr8[i];
4707 4714
4708 4715 if (val == 0) {
4709 4716 *end-- = '0';
4710 4717 } else {
4711 4718 for (; val; val /= 10) {
4712 4719 *end-- = '0' + (val % 10);
4713 4720 }
4714 4721 }
4715 4722
4716 4723 if (i > 0)
4717 4724 *end-- = '.';
4718 4725 }
4719 4726 ASSERT(end + 1 >= base);
4720 4727
4721 4728 } else if (af == AF_INET6) {
4722 4729 struct in6_addr ip6;
4723 4730 int firstzero, tryzero, numzero, v6end;
4724 4731 uint16_t val;
4725 4732 const char digits[] = "0123456789abcdef";
4726 4733
4727 4734 /*
4728 4735			 * Stringify using RFC 1884 convention 2: 16-bit
4729 4736			 * hexadecimal values with zero-run compression.
4730 4737			 * Lowercase hexadecimal digits are used,
4731 4738			 * e.g. fe80::214:4fff:fe0b:76c8.
4732 4739 * The IPv4 embedded form is returned for inet_ntop,
4733 4740 * just the IPv4 string is returned for inet_ntoa6.
4734 4741 */
4735 4742
4736 4743 /*
4737 4744 * Safely load the IPv6 address.
4738 4745 */
4739 4746 dtrace_bcopy(
4740 4747 (void *)(uintptr_t)tupregs[argi].dttk_value,
4741 4748 (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));
4742 4749
4743 4750 /*
4744 4751			 * Check that an IPv6 string will fit in scratch.
4745 4752 */
4746 4753 size = INET6_ADDRSTRLEN;
4747 4754 if (!DTRACE_INSCRATCH(mstate, size)) {
4748 4755 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4749 4756 regs[rd] = NULL;
4750 4757 break;
4751 4758 }
4752 4759 base = (char *)mstate->dtms_scratch_ptr;
4753 4760 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4754 4761 *end-- = '\0';
4755 4762
4756 4763 /*
4757 4764 * Find the longest run of 16 bit zero values
4758 4765 * for the single allowed zero compression - "::".
4759 4766 */
4760 4767 firstzero = -1;
4761 4768 tryzero = -1;
4762 4769 numzero = 1;
4763 4770 for (i = 0; i < sizeof (struct in6_addr); i++) {
4764 4771 if (ip6._S6_un._S6_u8[i] == 0 &&
4765 4772 tryzero == -1 && i % 2 == 0) {
4766 4773 tryzero = i;
4767 4774 continue;
4768 4775 }
4769 4776
4770 4777 if (tryzero != -1 &&
4771 4778 (ip6._S6_un._S6_u8[i] != 0 ||
4772 4779 i == sizeof (struct in6_addr) - 1)) {
4773 4780
4774 4781 if (i - tryzero <= numzero) {
4775 4782 tryzero = -1;
4776 4783 continue;
4777 4784 }
4778 4785
4779 4786 firstzero = tryzero;
4780 4787 numzero = i - i % 2 - tryzero;
4781 4788 tryzero = -1;
4782 4789
4783 4790 if (ip6._S6_un._S6_u8[i] == 0 &&
4784 4791 i == sizeof (struct in6_addr) - 1)
4785 4792 numzero += 2;
4786 4793 }
4787 4794 }
4788 4795 ASSERT(firstzero + numzero <= sizeof (struct in6_addr));
4789 4796
4790 4797 /*
4791 4798 * Check for an IPv4 embedded address.
4792 4799 */
4793 4800 v6end = sizeof (struct in6_addr) - 2;
4794 4801 if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
4795 4802 IN6_IS_ADDR_V4COMPAT(&ip6)) {
4796 4803 for (i = sizeof (struct in6_addr) - 1;
4797 4804 i >= DTRACE_V4MAPPED_OFFSET; i--) {
4798 4805 ASSERT(end >= base);
4799 4806
4800 4807 val = ip6._S6_un._S6_u8[i];
4801 4808
4802 4809 if (val == 0) {
4803 4810 *end-- = '0';
4804 4811 } else {
4805 4812 for (; val; val /= 10) {
4806 4813 *end-- = '0' + val % 10;
4807 4814 }
4808 4815 }
4809 4816
4810 4817 if (i > DTRACE_V4MAPPED_OFFSET)
4811 4818 *end-- = '.';
4812 4819 }
4813 4820
4814 4821 if (subr == DIF_SUBR_INET_NTOA6)
4815 4822 goto inetout;
4816 4823
4817 4824 /*
4818 4825 * Set v6end to skip the IPv4 address that
4819 4826 * we have already stringified.
4820 4827 */
4821 4828 v6end = 10;
4822 4829 }
4823 4830
4824 4831 /*
4825 4832 * Build the IPv6 string by working through the
4826 4833 * address in reverse.
4827 4834 */
4828 4835 for (i = v6end; i >= 0; i -= 2) {
4829 4836 ASSERT(end >= base);
4830 4837
4831 4838 if (i == firstzero + numzero - 2) {
4832 4839 *end-- = ':';
4833 4840 *end-- = ':';
4834 4841 i -= numzero - 2;
4835 4842 continue;
4836 4843 }
4837 4844
4838 4845 if (i < 14 && i != firstzero - 2)
4839 4846 *end-- = ':';
4840 4847
4841 4848 val = (ip6._S6_un._S6_u8[i] << 8) +
4842 4849 ip6._S6_un._S6_u8[i + 1];
4843 4850
4844 4851 if (val == 0) {
4845 4852 *end-- = '0';
4846 4853 } else {
4847 4854 for (; val; val /= 16) {
4848 4855 *end-- = digits[val % 16];
4849 4856 }
4850 4857 }
4851 4858 }
4852 4859 ASSERT(end + 1 >= base);
4853 4860
4854 4861 } else {
4855 4862 /*
4856 4863			 * The user didn't use AF_INET or AF_INET6.
4857 4864 */
4858 4865 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
4859 4866 regs[rd] = NULL;
4860 4867 break;
4861 4868 }
4862 4869
4863 4870 inetout: regs[rd] = (uintptr_t)end + 1;
4864 4871 mstate->dtms_scratch_ptr += size;
4865 4872 break;
4866 4873 }
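/*
 * [Editor's sketch, not part of this change] The "::" placement above
 * comes from one linear pass that remembers the longest even-aligned run
 * of zero bytes (firstzero/numzero).  The same pass over a bare 16-byte
 * array standing in for struct in6_addr:
 */
#include <stdint.h>

static void
longest_zero_run(const uint8_t a[16], int *firstzero, int *numzero)
{
	int i, tryzero = -1;

	*firstzero = -1;
	*numzero = 1;	/* a run must beat a lone zero group */

	for (i = 0; i < 16; i++) {
		if (a[i] == 0 && tryzero == -1 && i % 2 == 0) {
			tryzero = i;	/* candidate run starts here */
			continue;
		}

		if (tryzero != -1 && (a[i] != 0 || i == 15)) {
			if (i - tryzero <= *numzero) {
				tryzero = -1;	/* shorter than the best */
				continue;
			}

			*firstzero = tryzero;
			*numzero = i - i % 2 - tryzero;
			tryzero = -1;

			if (a[i] == 0 && i == 15)
				*numzero += 2;	/* run reaches the end */
		}
	}
}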
4867 4874
4868 4875 }
4869 4876 }
4870 4877
4871 4878 /*
4872 4879 * Emulate the execution of DTrace IR instructions specified by the given
4873 4880 * DIF object. This function is deliberately void of assertions as all of
4874 4881 * the necessary checks are handled by a call to dtrace_difo_validate().
4875 4882 */
4876 4883 static uint64_t
4877 4884 dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
4878 4885 dtrace_vstate_t *vstate, dtrace_state_t *state)
4879 4886 {
4880 4887 const dif_instr_t *text = difo->dtdo_buf;
4881 4888 const uint_t textlen = difo->dtdo_len;
4882 4889 const char *strtab = difo->dtdo_strtab;
4883 4890 const uint64_t *inttab = difo->dtdo_inttab;
4884 4891
4885 4892 uint64_t rval = 0;
4886 4893 dtrace_statvar_t *svar;
4887 4894 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
4888 4895 dtrace_difv_t *v;
4889 4896 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
4890 4897 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
4891 4898
4892 4899 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
4893 4900 uint64_t regs[DIF_DIR_NREGS];
4894 4901 uint64_t *tmp;
4895 4902
4896 4903 uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
4897 4904 int64_t cc_r;
4898 4905 uint_t pc = 0, id, opc;
4899 4906 uint8_t ttop = 0;
4900 4907 dif_instr_t instr;
4901 4908 uint_t r1, r2, rd;
4902 4909
4903 4910 /*
4904 4911 * We stash the current DIF object into the machine state: we need it
4905 4912 * for subsequent access checking.
4906 4913 */
4907 4914 mstate->dtms_difo = difo;
4908 4915
4909 4916 regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */
4910 4917
4911 4918 while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
4912 4919 opc = pc;
4913 4920
4914 4921 instr = text[pc++];
4915 4922 r1 = DIF_INSTR_R1(instr);
4916 4923 r2 = DIF_INSTR_R2(instr);
4917 4924 rd = DIF_INSTR_RD(instr);
4918 4925
4919 4926 switch (DIF_INSTR_OP(instr)) {
4920 4927 case DIF_OP_OR:
4921 4928 regs[rd] = regs[r1] | regs[r2];
4922 4929 break;
4923 4930 case DIF_OP_XOR:
4924 4931 regs[rd] = regs[r1] ^ regs[r2];
4925 4932 break;
4926 4933 case DIF_OP_AND:
4927 4934 regs[rd] = regs[r1] & regs[r2];
4928 4935 break;
4929 4936 case DIF_OP_SLL:
4930 4937 regs[rd] = regs[r1] << regs[r2];
4931 4938 break;
4932 4939 case DIF_OP_SRL:
4933 4940 regs[rd] = regs[r1] >> regs[r2];
4934 4941 break;
4935 4942 case DIF_OP_SUB:
4936 4943 regs[rd] = regs[r1] - regs[r2];
4937 4944 break;
4938 4945 case DIF_OP_ADD:
4939 4946 regs[rd] = regs[r1] + regs[r2];
4940 4947 break;
4941 4948 case DIF_OP_MUL:
4942 4949 regs[rd] = regs[r1] * regs[r2];
4943 4950 break;
4944 4951 case DIF_OP_SDIV:
4945 4952 if (regs[r2] == 0) {
4946 4953 regs[rd] = 0;
4947 4954 *flags |= CPU_DTRACE_DIVZERO;
4948 4955 } else {
4949 4956 regs[rd] = (int64_t)regs[r1] /
4950 4957 (int64_t)regs[r2];
4951 4958 }
4952 4959 break;
4953 4960
4954 4961 case DIF_OP_UDIV:
4955 4962 if (regs[r2] == 0) {
4956 4963 regs[rd] = 0;
4957 4964 *flags |= CPU_DTRACE_DIVZERO;
4958 4965 } else {
4959 4966 regs[rd] = regs[r1] / regs[r2];
4960 4967 }
4961 4968 break;
4962 4969
4963 4970 case DIF_OP_SREM:
4964 4971 if (regs[r2] == 0) {
4965 4972 regs[rd] = 0;
4966 4973 *flags |= CPU_DTRACE_DIVZERO;
4967 4974 } else {
4968 4975 regs[rd] = (int64_t)regs[r1] %
4969 4976 (int64_t)regs[r2];
4970 4977 }
4971 4978 break;
4972 4979
4973 4980 case DIF_OP_UREM:
4974 4981 if (regs[r2] == 0) {
4975 4982 regs[rd] = 0;
4976 4983 *flags |= CPU_DTRACE_DIVZERO;
4977 4984 } else {
4978 4985 regs[rd] = regs[r1] % regs[r2];
4979 4986 }
4980 4987 break;
4981 4988
4982 4989 case DIF_OP_NOT:
4983 4990 regs[rd] = ~regs[r1];
4984 4991 break;
4985 4992 case DIF_OP_MOV:
4986 4993 regs[rd] = regs[r1];
4987 4994 break;
4988 4995 case DIF_OP_CMP:
4989 4996 cc_r = regs[r1] - regs[r2];
4990 4997 cc_n = cc_r < 0;
4991 4998 cc_z = cc_r == 0;
4992 4999 cc_v = 0;
4993 5000 cc_c = regs[r1] < regs[r2];
4994 5001 break;
4995 5002 case DIF_OP_TST:
4996 5003 cc_n = cc_v = cc_c = 0;
4997 5004 cc_z = regs[r1] == 0;
4998 5005 break;
4999 5006 case DIF_OP_BA:
5000 5007 pc = DIF_INSTR_LABEL(instr);
5001 5008 break;
5002 5009 case DIF_OP_BE:
5003 5010 if (cc_z)
5004 5011 pc = DIF_INSTR_LABEL(instr);
5005 5012 break;
5006 5013 case DIF_OP_BNE:
5007 5014 if (cc_z == 0)
5008 5015 pc = DIF_INSTR_LABEL(instr);
5009 5016 break;
5010 5017 case DIF_OP_BG:
5011 5018 if ((cc_z | (cc_n ^ cc_v)) == 0)
5012 5019 pc = DIF_INSTR_LABEL(instr);
5013 5020 break;
5014 5021 case DIF_OP_BGU:
5015 5022 if ((cc_c | cc_z) == 0)
5016 5023 pc = DIF_INSTR_LABEL(instr);
5017 5024 break;
5018 5025 case DIF_OP_BGE:
5019 5026 if ((cc_n ^ cc_v) == 0)
5020 5027 pc = DIF_INSTR_LABEL(instr);
5021 5028 break;
5022 5029 case DIF_OP_BGEU:
5023 5030 if (cc_c == 0)
5024 5031 pc = DIF_INSTR_LABEL(instr);
5025 5032 break;
5026 5033 case DIF_OP_BL:
5027 5034 if (cc_n ^ cc_v)
5028 5035 pc = DIF_INSTR_LABEL(instr);
5029 5036 break;
5030 5037 case DIF_OP_BLU:
5031 5038 if (cc_c)
5032 5039 pc = DIF_INSTR_LABEL(instr);
5033 5040 break;
5034 5041 case DIF_OP_BLE:
5035 5042 if (cc_z | (cc_n ^ cc_v))
5036 5043 pc = DIF_INSTR_LABEL(instr);
5037 5044 break;
5038 5045 case DIF_OP_BLEU:
5039 5046 if (cc_c | cc_z)
5040 5047 pc = DIF_INSTR_LABEL(instr);
5041 5048 break;
5042 5049 case DIF_OP_RLDSB:
5043 5050 if (!dtrace_canload(regs[r1], 1, mstate, vstate))
5044 5051 break;
5045 5052 /*FALLTHROUGH*/
5046 5053 case DIF_OP_LDSB:
5047 5054 regs[rd] = (int8_t)dtrace_load8(regs[r1]);
5048 5055 break;
5049 5056 case DIF_OP_RLDSH:
5050 5057 if (!dtrace_canload(regs[r1], 2, mstate, vstate))
5051 5058 break;
5052 5059 /*FALLTHROUGH*/
5053 5060 case DIF_OP_LDSH:
5054 5061 regs[rd] = (int16_t)dtrace_load16(regs[r1]);
5055 5062 break;
5056 5063 case DIF_OP_RLDSW:
5057 5064 if (!dtrace_canload(regs[r1], 4, mstate, vstate))
5058 5065 break;
5059 5066 /*FALLTHROUGH*/
5060 5067 case DIF_OP_LDSW:
5061 5068 regs[rd] = (int32_t)dtrace_load32(regs[r1]);
5062 5069 break;
5063 5070 case DIF_OP_RLDUB:
5064 5071 if (!dtrace_canload(regs[r1], 1, mstate, vstate))
5065 5072 break;
5066 5073 /*FALLTHROUGH*/
5067 5074 case DIF_OP_LDUB:
5068 5075 regs[rd] = dtrace_load8(regs[r1]);
5069 5076 break;
5070 5077 case DIF_OP_RLDUH:
5071 5078 if (!dtrace_canload(regs[r1], 2, mstate, vstate))
5072 5079 break;
5073 5080 /*FALLTHROUGH*/
5074 5081 case DIF_OP_LDUH:
5075 5082 regs[rd] = dtrace_load16(regs[r1]);
5076 5083 break;
5077 5084 case DIF_OP_RLDUW:
5078 5085 if (!dtrace_canload(regs[r1], 4, mstate, vstate))
5079 5086 break;
5080 5087 /*FALLTHROUGH*/
5081 5088 case DIF_OP_LDUW:
5082 5089 regs[rd] = dtrace_load32(regs[r1]);
5083 5090 break;
5084 5091 case DIF_OP_RLDX:
5085 5092 if (!dtrace_canload(regs[r1], 8, mstate, vstate))
5086 5093 break;
5087 5094 /*FALLTHROUGH*/
5088 5095 case DIF_OP_LDX:
5089 5096 regs[rd] = dtrace_load64(regs[r1]);
5090 5097 break;
5091 5098 case DIF_OP_ULDSB:
5092 5099 regs[rd] = (int8_t)
5093 5100 dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5094 5101 break;
5095 5102 case DIF_OP_ULDSH:
5096 5103 regs[rd] = (int16_t)
5097 5104 dtrace_fuword16((void *)(uintptr_t)regs[r1]);
5098 5105 break;
5099 5106 case DIF_OP_ULDSW:
5100 5107 regs[rd] = (int32_t)
5101 5108 dtrace_fuword32((void *)(uintptr_t)regs[r1]);
5102 5109 break;
5103 5110 case DIF_OP_ULDUB:
5104 5111 regs[rd] =
5105 5112 dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5106 5113 break;
5107 5114 case DIF_OP_ULDUH:
5108 5115 regs[rd] =
5109 5116 dtrace_fuword16((void *)(uintptr_t)regs[r1]);
5110 5117 break;
5111 5118 case DIF_OP_ULDUW:
5112 5119 regs[rd] =
5113 5120 dtrace_fuword32((void *)(uintptr_t)regs[r1]);
5114 5121 break;
5115 5122 case DIF_OP_ULDX:
5116 5123 regs[rd] =
5117 5124 dtrace_fuword64((void *)(uintptr_t)regs[r1]);
5118 5125 break;
5119 5126 case DIF_OP_RET:
5120 5127 rval = regs[rd];
5121 5128 pc = textlen;
5122 5129 break;
5123 5130 case DIF_OP_NOP:
5124 5131 break;
5125 5132 case DIF_OP_SETX:
5126 5133 regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
5127 5134 break;
5128 5135 case DIF_OP_SETS:
5129 5136 regs[rd] = (uint64_t)(uintptr_t)
5130 5137 (strtab + DIF_INSTR_STRING(instr));
5131 5138 break;
5132 5139 case DIF_OP_SCMP: {
5133 5140 size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
5134 5141 uintptr_t s1 = regs[r1];
5135 5142 uintptr_t s2 = regs[r2];
5136 5143
5137 5144 if (s1 != NULL &&
5138 5145 !dtrace_strcanload(s1, sz, mstate, vstate))
5139 5146 break;
5140 5147 if (s2 != NULL &&
5141 5148 !dtrace_strcanload(s2, sz, mstate, vstate))
5142 5149 break;
5143 5150
5144 5151 cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz);
5145 5152
5146 5153 cc_n = cc_r < 0;
5147 5154 cc_z = cc_r == 0;
5148 5155 cc_v = cc_c = 0;
5149 5156 break;
5150 5157 }
5151 5158 case DIF_OP_LDGA:
5152 5159 regs[rd] = dtrace_dif_variable(mstate, state,
5153 5160 r1, regs[r2]);
5154 5161 break;
5155 5162 case DIF_OP_LDGS:
5156 5163 id = DIF_INSTR_VAR(instr);
5157 5164
5158 5165 if (id >= DIF_VAR_OTHER_UBASE) {
5159 5166 uintptr_t a;
5160 5167
5161 5168 id -= DIF_VAR_OTHER_UBASE;
5162 5169 svar = vstate->dtvs_globals[id];
5163 5170 ASSERT(svar != NULL);
5164 5171 v = &svar->dtsv_var;
5165 5172
5166 5173 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
5167 5174 regs[rd] = svar->dtsv_data;
5168 5175 break;
5169 5176 }
5170 5177
5171 5178 a = (uintptr_t)svar->dtsv_data;
5172 5179
5173 5180 if (*(uint8_t *)a == UINT8_MAX) {
5174 5181 /*
5175 5182 * If the 0th byte is set to UINT8_MAX
5176 5183 * then this is to be treated as a
5177 5184 * reference to a NULL variable.
5178 5185 */
5179 5186 regs[rd] = NULL;
5180 5187 } else {
5181 5188 regs[rd] = a + sizeof (uint64_t);
5182 5189 }
5183 5190
5184 5191 break;
5185 5192 }
5186 5193
5187 5194 regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
5188 5195 break;
5189 5196
5190 5197 case DIF_OP_STGS:
5191 5198 id = DIF_INSTR_VAR(instr);
5192 5199
5193 5200 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5194 5201 id -= DIF_VAR_OTHER_UBASE;
5195 5202
5196 5203 svar = vstate->dtvs_globals[id];
5197 5204 ASSERT(svar != NULL);
5198 5205 v = &svar->dtsv_var;
5199 5206
5200 5207 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5201 5208 uintptr_t a = (uintptr_t)svar->dtsv_data;
5202 5209
5203 5210 ASSERT(a != NULL);
5204 5211 ASSERT(svar->dtsv_size != 0);
5205 5212
5206 5213 if (regs[rd] == NULL) {
5207 5214 *(uint8_t *)a = UINT8_MAX;
5208 5215 break;
5209 5216 } else {
5210 5217 *(uint8_t *)a = 0;
5211 5218 a += sizeof (uint64_t);
5212 5219 }
5213 5220 if (!dtrace_vcanload(
5214 5221 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
5215 5222 mstate, vstate))
5216 5223 break;
5217 5224
5218 5225 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5219 5226 (void *)a, &v->dtdv_type);
5220 5227 break;
5221 5228 }
5222 5229
5223 5230 svar->dtsv_data = regs[rd];
5224 5231 break;
5225 5232
5226 5233 case DIF_OP_LDTA:
5227 5234 /*
5228 5235 * There are no DTrace built-in thread-local arrays at
5229 5236 * present. This opcode is saved for future work.
5230 5237 */
5231 5238 *flags |= CPU_DTRACE_ILLOP;
5232 5239 regs[rd] = 0;
5233 5240 break;
5234 5241
5235 5242 case DIF_OP_LDLS:
5236 5243 id = DIF_INSTR_VAR(instr);
5237 5244
5238 5245 if (id < DIF_VAR_OTHER_UBASE) {
5239 5246 /*
5240 5247 * For now, this has no meaning.
5241 5248 */
5242 5249 regs[rd] = 0;
5243 5250 break;
5244 5251 }
5245 5252
5246 5253 id -= DIF_VAR_OTHER_UBASE;
5247 5254
5248 5255 ASSERT(id < vstate->dtvs_nlocals);
5249 5256 ASSERT(vstate->dtvs_locals != NULL);
5250 5257
5251 5258 svar = vstate->dtvs_locals[id];
5252 5259 ASSERT(svar != NULL);
5253 5260 v = &svar->dtsv_var;
5254 5261
5255 5262 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5256 5263 uintptr_t a = (uintptr_t)svar->dtsv_data;
5257 5264 size_t sz = v->dtdv_type.dtdt_size;
5258 5265
5259 5266 sz += sizeof (uint64_t);
5260 5267 ASSERT(svar->dtsv_size == NCPU * sz);
5261 5268 a += CPU->cpu_id * sz;
5262 5269
5263 5270 if (*(uint8_t *)a == UINT8_MAX) {
5264 5271 /*
5265 5272 * If the 0th byte is set to UINT8_MAX
5266 5273 * then this is to be treated as a
5267 5274 * reference to a NULL variable.
5268 5275 */
5269 5276 regs[rd] = NULL;
5270 5277 } else {
5271 5278 regs[rd] = a + sizeof (uint64_t);
5272 5279 }
5273 5280
5274 5281 break;
5275 5282 }
5276 5283
5277 5284 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
5278 5285 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
5279 5286 regs[rd] = tmp[CPU->cpu_id];
5280 5287 break;
5281 5288
5282 5289 case DIF_OP_STLS:
5283 5290 id = DIF_INSTR_VAR(instr);
5284 5291
5285 5292 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5286 5293 id -= DIF_VAR_OTHER_UBASE;
5287 5294 ASSERT(id < vstate->dtvs_nlocals);
5288 5295
5289 5296 ASSERT(vstate->dtvs_locals != NULL);
5290 5297 svar = vstate->dtvs_locals[id];
5291 5298 ASSERT(svar != NULL);
5292 5299 v = &svar->dtsv_var;
5293 5300
5294 5301 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5295 5302 uintptr_t a = (uintptr_t)svar->dtsv_data;
5296 5303 size_t sz = v->dtdv_type.dtdt_size;
5297 5304
5298 5305 sz += sizeof (uint64_t);
5299 5306 ASSERT(svar->dtsv_size == NCPU * sz);
5300 5307 a += CPU->cpu_id * sz;
5301 5308
5302 5309 if (regs[rd] == NULL) {
5303 5310 *(uint8_t *)a = UINT8_MAX;
5304 5311 break;
5305 5312 } else {
5306 5313 *(uint8_t *)a = 0;
5307 5314 a += sizeof (uint64_t);
5308 5315 }
5309 5316
5310 5317 if (!dtrace_vcanload(
5311 5318 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
5312 5319 mstate, vstate))
5313 5320 break;
5314 5321
5315 5322 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5316 5323 (void *)a, &v->dtdv_type);
5317 5324 break;
5318 5325 }
5319 5326
5320 5327 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
5321 5328 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
5322 5329 tmp[CPU->cpu_id] = regs[rd];
5323 5330 break;
5324 5331
5325 5332 case DIF_OP_LDTS: {
5326 5333 dtrace_dynvar_t *dvar;
5327 5334 dtrace_key_t *key;
5328 5335
5329 5336 id = DIF_INSTR_VAR(instr);
5330 5337 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5331 5338 id -= DIF_VAR_OTHER_UBASE;
5332 5339 v = &vstate->dtvs_tlocals[id];
5333 5340
5334 5341 key = &tupregs[DIF_DTR_NREGS];
5335 5342 key[0].dttk_value = (uint64_t)id;
5336 5343 key[0].dttk_size = 0;
5337 5344 DTRACE_TLS_THRKEY(key[1].dttk_value);
5338 5345 key[1].dttk_size = 0;
5339 5346
5340 5347 dvar = dtrace_dynvar(dstate, 2, key,
5341 5348 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
5342 5349 mstate, vstate);
5343 5350
5344 5351 if (dvar == NULL) {
5345 5352 regs[rd] = 0;
5346 5353 break;
5347 5354 }
5348 5355
5349 5356 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5350 5357 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
5351 5358 } else {
5352 5359 regs[rd] = *((uint64_t *)dvar->dtdv_data);
5353 5360 }
5354 5361
5355 5362 break;
5356 5363 }
5357 5364
5358 5365 case DIF_OP_STTS: {
5359 5366 dtrace_dynvar_t *dvar;
5360 5367 dtrace_key_t *key;
5361 5368
5362 5369 id = DIF_INSTR_VAR(instr);
5363 5370 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5364 5371 id -= DIF_VAR_OTHER_UBASE;
5365 5372
5366 5373 key = &tupregs[DIF_DTR_NREGS];
5367 5374 key[0].dttk_value = (uint64_t)id;
5368 5375 key[0].dttk_size = 0;
5369 5376 DTRACE_TLS_THRKEY(key[1].dttk_value);
5370 5377 key[1].dttk_size = 0;
5371 5378 v = &vstate->dtvs_tlocals[id];
5372 5379
5373 5380 dvar = dtrace_dynvar(dstate, 2, key,
5374 5381 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5375 5382 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5376 5383 regs[rd] ? DTRACE_DYNVAR_ALLOC :
5377 5384 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
5378 5385
5379 5386 /*
5380 5387 * Given that we're storing to thread-local data,
5381 5388 * we need to flush our predicate cache.
5382 5389 */
5383 5390 curthread->t_predcache = NULL;
5384 5391
5385 5392 if (dvar == NULL)
5386 5393 break;
5387 5394
5388 5395 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5389 5396 if (!dtrace_vcanload(
5390 5397 (void *)(uintptr_t)regs[rd],
5391 5398 &v->dtdv_type, mstate, vstate))
5392 5399 break;
5393 5400
5394 5401 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5395 5402 dvar->dtdv_data, &v->dtdv_type);
5396 5403 } else {
5397 5404 *((uint64_t *)dvar->dtdv_data) = regs[rd];
5398 5405 }
5399 5406
5400 5407 break;
5401 5408 }
5402 5409
5403 5410 case DIF_OP_SRA:
5404 5411 regs[rd] = (int64_t)regs[r1] >> regs[r2];
5405 5412 break;
5406 5413
5407 5414 case DIF_OP_CALL:
5408 5415 dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
5409 5416 regs, tupregs, ttop, mstate, state);
5410 5417 break;
5411 5418
5412 5419 case DIF_OP_PUSHTR:
5413 5420 if (ttop == DIF_DTR_NREGS) {
5414 5421 *flags |= CPU_DTRACE_TUPOFLOW;
5415 5422 break;
5416 5423 }
5417 5424
5418 5425 if (r1 == DIF_TYPE_STRING) {
5419 5426 /*
5420 5427 * If this is a string type and the size is 0,
5421 5428 * we'll use the system-wide default string
5422 5429 * size. Note that we are _not_ looking at
5423 5430 * the value of the DTRACEOPT_STRSIZE option;
5424 5431 * had this been set, we would expect to have
5425 5432 * a non-zero size value in the "pushtr".
5426 5433 */
5427 5434 tupregs[ttop].dttk_size =
5428 5435 dtrace_strlen((char *)(uintptr_t)regs[rd],
5429 5436 regs[r2] ? regs[r2] :
5430 5437 dtrace_strsize_default) + 1;
5431 5438 } else {
5432 5439 tupregs[ttop].dttk_size = regs[r2];
5433 5440 }
5434 5441
5435 5442 tupregs[ttop++].dttk_value = regs[rd];
5436 5443 break;
5437 5444
5438 5445 case DIF_OP_PUSHTV:
5439 5446 if (ttop == DIF_DTR_NREGS) {
5440 5447 *flags |= CPU_DTRACE_TUPOFLOW;
5441 5448 break;
5442 5449 }
5443 5450
5444 5451 tupregs[ttop].dttk_value = regs[rd];
5445 5452 tupregs[ttop++].dttk_size = 0;
5446 5453 break;
5447 5454
5448 5455 case DIF_OP_POPTS:
5449 5456 if (ttop != 0)
5450 5457 ttop--;
5451 5458 break;
5452 5459
5453 5460 case DIF_OP_FLUSHTS:
5454 5461 ttop = 0;
5455 5462 break;
5456 5463
5457 5464 case DIF_OP_LDGAA:
5458 5465 case DIF_OP_LDTAA: {
5459 5466 dtrace_dynvar_t *dvar;
5460 5467 dtrace_key_t *key = tupregs;
5461 5468 uint_t nkeys = ttop;
5462 5469
5463 5470 id = DIF_INSTR_VAR(instr);
5464 5471 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5465 5472 id -= DIF_VAR_OTHER_UBASE;
5466 5473
5467 5474 key[nkeys].dttk_value = (uint64_t)id;
5468 5475 key[nkeys++].dttk_size = 0;
5469 5476
5470 5477 if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
5471 5478 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
5472 5479 key[nkeys++].dttk_size = 0;
5473 5480 v = &vstate->dtvs_tlocals[id];
5474 5481 } else {
5475 5482 v = &vstate->dtvs_globals[id]->dtsv_var;
5476 5483 }
5477 5484
5478 5485 dvar = dtrace_dynvar(dstate, nkeys, key,
5479 5486 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5480 5487 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5481 5488 DTRACE_DYNVAR_NOALLOC, mstate, vstate);
5482 5489
5483 5490 if (dvar == NULL) {
5484 5491 regs[rd] = 0;
5485 5492 break;
5486 5493 }
5487 5494
5488 5495 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5489 5496 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
5490 5497 } else {
5491 5498 regs[rd] = *((uint64_t *)dvar->dtdv_data);
5492 5499 }
5493 5500
5494 5501 break;
5495 5502 }
5496 5503
5497 5504 case DIF_OP_STGAA:
5498 5505 case DIF_OP_STTAA: {
5499 5506 dtrace_dynvar_t *dvar;
5500 5507 dtrace_key_t *key = tupregs;
5501 5508 uint_t nkeys = ttop;
5502 5509
5503 5510 id = DIF_INSTR_VAR(instr);
5504 5511 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5505 5512 id -= DIF_VAR_OTHER_UBASE;
5506 5513
5507 5514 key[nkeys].dttk_value = (uint64_t)id;
5508 5515 key[nkeys++].dttk_size = 0;
5509 5516
5510 5517 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
5511 5518 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
5512 5519 key[nkeys++].dttk_size = 0;
5513 5520 v = &vstate->dtvs_tlocals[id];
5514 5521 } else {
5515 5522 v = &vstate->dtvs_globals[id]->dtsv_var;
5516 5523 }
5517 5524
5518 5525 dvar = dtrace_dynvar(dstate, nkeys, key,
5519 5526 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5520 5527 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5521 5528 regs[rd] ? DTRACE_DYNVAR_ALLOC :
5522 5529 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
5523 5530
5524 5531 if (dvar == NULL)
5525 5532 break;
5526 5533
5527 5534 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5528 5535 if (!dtrace_vcanload(
5529 5536 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
5530 5537 mstate, vstate))
5531 5538 break;
5532 5539
5533 5540 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5534 5541 dvar->dtdv_data, &v->dtdv_type);
5535 5542 } else {
5536 5543 *((uint64_t *)dvar->dtdv_data) = regs[rd];
5537 5544 }
5538 5545
5539 5546 break;
5540 5547 }
5541 5548
5542 5549 case DIF_OP_ALLOCS: {
5543 5550 uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
5544 5551 size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];
5545 5552
5546 5553 /*
5547 5554 * Rounding up the user allocation size could have
5548 5555 * overflowed large, bogus allocations (like -1ULL) to
5549 5556 * 0.
5550 5557 */
5551 5558 if (size < regs[r1] ||
5552 5559 !DTRACE_INSCRATCH(mstate, size)) {
5553 5560 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5554 5561 regs[rd] = NULL;
5555 5562 break;
5556 5563 }
5557 5564
5558 5565 dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
5559 5566 mstate->dtms_scratch_ptr += size;
5560 5567 regs[rd] = ptr;
5561 5568 break;
5562 5569 }
5563 5570
5564 5571 case DIF_OP_COPYS:
5565 5572 if (!dtrace_canstore(regs[rd], regs[r2],
5566 5573 mstate, vstate)) {
5567 5574 *flags |= CPU_DTRACE_BADADDR;
5568 5575 *illval = regs[rd];
5569 5576 break;
5570 5577 }
5571 5578
5572 5579 if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
5573 5580 break;
5574 5581
5575 5582 dtrace_bcopy((void *)(uintptr_t)regs[r1],
5576 5583 (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
5577 5584 break;
5578 5585
5579 5586 case DIF_OP_STB:
5580 5587 if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
5581 5588 *flags |= CPU_DTRACE_BADADDR;
5582 5589 *illval = regs[rd];
5583 5590 break;
5584 5591 }
5585 5592 *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
5586 5593 break;
5587 5594
5588 5595 case DIF_OP_STH:
5589 5596 if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
5590 5597 *flags |= CPU_DTRACE_BADADDR;
5591 5598 *illval = regs[rd];
5592 5599 break;
5593 5600 }
5594 5601 if (regs[rd] & 1) {
5595 5602 *flags |= CPU_DTRACE_BADALIGN;
5596 5603 *illval = regs[rd];
5597 5604 break;
5598 5605 }
5599 5606 *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
5600 5607 break;
5601 5608
5602 5609 case DIF_OP_STW:
5603 5610 if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
5604 5611 *flags |= CPU_DTRACE_BADADDR;
5605 5612 *illval = regs[rd];
5606 5613 break;
5607 5614 }
5608 5615 if (regs[rd] & 3) {
5609 5616 *flags |= CPU_DTRACE_BADALIGN;
5610 5617 *illval = regs[rd];
5611 5618 break;
5612 5619 }
5613 5620 *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
5614 5621 break;
5615 5622
5616 5623 case DIF_OP_STX:
5617 5624 if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
5618 5625 *flags |= CPU_DTRACE_BADADDR;
5619 5626 *illval = regs[rd];
5620 5627 break;
5621 5628 }
5622 5629 if (regs[rd] & 7) {
5623 5630 *flags |= CPU_DTRACE_BADALIGN;
5624 5631 *illval = regs[rd];
5625 5632 break;
5626 5633 }
5627 5634 *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
5628 5635 break;
5629 5636 }
5630 5637 }
5631 5638
5632 5639 if (!(*flags & CPU_DTRACE_FAULT))
5633 5640 return (rval);
5634 5641
5635 5642 mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
5636 5643 mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;
5637 5644
5638 5645 return (0);
5639 5646 }
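
As an editorial aside for readers new to this style of emulator: the stand-alone sketch below shows the same fetch/decode/dispatch shape -- a pc-driven loop, a compare that latches condition codes, and branches that test them, with a RET that ends the loop. The opcode names, the encoding, and the eight-register file here are invented for illustration; the real DIF encoding is described in <sys/dtrace.h>.

	#include <stdint.h>
	#include <stdio.h>

	/* Invented opcodes for illustration; the real DIF encoding differs. */
	enum { OP_SET, OP_ADD, OP_CMP, OP_BNE, OP_RET };

	typedef struct {
		uint8_t op, r1, r2, rd;		/* rd doubles as branch target */
	} instr_t;

	static uint64_t
	emulate(const instr_t *text, uint32_t textlen)
	{
		uint64_t regs[8] = { 0 };	/* regs[0] plays the role of %r0 */
		uint8_t cc_z = 0;		/* the real emulator also keeps */
		uint32_t pc = 0;		/* cc_n, cc_v, and cc_c */

		while (pc < textlen) {
			instr_t in = text[pc++];

			switch (in.op) {
			case OP_SET:		/* rd <- small immediate in r1 */
				regs[in.rd] = in.r1;
				break;
			case OP_ADD:		/* rd <- r1 + r2 */
				regs[in.rd] = regs[in.r1] + regs[in.r2];
				break;
			case OP_CMP:		/* latch condition codes */
				cc_z = regs[in.r1] == regs[in.r2];
				break;
			case OP_BNE:		/* branch to rd if last CMP != */
				if (!cc_z)
					pc = in.rd;
				break;
			case OP_RET:		/* like DIF_OP_RET: end the loop */
				return (regs[in.rd]);
			}
		}
		return (0);
	}

	int
	main(void)
	{
		instr_t prog[] = {
			{ OP_SET, 1, 0, 2 },	/* r2 <- 1 (increment) */
			{ OP_SET, 5, 0, 3 },	/* r3 <- 5 (loop limit) */
			{ OP_ADD, 1, 2, 1 },	/* r1 <- r1 + r2 */
			{ OP_CMP, 1, 3, 0 },	/* compare r1 with r3 */
			{ OP_BNE, 0, 0, 2 },	/* not equal: back to the ADD */
			{ OP_RET, 0, 0, 1 },	/* return r1 (prints 5) */
		};

		(void) printf("%llu\n", (unsigned long long)emulate(prog,
		    sizeof (prog) / sizeof (prog[0])));
		return (0);
	}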
5640 5647
5641 5648 static void
5642 5649 dtrace_action_breakpoint(dtrace_ecb_t *ecb)
5643 5650 {
5644 5651 dtrace_probe_t *probe = ecb->dte_probe;
5645 5652 dtrace_provider_t *prov = probe->dtpr_provider;
5646 5653 char c[DTRACE_FULLNAMELEN + 80], *str;
5647 5654 char *msg = "dtrace: breakpoint action at probe ";
5648 5655 char *ecbmsg = " (ecb ";
5649 5656 uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
5650 5657 uintptr_t val = (uintptr_t)ecb;
5651 5658 int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;
5652 5659
5653 5660 if (dtrace_destructive_disallow)
5654 5661 return;
5655 5662
5656 5663 /*
5657 5664 * It's impossible to be taking action on the NULL probe.
5658 5665 */
5659 5666 ASSERT(probe != NULL);
5660 5667
5661 5668 /*
5662 5669 * This is a poor man's (destitute man's?) sprintf(): we want to
5663 5670 * print the provider name, module name, function name and name of
5664 5671 * the probe, along with the hex address of the ECB with the breakpoint
5665 5672 * action -- all of which we must place in the character buffer by
5666 5673 * hand.
5667 5674 */
5668 5675 while (*msg != '\0')
5669 5676 c[i++] = *msg++;
5670 5677
5671 5678 for (str = prov->dtpv_name; *str != '\0'; str++)
5672 5679 c[i++] = *str;
5673 5680 c[i++] = ':';
5674 5681
5675 5682 for (str = probe->dtpr_mod; *str != '\0'; str++)
5676 5683 c[i++] = *str;
5677 5684 c[i++] = ':';
5678 5685
5679 5686 for (str = probe->dtpr_func; *str != '\0'; str++)
5680 5687 c[i++] = *str;
5681 5688 c[i++] = ':';
5682 5689
5683 5690 for (str = probe->dtpr_name; *str != '\0'; str++)
5684 5691 c[i++] = *str;
5685 5692
5686 5693 while (*ecbmsg != '\0')
5687 5694 c[i++] = *ecbmsg++;
5688 5695
5689 5696 while (shift >= 0) {
5690 5697 mask = (uintptr_t)0xf << shift;
5691 5698
5692 5699 if (val >= ((uintptr_t)1 << shift))
5693 5700 c[i++] = "0123456789abcdef"[(val & mask) >> shift];
5694 5701 shift -= 4;
5695 5702 }
5696 5703
5697 5704 c[i++] = ')';
5698 5705 c[i] = '\0';
5699 5706
5700 5707 debug_enter(c);
5701 5708 }
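
The hex-rendering loop above is a reusable trick: walk the value a nibble at a time from the top, and emit nothing until the first nonzero digit. A minimal user-level sketch of the same technique follows; hexfmt is an invented name, and the zero-value fallback is an addition (the original loop can omit it because an ECB address is never zero).

	#include <stdio.h>
	#include <stdint.h>

	/* Render val in hex into buf, suppressing leading zeros. */
	static void
	hexfmt(uintptr_t val, char *buf)
	{
		int shift = (sizeof (uintptr_t) * 8) - 4, i = 0;

		while (shift >= 0) {
			uintptr_t mask = (uintptr_t)0xf << shift;

			if (val >= ((uintptr_t)1 << shift))
				buf[i++] = "0123456789abcdef"[(val & mask) >> shift];
			shift -= 4;
		}

		if (i == 0)		/* val was zero: emit one digit */
			buf[i++] = '0';
		buf[i] = '\0';
	}

	int
	main(void)
	{
		char buf[2 * sizeof (uintptr_t) + 1];

		hexfmt((uintptr_t)0xdeadbeef, buf);
		(void) printf("%s\n", buf);	/* prints deadbeef */
		return (0);
	}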
5702 5709
5703 5710 static void
5704 5711 dtrace_action_panic(dtrace_ecb_t *ecb)
5705 5712 {
5706 5713 dtrace_probe_t *probe = ecb->dte_probe;
5707 5714
5708 5715 /*
5709 5716 * It's impossible to be taking action on the NULL probe.
5710 5717 */
5711 5718 ASSERT(probe != NULL);
5712 5719
5713 5720 if (dtrace_destructive_disallow)
5714 5721 return;
5715 5722
5716 5723 if (dtrace_panicked != NULL)
5717 5724 return;
5718 5725
5719 5726 if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
5720 5727 return;
5721 5728
5722 5729 /*
5723 5730 * We won the right to panic. (We want to be sure that only one
5724 5731 * thread calls panic() from dtrace_probe(), and that panic() is
5725 5732 * called exactly once.)
5726 5733 */
5727 5734 dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
5728 5735 probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
5729 5736 probe->dtpr_func, probe->dtpr_name, (void *)ecb);
5730 5737 }
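
The dtrace_casptr() call is a single-winner election: every racing thread proposes to swap NULL for itself, and exactly one succeeds. A hedged user-level sketch of the same pattern using C11 atomics (claim_panic is an invented name; the kernel uses its own dtrace_casptr primitive):

	#include <stdatomic.h>
	#include <stdio.h>

	static _Atomic(void *) panicked = NULL;

	/*
	 * Returns 1 for exactly one caller -- the one whose compare-and-swap
	 * observed NULL -- no matter how many threads race in here.
	 */
	static int
	claim_panic(void *self)
	{
		void *expected = NULL;

		return (atomic_compare_exchange_strong(&panicked, &expected,
		    self));
	}

	int
	main(void)
	{
		int a = claim_panic(&a);	/* wins: panicked was NULL */
		int b = claim_panic(&b);	/* loses: already claimed */

		(void) printf("%d %d\n", a, b);	/* prints 1 0 */
		return (0);
	}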
5731 5738
5732 5739 static void
5733 5740 dtrace_action_raise(uint64_t sig)
5734 5741 {
5735 5742 if (dtrace_destructive_disallow)
5736 5743 return;
5737 5744
5738 5745 if (sig >= NSIG) {
5739 5746 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
5740 5747 return;
5741 5748 }
5742 5749
5743 5750 /*
5744 5751 * raise() has a queue depth of 1 -- we ignore all subsequent
5745 5752 * invocations of the raise() action.
5746 5753 */
5747 5754 if (curthread->t_dtrace_sig == 0)
5748 5755 curthread->t_dtrace_sig = (uint8_t)sig;
5749 5756
5750 5757 curthread->t_sig_check = 1;
5751 5758 aston(curthread);
5752 5759 }
5753 5760
5754 5761 static void
5755 5762 dtrace_action_stop(void)
5756 5763 {
5757 5764 if (dtrace_destructive_disallow)
5758 5765 return;
5759 5766
5760 5767 if (!curthread->t_dtrace_stop) {
5761 5768 curthread->t_dtrace_stop = 1;
5762 5769 curthread->t_sig_check = 1;
5763 5770 aston(curthread);
5764 5771 }
5765 5772 }
5766 5773
5767 5774 static void
5768 5775 dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
5769 5776 {
5770 5777 hrtime_t now;
5771 5778 volatile uint16_t *flags;
5772 5779 cpu_t *cpu = CPU;
5773 5780
5774 5781 if (dtrace_destructive_disallow)
5775 5782 return;
5776 5783
5777 5784 flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;
5778 5785
5779 5786 now = dtrace_gethrtime();
5780 5787
5781 5788 if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
5782 5789 /*
5783 5790 * We need to advance the mark to the current time.
5784 5791 */
5785 5792 cpu->cpu_dtrace_chillmark = now;
5786 5793 cpu->cpu_dtrace_chilled = 0;
5787 5794 }
5788 5795
5789 5796 /*
5790 5797 * Now check to see if the requested chill time would take us over
5791 5798 * the maximum amount of time allowed in the chill interval. (Or
5792 5799 * worse, if the calculation itself induces overflow.)
5793 5800 */
5794 5801 if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
5795 5802 cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
5796 5803 *flags |= CPU_DTRACE_ILLOP;
5797 5804 return;
5798 5805 }
5799 5806
5800 5807 while (dtrace_gethrtime() - now < val)
5801 5808 continue;
5802 5809
5803 5810 /*
5804 5811 * Normally, we assure that the value of the variable "timestamp" does
5805 5812 * not change within an ECB. The presence of chill() represents an
5806 5813 * exception to this rule, however.
5807 5814 */
5808 5815 mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
5809 5816 cpu->cpu_dtrace_chilled += val;
5810 5817 }
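
The two-sided test above guards both the policy limit and the arithmetic itself: for unsigned values, a + b < a is true exactly when the addition wrapped. The same idiom in isolation, with charge() and BUDGET_MAX as invented stand-ins for the chill accounting:

	#include <stdint.h>
	#include <stdio.h>

	#define	BUDGET_MAX	500000000ULL	/* stand-in for dtrace_chill_max */

	/*
	 * Add val to *spent unless it would exceed BUDGET_MAX or wrap the
	 * counter; rejection corresponds to setting CPU_DTRACE_ILLOP above.
	 */
	static int
	charge(uint64_t *spent, uint64_t val)
	{
		if (*spent + val > BUDGET_MAX || *spent + val < *spent)
			return (-1);
		*spent += val;
		return (0);
	}

	int
	main(void)
	{
		uint64_t spent = 0;

		(void) printf("%d\n", charge(&spent, 400000000ULL));	/* 0 */
		(void) printf("%d\n", charge(&spent, 200000000ULL));	/* -1: over budget */
		(void) printf("%d\n", charge(&spent, UINT64_MAX));	/* -1: would wrap */
		return (0);
	}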
5811 5818
5812 5819 static void
5813 5820 dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
5814 5821 uint64_t *buf, uint64_t arg)
5815 5822 {
5816 5823 int nframes = DTRACE_USTACK_NFRAMES(arg);
5817 5824 int strsize = DTRACE_USTACK_STRSIZE(arg);
5818 5825 uint64_t *pcs = &buf[1], *fps;
5819 5826 char *str = (char *)&pcs[nframes];
5820 5827 int size, offs = 0, i, j;
5821 5828 uintptr_t old = mstate->dtms_scratch_ptr, saved;
5822 5829 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
5823 5830 char *sym;
5824 5831
5825 5832 /*
5826 5833 * Should be taking a faster path if string space has not been
5827 5834 * allocated.
5828 5835 */
5829 5836 ASSERT(strsize != 0);
5830 5837
5831 5838 /*
5832 5839 * We will first allocate some temporary space for the frame pointers.
5833 5840 */
5834 5841 fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
5835 5842 size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
5836 5843 (nframes * sizeof (uint64_t));
5837 5844
5838 5845 if (!DTRACE_INSCRATCH(mstate, size)) {
5839 5846 /*
5840 5847 * Not enough room for our frame pointers -- need to indicate
5841 5848 * that we ran out of scratch space.
5842 5849 */
5843 5850 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5844 5851 return;
5845 5852 }
5846 5853
5847 5854 mstate->dtms_scratch_ptr += size;
5848 5855 saved = mstate->dtms_scratch_ptr;
5849 5856
5850 5857 /*
5851 5858 * Now get a stack with both program counters and frame pointers.
5852 5859 */
5853 5860 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5854 5861 dtrace_getufpstack(buf, fps, nframes + 1);
5855 5862 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5856 5863
5857 5864 /*
5858 5865 * If that faulted, we're cooked.
5859 5866 */
5860 5867 if (*flags & CPU_DTRACE_FAULT)
5861 5868 goto out;
5862 5869
5863 5870 /*
5864 5871 * Now we want to walk up the stack, calling the USTACK helper. For
5865 5872 * each iteration, we restore the scratch pointer.
5866 5873 */
5867 5874 for (i = 0; i < nframes; i++) {
5868 5875 mstate->dtms_scratch_ptr = saved;
5869 5876
5870 5877 if (offs >= strsize)
5871 5878 break;
5872 5879
5873 5880 sym = (char *)(uintptr_t)dtrace_helper(
5874 5881 DTRACE_HELPER_ACTION_USTACK,
5875 5882 mstate, state, pcs[i], fps[i]);
5876 5883
5877 5884 /*
5878 5885 * If we faulted while running the helper, we're going to
5879 5886 * clear the fault and null out the corresponding string.
5880 5887 */
5881 5888 if (*flags & CPU_DTRACE_FAULT) {
5882 5889 *flags &= ~CPU_DTRACE_FAULT;
5883 5890 str[offs++] = '\0';
5884 5891 continue;
5885 5892 }
5886 5893
5887 5894 if (sym == NULL) {
5888 5895 str[offs++] = '\0';
5889 5896 continue;
5890 5897 }
5891 5898
5892 5899 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5893 5900
5894 5901 /*
5895 5902 * Now copy in the string that the helper returned to us.
5896 5903 */
5897 5904 for (j = 0; offs + j < strsize; j++) {
5898 5905 if ((str[offs + j] = sym[j]) == '\0')
5899 5906 break;
5900 5907 }
5901 5908
5902 5909 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5903 5910
5904 5911 offs += j + 1;
5905 5912 }
5906 5913
5907 5914 if (offs >= strsize) {
5908 5915 /*
5909 5916 * If we didn't have room for all of the strings, we don't
5910 5917 * abort processing -- this needn't be a fatal error -- but we
5911 5918 * still want to increment a counter (dts_stkstroverflows) to
5912 5919 * allow this condition to be warned about. (If this is from
5913 5920 * a jstack() action, it is easily tuned via jstackstrsize.)
5914 5921 */
5915 5922 dtrace_error(&state->dts_stkstroverflows);
5916 5923 }
5917 5924
5918 5925 while (offs < strsize)
5919 5926 str[offs++] = '\0';
5920 5927
5921 5928 out:
5922 5929 mstate->dtms_scratch_ptr = old;
5923 5930 }
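
The scratch discipline used here -- round the cursor up to an 8-byte boundary, charge the rounded size against the region, and restore a saved cursor per iteration -- is the same one DIF_OP_ALLOCS uses. A self-contained sketch of the carve-out step; scratch_t and scratch_alloc are invented names, and P2ROUNDUP is spelled out as a macro:

	#include <stdint.h>
	#include <stdio.h>

	/* Round x up to the next multiple of align (a power of two). */
	#define	ROUNDUP(x, align) \
		(((x) + ((align) - 1)) & ~((uintptr_t)(align) - 1))

	typedef struct {
		uintptr_t ptr;		/* cursor, like dtms_scratch_ptr */
		uintptr_t base;
		size_t size;
	} scratch_t;

	/* Carve nbytes of 8-byte-aligned space, or NULL if it won't fit. */
	static void *
	scratch_alloc(scratch_t *s, size_t nbytes)
	{
		uintptr_t aligned = ROUNDUP(s->ptr, 8);
		size_t need = (aligned - s->ptr) + nbytes;

		if (need < nbytes || s->ptr + need > s->base + s->size)
			return (NULL);	/* overflow or out of scratch */
		s->ptr += need;
		return ((void *)aligned);
	}

	int
	main(void)
	{
		static char space[64];
		scratch_t s = { (uintptr_t)space + 1, (uintptr_t)space,
		    sizeof (space) };
		uint64_t *fps = scratch_alloc(&s, 3 * sizeof (uint64_t));

		(void) printf("aligned: %d\n", fps != NULL &&
		    ((uintptr_t)fps & 7) == 0);	/* prints aligned: 1 */
		return (0);
	}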
5924 5931
5925 5932 /*
5926 5933 * If you're looking for the epicenter of DTrace, you just found it. This
5927 5934 * is the function called by the provider to fire a probe -- from which all
5928 5935 * subsequent probe-context DTrace activity emanates.
5929 5936 */
5930 5937 void
5931 5938 dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
5932 5939 uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
5933 5940 {
5934 5941 processorid_t cpuid;
5935 5942 dtrace_icookie_t cookie;
5936 5943 dtrace_probe_t *probe;
5937 5944 dtrace_mstate_t mstate;
5938 5945 dtrace_ecb_t *ecb;
5939 5946 dtrace_action_t *act;
5940 5947 intptr_t offs;
5941 5948 size_t size;
5942 5949 int vtime, onintr;
5943 5950 volatile uint16_t *flags;
5944 5951 hrtime_t now;
5945 5952
5946 5953 /*
5947 5954 * Kick out immediately if this CPU is still being born (in which case
5948 5955 * curthread will be set to -1) or the current thread can't allow
5949 5956 * probes in its current context.
5950 5957 */
5951 5958 if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE))
5952 5959 return;
5953 5960
5954 5961 cookie = dtrace_interrupt_disable();
5955 5962 probe = dtrace_probes[id - 1];
5956 5963 cpuid = CPU->cpu_id;
5957 5964 onintr = CPU_ON_INTR(CPU);
5958 5965
5959 5966 if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
5960 5967 probe->dtpr_predcache == curthread->t_predcache) {
5961 5968 /*
5962 5969 * We have hit in the predicate cache; we know that
5963 5970 * this predicate would evaluate to be false.
5964 5971 */
5965 5972 dtrace_interrupt_enable(cookie);
5966 5973 return;
5967 5974 }
5968 5975
5969 5976 if (panic_quiesce) {
5970 5977 /*
5971 5978 * We don't trace anything if we're panicking.
5972 5979 */
5973 5980 dtrace_interrupt_enable(cookie);
5974 5981 return;
5975 5982 }
5976 5983
5977 5984 now = dtrace_gethrtime();
5978 5985 vtime = dtrace_vtime_references != 0;
5979 5986
5980 5987 if (vtime && curthread->t_dtrace_start)
5981 5988 curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
5982 5989
5983 5990 mstate.dtms_difo = NULL;
5984 5991 mstate.dtms_probe = probe;
5985 5992 mstate.dtms_strtok = NULL;
5986 5993 mstate.dtms_arg[0] = arg0;
5987 5994 mstate.dtms_arg[1] = arg1;
5988 5995 mstate.dtms_arg[2] = arg2;
5989 5996 mstate.dtms_arg[3] = arg3;
5990 5997 mstate.dtms_arg[4] = arg4;
5991 5998
5992 5999 flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;
5993 6000
5994 6001 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
5995 6002 dtrace_predicate_t *pred = ecb->dte_predicate;
5996 6003 dtrace_state_t *state = ecb->dte_state;
5997 6004 dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
5998 6005 dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
5999 6006 dtrace_vstate_t *vstate = &state->dts_vstate;
6000 6007 dtrace_provider_t *prov = probe->dtpr_provider;
6001 6008 uint64_t tracememsize = 0;
6002 6009 int committed = 0;
6003 6010 caddr_t tomax;
6004 6011
6005 6012 /*
6006 6013 * A little subtlety with the following (seemingly innocuous)
6007 6014 * declaration of the automatic 'val': by looking at the
6008 6015 * code, you might think that it could be declared in the
6009 6016 * action processing loop, below. (That is, it's only used in
6010 6017 * the action processing loop.) However, it must be declared
6011 6018 * out of that scope because in the case of DIF expression
6012 6019 * arguments to aggregating actions, one iteration of the
6013 6020 * action loop will use the last iteration's value.
6014 6021 */
6015 6022 #ifdef lint
6016 6023 uint64_t val = 0;
6017 6024 #else
6018 6025 uint64_t val;
6019 6026 #endif
6020 6027
6021 6028 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
6022 6029 mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
6023 6030 mstate.dtms_getf = NULL;
6024 6031
6025 6032 *flags &= ~CPU_DTRACE_ERROR;
6026 6033
6027 6034 if (prov == dtrace_provider) {
6028 6035 /*
6029 6036 * If dtrace itself is the provider of this probe,
6030 6037 * we're only going to continue processing the ECB if
6031 6038 * arg0 (the dtrace_state_t) is equal to the ECB's
6032 6039 * creating state. (This prevents disjoint consumers
6033 6040 * from seeing one another's metaprobes.)
6034 6041 */
6035 6042 if (arg0 != (uint64_t)(uintptr_t)state)
6036 6043 continue;
6037 6044 }
6038 6045
6039 6046 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
6040 6047 /*
6041 6048 * We're not currently active. If our provider isn't
6042 6049 * the dtrace pseudo provider, we're not interested.
6043 6050 */
6044 6051 if (prov != dtrace_provider)
6045 6052 continue;
6046 6053
6047 6054 /*
6048 6055 * Now we must further check if we are in the BEGIN
6049 6056 * probe. If we are, we will only continue processing
6050 6057 * if we're still in WARMUP -- if one BEGIN enabling
6051 6058 * has invoked the exit() action, we don't want to
6052 6059 * evaluate subsequent BEGIN enablings.
6053 6060 */
6054 6061 if (probe->dtpr_id == dtrace_probeid_begin &&
6055 6062 state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
6056 6063 ASSERT(state->dts_activity ==
6057 6064 DTRACE_ACTIVITY_DRAINING);
6058 6065 continue;
6059 6066 }
6060 6067 }
6061 6068
6062 6069 if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb))
6063 6070 continue;
6064 6071
6065 6072 if (now - state->dts_alive > dtrace_deadman_timeout) {
6066 6073 /*
6067 6074 * We seem to be dead. Unless we (a) have kernel
6068 6075 			 * destructive permissions, (b) have explicitly enabled
6069 6076 			 * destructive actions, and (c) destructive actions have
6070 6077 * not been disabled, we're going to transition into
6071 6078 * the KILLED state, from which no further processing
6072 6079 * on this state will be performed.
6073 6080 */
6074 6081 if (!dtrace_priv_kernel_destructive(state) ||
6075 6082 !state->dts_cred.dcr_destructive ||
6076 6083 dtrace_destructive_disallow) {
6077 6084 void *activity = &state->dts_activity;
6078 6085 dtrace_activity_t current;
6079 6086
6080 6087 do {
6081 6088 current = state->dts_activity;
6082 6089 } while (dtrace_cas32(activity, current,
6083 6090 DTRACE_ACTIVITY_KILLED) != current);
6084 6091
6085 6092 continue;
6086 6093 }
6087 6094 }
6088 6095
6089 6096 if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
6090 6097 ecb->dte_alignment, state, &mstate)) < 0)
6091 6098 continue;
6092 6099
6093 6100 tomax = buf->dtb_tomax;
6094 6101 ASSERT(tomax != NULL);
6095 6102
6096 6103 if (ecb->dte_size != 0)
6097 6104 DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid);
6098 6105
6099 6106 mstate.dtms_epid = ecb->dte_epid;
6100 6107 mstate.dtms_present |= DTRACE_MSTATE_EPID;
6101 6108
6102 6109 if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
6103 6110 mstate.dtms_access |= DTRACE_ACCESS_KERNEL;
6104 6111
6105 6112 if (pred != NULL) {
6106 6113 dtrace_difo_t *dp = pred->dtp_difo;
6107 6114 int rval;
6108 6115
6109 6116 rval = dtrace_dif_emulate(dp, &mstate, vstate, state);
6110 6117
6111 6118 if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
6112 6119 dtrace_cacheid_t cid = probe->dtpr_predcache;
6113 6120
6114 6121 if (cid != DTRACE_CACHEIDNONE && !onintr) {
6115 6122 /*
6116 6123 * Update the predicate cache...
6117 6124 */
6118 6125 ASSERT(cid == pred->dtp_cacheid);
6119 6126 curthread->t_predcache = cid;
6120 6127 }
6121 6128
6122 6129 continue;
6123 6130 }
6124 6131 }
6125 6132
6126 6133 for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
6127 6134 act != NULL; act = act->dta_next) {
6128 6135 size_t valoffs;
6129 6136 dtrace_difo_t *dp;
6130 6137 dtrace_recdesc_t *rec = &act->dta_rec;
6131 6138
6132 6139 size = rec->dtrd_size;
6133 6140 valoffs = offs + rec->dtrd_offset;
6134 6141
6135 6142 if (DTRACEACT_ISAGG(act->dta_kind)) {
6136 6143 uint64_t v = 0xbad;
6137 6144 dtrace_aggregation_t *agg;
6138 6145
6139 6146 agg = (dtrace_aggregation_t *)act;
6140 6147
6141 6148 if ((dp = act->dta_difo) != NULL)
6142 6149 v = dtrace_dif_emulate(dp,
6143 6150 &mstate, vstate, state);
6144 6151
6145 6152 if (*flags & CPU_DTRACE_ERROR)
6146 6153 continue;
6147 6154
6148 6155 /*
6149 6156 * Note that we always pass the expression
6150 6157 * value from the previous iteration of the
6151 6158 * action loop. This value will only be used
6152 6159 * if there is an expression argument to the
6153 6160 * aggregating action, denoted by the
6154 6161 * dtag_hasarg field.
6155 6162 */
6156 6163 dtrace_aggregate(agg, buf,
6157 6164 offs, aggbuf, v, val);
6158 6165 continue;
6159 6166 }
6160 6167
6161 6168 switch (act->dta_kind) {
6162 6169 case DTRACEACT_STOP:
6163 6170 if (dtrace_priv_proc_destructive(state,
6164 6171 &mstate))
6165 6172 dtrace_action_stop();
6166 6173 continue;
6167 6174
6168 6175 case DTRACEACT_BREAKPOINT:
6169 6176 if (dtrace_priv_kernel_destructive(state))
6170 6177 dtrace_action_breakpoint(ecb);
6171 6178 continue;
6172 6179
6173 6180 case DTRACEACT_PANIC:
6174 6181 if (dtrace_priv_kernel_destructive(state))
6175 6182 dtrace_action_panic(ecb);
6176 6183 continue;
6177 6184
6178 6185 case DTRACEACT_STACK:
6179 6186 if (!dtrace_priv_kernel(state))
6180 6187 continue;
6181 6188
6182 6189 dtrace_getpcstack((pc_t *)(tomax + valoffs),
6183 6190 size / sizeof (pc_t), probe->dtpr_aframes,
6184 6191 DTRACE_ANCHORED(probe) ? NULL :
6185 6192 (uint32_t *)arg0);
6186 6193
6187 6194 continue;
6188 6195
6189 6196 case DTRACEACT_JSTACK:
6190 6197 case DTRACEACT_USTACK:
6191 6198 if (!dtrace_priv_proc(state, &mstate))
6192 6199 continue;
6193 6200
6194 6201 /*
6195 6202 * See comment in DIF_VAR_PID.
6196 6203 */
6197 6204 if (DTRACE_ANCHORED(mstate.dtms_probe) &&
6198 6205 CPU_ON_INTR(CPU)) {
6199 6206 int depth = DTRACE_USTACK_NFRAMES(
6200 6207 rec->dtrd_arg) + 1;
6201 6208
6202 6209 dtrace_bzero((void *)(tomax + valoffs),
6203 6210 DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
6204 6211 + depth * sizeof (uint64_t));
6205 6212
6206 6213 continue;
6207 6214 }
6208 6215
6209 6216 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
6210 6217 curproc->p_dtrace_helpers != NULL) {
6211 6218 /*
6212 6219 * This is the slow path -- we have
6213 6220 * allocated string space, and we're
6214 6221 * getting the stack of a process that
6215 6222 * has helpers. Call into a separate
6216 6223 * routine to perform this processing.
6217 6224 */
6218 6225 dtrace_action_ustack(&mstate, state,
6219 6226 (uint64_t *)(tomax + valoffs),
6220 6227 rec->dtrd_arg);
6221 6228 continue;
6222 6229 }
6223 6230
6224 6231 /*
6225 6232 * Clear the string space, since there's no
6226 6233 * helper to do it for us.
6227 6234 */
6228 6235 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) {
6229 6236 int depth = DTRACE_USTACK_NFRAMES(
6230 6237 rec->dtrd_arg);
6231 6238 size_t strsize = DTRACE_USTACK_STRSIZE(
6232 6239 rec->dtrd_arg);
6233 6240 uint64_t *buf = (uint64_t *)(tomax +
6234 6241 valoffs);
6235 6242 void *strspace = &buf[depth + 1];
6236 6243
6237 6244 dtrace_bzero(strspace,
6238 6245 MIN(depth, strsize));
6239 6246 }
6240 6247
6241 6248 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6242 6249 dtrace_getupcstack((uint64_t *)
6243 6250 (tomax + valoffs),
6244 6251 DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
6245 6252 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6246 6253 continue;
6247 6254
6248 6255 default:
6249 6256 break;
6250 6257 }
6251 6258
6252 6259 dp = act->dta_difo;
6253 6260 ASSERT(dp != NULL);
6254 6261
6255 6262 val = dtrace_dif_emulate(dp, &mstate, vstate, state);
6256 6263
6257 6264 if (*flags & CPU_DTRACE_ERROR)
6258 6265 continue;
6259 6266
6260 6267 switch (act->dta_kind) {
6261 6268 case DTRACEACT_SPECULATE:
6262 6269 ASSERT(buf == &state->dts_buffer[cpuid]);
6263 6270 buf = dtrace_speculation_buffer(state,
6264 6271 cpuid, val);
6265 6272
6266 6273 if (buf == NULL) {
6267 6274 *flags |= CPU_DTRACE_DROP;
6268 6275 continue;
6269 6276 }
6270 6277
6271 6278 offs = dtrace_buffer_reserve(buf,
6272 6279 ecb->dte_needed, ecb->dte_alignment,
6273 6280 state, NULL);
6274 6281
6275 6282 if (offs < 0) {
6276 6283 *flags |= CPU_DTRACE_DROP;
6277 6284 continue;
6278 6285 }
6279 6286
6280 6287 tomax = buf->dtb_tomax;
6281 6288 ASSERT(tomax != NULL);
6282 6289
6283 6290 if (ecb->dte_size != 0)
6284 6291 DTRACE_STORE(uint32_t, tomax, offs,
6285 6292 ecb->dte_epid);
6286 6293 continue;
6287 6294
6288 6295 case DTRACEACT_CHILL:
6289 6296 if (dtrace_priv_kernel_destructive(state))
6290 6297 dtrace_action_chill(&mstate, val);
6291 6298 continue;
6292 6299
6293 6300 case DTRACEACT_RAISE:
6294 6301 if (dtrace_priv_proc_destructive(state,
6295 6302 &mstate))
6296 6303 dtrace_action_raise(val);
6297 6304 continue;
6298 6305
6299 6306 case DTRACEACT_COMMIT:
6300 6307 ASSERT(!committed);
6301 6308
6302 6309 /*
6303 6310 * We need to commit our buffer state.
6304 6311 */
6305 6312 if (ecb->dte_size)
6306 6313 buf->dtb_offset = offs + ecb->dte_size;
6307 6314 buf = &state->dts_buffer[cpuid];
6308 6315 dtrace_speculation_commit(state, cpuid, val);
6309 6316 committed = 1;
6310 6317 continue;
6311 6318
6312 6319 case DTRACEACT_DISCARD:
6313 6320 dtrace_speculation_discard(state, cpuid, val);
6314 6321 continue;
6315 6322
6316 6323 case DTRACEACT_DIFEXPR:
6317 6324 case DTRACEACT_LIBACT:
6318 6325 case DTRACEACT_PRINTF:
6319 6326 case DTRACEACT_PRINTA:
6320 6327 case DTRACEACT_SYSTEM:
6321 6328 case DTRACEACT_FREOPEN:
6322 6329 case DTRACEACT_TRACEMEM:
6323 6330 break;
6324 6331
6325 6332 case DTRACEACT_TRACEMEM_DYNSIZE:
6326 6333 tracememsize = val;
6327 6334 break;
6328 6335
6329 6336 case DTRACEACT_SYM:
6330 6337 case DTRACEACT_MOD:
6331 6338 if (!dtrace_priv_kernel(state))
6332 6339 continue;
6333 6340 break;
6334 6341
6335 6342 case DTRACEACT_USYM:
6336 6343 case DTRACEACT_UMOD:
6337 6344 case DTRACEACT_UADDR: {
6338 6345 struct pid *pid = curthread->t_procp->p_pidp;
6339 6346
6340 6347 if (!dtrace_priv_proc(state, &mstate))
6341 6348 continue;
6342 6349
6343 6350 DTRACE_STORE(uint64_t, tomax,
6344 6351 valoffs, (uint64_t)pid->pid_id);
6345 6352 DTRACE_STORE(uint64_t, tomax,
6346 6353 valoffs + sizeof (uint64_t), val);
6347 6354
6348 6355 continue;
6349 6356 }
6350 6357
6351 6358 case DTRACEACT_EXIT: {
6352 6359 /*
6353 6360 * For the exit action, we are going to attempt
6354 6361 * to atomically set our activity to be
6355 6362 * draining. If this fails (either because
6356 6363 * another CPU has beat us to the exit action,
6357 6364 * or because our current activity is something
6358 6365 * other than ACTIVE or WARMUP), we will
6359 6366 * continue. This assures that the exit action
6360 6367 * can be successfully recorded at most once
6361 6368 * when we're in the ACTIVE state. If we're
6362 6369 * encountering the exit() action while in
6363 6370 * COOLDOWN, however, we want to honor the new
6364 6371 * status code. (We know that we're the only
6365 6372 * thread in COOLDOWN, so there is no race.)
6366 6373 */
6367 6374 void *activity = &state->dts_activity;
6368 6375 dtrace_activity_t current = state->dts_activity;
6369 6376
6370 6377 if (current == DTRACE_ACTIVITY_COOLDOWN)
6371 6378 break;
6372 6379
6373 6380 if (current != DTRACE_ACTIVITY_WARMUP)
6374 6381 current = DTRACE_ACTIVITY_ACTIVE;
6375 6382
6376 6383 if (dtrace_cas32(activity, current,
6377 6384 DTRACE_ACTIVITY_DRAINING) != current) {
6378 6385 *flags |= CPU_DTRACE_DROP;
6379 6386 continue;
6380 6387 }
6381 6388
6382 6389 break;
6383 6390 }
6384 6391
6385 6392 default:
6386 6393 ASSERT(0);
6387 6394 }
6388 6395
6389 6396 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
6390 6397 uintptr_t end = valoffs + size;
6391 6398
6392 6399 if (tracememsize != 0 &&
6393 6400 valoffs + tracememsize < end) {
6394 6401 end = valoffs + tracememsize;
6395 6402 tracememsize = 0;
6396 6403 }
6397 6404
6398 6405 if (!dtrace_vcanload((void *)(uintptr_t)val,
6399 6406 &dp->dtdo_rtype, &mstate, vstate))
6400 6407 continue;
6401 6408
6402 6409 /*
6403 6410 * If this is a string, we're going to only
6404 6411 * load until we find the zero byte -- after
6405 6412 * which we'll store zero bytes.
6406 6413 */
6407 6414 if (dp->dtdo_rtype.dtdt_kind ==
6408 6415 DIF_TYPE_STRING) {
6409 6416 char c = '\0' + 1;
6410 6417 int intuple = act->dta_intuple;
6411 6418 size_t s;
6412 6419
6413 6420 for (s = 0; s < size; s++) {
6414 6421 if (c != '\0')
6415 6422 c = dtrace_load8(val++);
6416 6423
6417 6424 DTRACE_STORE(uint8_t, tomax,
6418 6425 valoffs++, c);
6419 6426
6420 6427 if (c == '\0' && intuple)
6421 6428 break;
6422 6429 }
6423 6430
6424 6431 continue;
6425 6432 }
6426 6433
6427 6434 while (valoffs < end) {
6428 6435 DTRACE_STORE(uint8_t, tomax, valoffs++,
6429 6436 dtrace_load8(val++));
6430 6437 }
6431 6438
6432 6439 continue;
6433 6440 }
6434 6441
6435 6442 switch (size) {
6436 6443 case 0:
6437 6444 break;
6438 6445
6439 6446 case sizeof (uint8_t):
6440 6447 DTRACE_STORE(uint8_t, tomax, valoffs, val);
6441 6448 break;
6442 6449 case sizeof (uint16_t):
6443 6450 DTRACE_STORE(uint16_t, tomax, valoffs, val);
6444 6451 break;
6445 6452 case sizeof (uint32_t):
6446 6453 DTRACE_STORE(uint32_t, tomax, valoffs, val);
6447 6454 break;
6448 6455 case sizeof (uint64_t):
6449 6456 DTRACE_STORE(uint64_t, tomax, valoffs, val);
6450 6457 break;
6451 6458 default:
6452 6459 /*
6453 6460 * Any other size should have been returned by
6454 6461 * reference, not by value.
6455 6462 */
6456 6463 ASSERT(0);
6457 6464 break;
6458 6465 }
6459 6466 }
6460 6467
6461 6468 if (*flags & CPU_DTRACE_DROP)
6462 6469 continue;
6463 6470
6464 6471 if (*flags & CPU_DTRACE_FAULT) {
6465 6472 int ndx;
6466 6473 dtrace_action_t *err;
6467 6474
6468 6475 buf->dtb_errors++;
6469 6476
6470 6477 if (probe->dtpr_id == dtrace_probeid_error) {
6471 6478 /*
6472 6479 * There's nothing we can do -- we had an
6473 6480 * error on the error probe. We bump an
6474 6481 * error counter to at least indicate that
6475 6482 * this condition happened.
6476 6483 */
6477 6484 dtrace_error(&state->dts_dblerrors);
6478 6485 continue;
6479 6486 }
6480 6487
6481 6488 if (vtime) {
6482 6489 /*
6483 6490 * Before recursing on dtrace_probe(), we
6484 6491 * need to explicitly clear out our start
6485 6492 * time to prevent it from being accumulated
6486 6493 * into t_dtrace_vtime.
6487 6494 */
6488 6495 curthread->t_dtrace_start = 0;
6489 6496 }
6490 6497
6491 6498 /*
6492 6499 * Iterate over the actions to figure out which action
6493 6500 * we were processing when we experienced the error.
6494 6501 * Note that act points _past_ the faulting action; if
6495 6502 * act is ecb->dte_action, the fault was in the
6496 6503 			 * predicate; if it's ecb->dte_action->dta_next, it's
6497 6504 * in action #1, and so on.
6498 6505 */
6499 6506 for (err = ecb->dte_action, ndx = 0;
6500 6507 err != act; err = err->dta_next, ndx++)
6501 6508 continue;
6502 6509
6503 6510 dtrace_probe_error(state, ecb->dte_epid, ndx,
6504 6511 (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
6505 6512 mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
6506 6513 cpu_core[cpuid].cpuc_dtrace_illval);
6507 6514
6508 6515 continue;
6509 6516 }
6510 6517
6511 6518 if (!committed)
6512 6519 buf->dtb_offset = offs + ecb->dte_size;
6513 6520 }
6514 6521
6515 6522 if (vtime)
6516 6523 curthread->t_dtrace_start = dtrace_gethrtime();
6517 6524
6518 6525 dtrace_interrupt_enable(cookie);
6519 6526 }
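
The predicate cache consulted near the top of dtrace_probe() is per-thread memoization of a thread-invariant predicate that last evaluated false. A toy single-threaded sketch of the idea (all names here are invented; real cache IDs are assigned when predicates are created):

	#include <stdio.h>

	typedef unsigned int cacheid_t;
	#define	CACHEIDNONE	0

	static cacheid_t t_predcache = CACHEIDNONE;	/* per-thread slot */

	static int
	eval_slow(cacheid_t id)
	{
		(void) printf("emulating predicate %u\n", id);
		return (0);		/* pretend it evaluated false */
	}

	/*
	 * Returns 1 if the enabling should fire.  A predicate that evaluates
	 * false gets its id cached, so the next hit on this thread skips the
	 * emulator entirely -- the fast path taken above.
	 */
	static int
	predicate_passes(cacheid_t id)
	{
		if (id != CACHEIDNONE && id == t_predcache)
			return (0);	/* known false: skip evaluation */
		if (!eval_slow(id)) {
			t_predcache = id;	/* remember the false result */
			return (0);
		}
		return (1);
	}

	int
	main(void)
	{
		(void) predicate_passes(7);	/* emulates, then caches */
		(void) predicate_passes(7);	/* silent: cache hit */
		return (0);
	}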
6520 6527
6521 6528 /*
6522 6529 * DTrace Probe Hashing Functions
6523 6530 *
6524 6531 * The functions in this section (and indeed, the functions in remaining
6525 6532 * sections) are not _called_ from probe context. (Any exceptions to this are
6526 6533 * marked with a "Note:".) Rather, they are called from elsewhere in the
6527 6534  * DTrace framework to look up probes in, add probes to, and remove probes from
6528 6535 * the DTrace probe hashes. (Each probe is hashed by each element of the
6529 6536 * probe tuple -- allowing for fast lookups, regardless of what was
6530 6537 * specified.)
6531 6538 */
6532 6539 static uint_t
6533 6540 dtrace_hash_str(char *p)
6534 6541 {
6535 6542 unsigned int g;
6536 6543 uint_t hval = 0;
6537 6544
6538 6545 while (*p) {
6539 6546 hval = (hval << 4) + *p++;
6540 6547 if ((g = (hval & 0xf0000000)) != 0)
6541 6548 hval ^= g >> 24;
6542 6549 hval &= ~g;
6543 6550 }
6544 6551 return (hval);
6545 6552 }
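
This is the classic ELF/PJW string hash: fold the top nibble back into the low bits so that short, similar strings don't cluster. Because the probe hash tables are kept at power-of-two sizes, a bucket index is hash & mask rather than a division. A quick user-level check (the table size of 8 is chosen arbitrarily):

	#include <stdio.h>

	static unsigned int
	hash_str(const char *p)
	{
		unsigned int hval = 0, g;

		while (*p) {
			hval = (hval << 4) + *p++;
			if ((g = (hval & 0xf0000000)) != 0)
				hval ^= g >> 24;
			hval &= ~g;
		}
		return (hval);
	}

	int
	main(void)
	{
		const char *names[] = { "syscall", "fbt", "sdt", "profile" };
		unsigned int mask = 8 - 1;	/* power of two: & replaces % */
		int i;

		for (i = 0; i < 4; i++)
			(void) printf("%-8s -> bucket %u\n", names[i],
			    hash_str(names[i]) & mask);
		return (0);
	}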
6546 6553
6547 6554 static dtrace_hash_t *
6548 6555 dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
6549 6556 {
6550 6557 dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);
6551 6558
6552 6559 hash->dth_stroffs = stroffs;
6553 6560 hash->dth_nextoffs = nextoffs;
6554 6561 hash->dth_prevoffs = prevoffs;
6555 6562
6556 6563 hash->dth_size = 1;
6557 6564 hash->dth_mask = hash->dth_size - 1;
6558 6565
6559 6566 hash->dth_tab = kmem_zalloc(hash->dth_size *
6560 6567 sizeof (dtrace_hashbucket_t *), KM_SLEEP);
6561 6568
6562 6569 return (hash);
6563 6570 }
6564 6571
6565 6572 static void
6566 6573 dtrace_hash_destroy(dtrace_hash_t *hash)
6567 6574 {
6568 6575 #ifdef DEBUG
6569 6576 int i;
6570 6577
6571 6578 for (i = 0; i < hash->dth_size; i++)
6572 6579 ASSERT(hash->dth_tab[i] == NULL);
6573 6580 #endif
6574 6581
6575 6582 kmem_free(hash->dth_tab,
6576 6583 hash->dth_size * sizeof (dtrace_hashbucket_t *));
6577 6584 kmem_free(hash, sizeof (dtrace_hash_t));
6578 6585 }
6579 6586
6580 6587 static void
6581 6588 dtrace_hash_resize(dtrace_hash_t *hash)
6582 6589 {
6583 6590 int size = hash->dth_size, i, ndx;
6584 6591 int new_size = hash->dth_size << 1;
6585 6592 int new_mask = new_size - 1;
6586 6593 dtrace_hashbucket_t **new_tab, *bucket, *next;
6587 6594
6588 6595 ASSERT((new_size & new_mask) == 0);
6589 6596
6590 6597 new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);
6591 6598
6592 6599 for (i = 0; i < size; i++) {
6593 6600 for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
6594 6601 dtrace_probe_t *probe = bucket->dthb_chain;
6595 6602
6596 6603 ASSERT(probe != NULL);
6597 6604 ndx = DTRACE_HASHSTR(hash, probe) & new_mask;
6598 6605
6599 6606 next = bucket->dthb_next;
6600 6607 bucket->dthb_next = new_tab[ndx];
6601 6608 new_tab[ndx] = bucket;
6602 6609 }
6603 6610 }
6604 6611
6605 6612 kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
6606 6613 hash->dth_tab = new_tab;
6607 6614 hash->dth_size = new_size;
6608 6615 hash->dth_mask = new_mask;
6609 6616 }
6610 6617
6611 6618 static void
6612 6619 dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
6613 6620 {
6614 6621 int hashval = DTRACE_HASHSTR(hash, new);
6615 6622 int ndx = hashval & hash->dth_mask;
6616 6623 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6617 6624 dtrace_probe_t **nextp, **prevp;
6618 6625
6619 6626 for (; bucket != NULL; bucket = bucket->dthb_next) {
6620 6627 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
6621 6628 goto add;
6622 6629 }
6623 6630
6624 6631 if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
6625 6632 dtrace_hash_resize(hash);
6626 6633 dtrace_hash_add(hash, new);
6627 6634 return;
6628 6635 }
6629 6636
6630 6637 bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
6631 6638 bucket->dthb_next = hash->dth_tab[ndx];
6632 6639 hash->dth_tab[ndx] = bucket;
6633 6640 hash->dth_nbuckets++;
6634 6641
6635 6642 add:
6636 6643 nextp = DTRACE_HASHNEXT(hash, new);
6637 6644 ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
6638 6645 *nextp = bucket->dthb_chain;
6639 6646
6640 6647 if (bucket->dthb_chain != NULL) {
6641 6648 prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
6642 6649 ASSERT(*prevp == NULL);
6643 6650 *prevp = new;
6644 6651 }
6645 6652
6646 6653 bucket->dthb_chain = new;
6647 6654 bucket->dthb_len++;
6648 6655 }
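
What lets this one hash implementation serve all four probe-tuple elements is that the string, next, and prev fields are located by byte offsets stored in the hash at creation time -- that is what the DTRACE_HASHSTR, DTRACE_HASHNEXT, and DTRACE_HASHPREV macros hide. A sketch of the offset trick with invented types (the real chains link dtrace_probe_t by provider, module, function, or name):

	#include <stddef.h>
	#include <stdio.h>

	typedef struct probe {
		const char *func;
		const char *name;
		struct probe *nextbyfunc;	/* probes with equal func */
	} probe_t;

	/* Resolve a field of p given only its byte offset, as the hash does. */
	#define	FIELD(p, offs, type)	(*(type *)((char *)(p) + (offs)))

	int
	main(void)
	{
		probe_t a = { "read", "entry", NULL };
		probe_t b = { "read", "return", &a };
		size_t stroffs = offsetof(probe_t, func);
		size_t nextoffs = offsetof(probe_t, nextbyfunc);
		probe_t *p;

		/* Walk the equal-func chain without naming its fields. */
		for (p = &b; p != NULL; p = FIELD(p, nextoffs, probe_t *))
			(void) printf("%s:%s\n",
			    FIELD(p, stroffs, const char *), p->name);
		return (0);
	}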
6649 6656
6650 6657 static dtrace_probe_t *
6651 6658 dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
6652 6659 {
6653 6660 int hashval = DTRACE_HASHSTR(hash, template);
6654 6661 int ndx = hashval & hash->dth_mask;
6655 6662 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6656 6663
6657 6664 for (; bucket != NULL; bucket = bucket->dthb_next) {
6658 6665 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
6659 6666 return (bucket->dthb_chain);
6660 6667 }
6661 6668
6662 6669 return (NULL);
6663 6670 }
6664 6671
6665 6672 static int
6666 6673 dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
6667 6674 {
6668 6675 int hashval = DTRACE_HASHSTR(hash, template);
6669 6676 int ndx = hashval & hash->dth_mask;
6670 6677 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6671 6678
6672 6679 for (; bucket != NULL; bucket = bucket->dthb_next) {
6673 6680 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
6674 6681 return (bucket->dthb_len);
6675 6682 }
6676 6683
6677 6684 	return (0);
6678 6685 }
6679 6686
6680 6687 static void
6681 6688 dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
6682 6689 {
6683 6690 int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
6684 6691 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6685 6692
6686 6693 dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
6687 6694 dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);
6688 6695
6689 6696 /*
6690 6697 * Find the bucket that we're removing this probe from.
6691 6698 */
6692 6699 for (; bucket != NULL; bucket = bucket->dthb_next) {
6693 6700 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
6694 6701 break;
6695 6702 }
6696 6703
6697 6704 ASSERT(bucket != NULL);
6698 6705
6699 6706 if (*prevp == NULL) {
6700 6707 if (*nextp == NULL) {
6701 6708 /*
6702 6709 * The removed probe was the only probe on this
6703 6710 * bucket; we need to remove the bucket.
6704 6711 */
6705 6712 dtrace_hashbucket_t *b = hash->dth_tab[ndx];
6706 6713
6707 6714 ASSERT(bucket->dthb_chain == probe);
6708 6715 ASSERT(b != NULL);
6709 6716
6710 6717 if (b == bucket) {
6711 6718 hash->dth_tab[ndx] = bucket->dthb_next;
6712 6719 } else {
6713 6720 while (b->dthb_next != bucket)
6714 6721 b = b->dthb_next;
6715 6722 b->dthb_next = bucket->dthb_next;
6716 6723 }
6717 6724
6718 6725 ASSERT(hash->dth_nbuckets > 0);
6719 6726 hash->dth_nbuckets--;
6720 6727 kmem_free(bucket, sizeof (dtrace_hashbucket_t));
6721 6728 return;
6722 6729 }
6723 6730
6724 6731 bucket->dthb_chain = *nextp;
6725 6732 } else {
6726 6733 *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
6727 6734 }
6728 6735
6729 6736 if (*nextp != NULL)
6730 6737 *(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
6731 6738 }
6732 6739
6733 6740 /*
6734 6741 * DTrace Utility Functions
6735 6742 *
6736 6743 * These are random utility functions that are _not_ called from probe context.
6737 6744 */
6738 6745 static int
6739 6746 dtrace_badattr(const dtrace_attribute_t *a)
6740 6747 {
6741 6748 return (a->dtat_name > DTRACE_STABILITY_MAX ||
6742 6749 a->dtat_data > DTRACE_STABILITY_MAX ||
6743 6750 a->dtat_class > DTRACE_CLASS_MAX);
6744 6751 }
6745 6752
6746 6753 /*
6747 6754 * Return a duplicate copy of a string. If the specified string is NULL,
6748 6755 * this function returns a zero-length string.
6749 6756 */
6750 6757 static char *
6751 6758 dtrace_strdup(const char *str)
6752 6759 {
6753 6760 char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);
6754 6761
6755 6762 if (str != NULL)
6756 6763 (void) strcpy(new, str);
6757 6764
6758 6765 return (new);
6759 6766 }
6760 6767
6761 6768 #define DTRACE_ISALPHA(c) \
6762 6769 (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
6763 6770
6764 6771 static int
6765 6772 dtrace_badname(const char *s)
6766 6773 {
6767 6774 char c;
6768 6775
6769 6776 if (s == NULL || (c = *s++) == '\0')
6770 6777 return (0);
6771 6778
6772 6779 if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
6773 6780 return (1);
6774 6781
6775 6782 while ((c = *s++) != '\0') {
6776 6783 if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
6777 6784 c != '-' && c != '_' && c != '.' && c != '`')
6778 6785 return (1);
6779 6786 }
6780 6787
6781 6788 return (0);
6782 6789 }
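/*
 * By way of example: under the rules above, "fasttrap", "sched",
 * "ip-rcv" and "vm_fault" are acceptable names, while "123abc" (leading
 * digit) and "foo/bar" ('/' is not a permitted character) are rejected.
 * Note that dtrace_badname() deems NULL and empty strings acceptable;
 * callers that require a non-empty name -- dtrace_register(), below --
 * must check for that case separately.
 */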
6783 6790
6784 6791 static void
6785 6792 dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
6786 6793 {
6787 6794 uint32_t priv;
6788 6795
6789 6796 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
6790 6797 /*
6791 6798 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
6792 6799 */
6793 6800 priv = DTRACE_PRIV_ALL;
6794 6801 } else {
6795 6802 *uidp = crgetuid(cr);
6796 6803 *zoneidp = crgetzoneid(cr);
6797 6804
6798 6805 priv = 0;
6799 6806 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
6800 6807 priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
6801 6808 else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
6802 6809 priv |= DTRACE_PRIV_USER;
6803 6810 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
6804 6811 priv |= DTRACE_PRIV_PROC;
6805 6812 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
6806 6813 priv |= DTRACE_PRIV_OWNER;
6807 6814 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
6808 6815 priv |= DTRACE_PRIV_ZONEOWNER;
6809 6816 }
6810 6817
6811 6818 *privp = priv;
6812 6819 }
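/*
 * To make the mapping above concrete: a credential holding only
 * dtrace_proc and proc_owner yields
 * DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER, with *uidp and *zoneidp filled
 * in from the credential, while an all-privileged (or absent)
 * credential yields DTRACE_PRIV_ALL and leaves *uidp and *zoneidp
 * untouched; consumers of this interface, such as dtrace_match_priv()
 * below, accordingly consult the uid and zoneid only when specific
 * privilege bits are at issue.
 */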
6813 6820
6814 6821 #ifdef DTRACE_ERRDEBUG
6815 6822 static void
6816 6823 dtrace_errdebug(const char *str)
6817 6824 {
6818 6825 int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
6819 6826 int occupied = 0;
6820 6827
6821 6828 mutex_enter(&dtrace_errlock);
6822 6829 dtrace_errlast = str;
6823 6830 dtrace_errthread = curthread;
6824 6831
6825 6832 while (occupied++ < DTRACE_ERRHASHSZ) {
6826 6833 if (dtrace_errhash[hval].dter_msg == str) {
6827 6834 dtrace_errhash[hval].dter_count++;
6828 6835 goto out;
6829 6836 }
6830 6837
6831 6838 if (dtrace_errhash[hval].dter_msg != NULL) {
6832 6839 hval = (hval + 1) % DTRACE_ERRHASHSZ;
6833 6840 continue;
6834 6841 }
6835 6842
6836 6843 dtrace_errhash[hval].dter_msg = str;
6837 6844 dtrace_errhash[hval].dter_count = 1;
6838 6845 goto out;
6839 6846 }
6840 6847
6841 6848 panic("dtrace: undersized error hash");
6842 6849 out:
6843 6850 mutex_exit(&dtrace_errlock);
6844 6851 }
6845 6852 #endif
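/*
 * The error hash above is open-addressed with linear probing: on a
 * collision the scan advances (modulo DTRACE_ERRHASHSZ) until it finds
 * either the matching message or an empty slot.  Because dter_msg is
 * compared by pointer rather than by content, two distinct string
 * constants with identical text occupy separate slots -- acceptable
 * here, since the messages of interest are compile-time literals.
 */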
6846 6853
6847 6854 /*
6848 6855 * DTrace Matching Functions
6849 6856 *
6850 6857 * These functions are used to match groups of probes, given some elements of
6851 6858 * a probe tuple, or some globbed expressions for elements of a probe tuple.
6852 6859 */
6853 6860 static int
6854 6861 dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
6855 6862 zoneid_t zoneid)
6856 6863 {
6857 6864 if (priv != DTRACE_PRIV_ALL) {
6858 6865 uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
6859 6866 uint32_t match = priv & ppriv;
6860 6867
6861 6868 /*
6862 6869 * No PRIV_DTRACE_* privileges...
6863 6870 */
6864 6871 if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
6865 6872 DTRACE_PRIV_KERNEL)) == 0)
6866 6873 return (0);
6867 6874
6868 6875 /*
6869 6876 * No matching bits, but there were bits to match...
6870 6877 */
6871 6878 if (match == 0 && ppriv != 0)
6872 6879 return (0);
6873 6880
6874 6881 /*
6875 6882 * Need to have permissions to the process, but don't...
6876 6883 */
6877 6884 if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
6878 6885 uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
6879 6886 return (0);
6880 6887 }
6881 6888
6882 6889 /*
6883 6890 * Need to be in the same zone unless we possess the
6884 6891 * privilege to examine all zones.
6885 6892 */
6886 6893 if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
6887 6894 zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
6888 6895 return (0);
6889 6896 }
6890 6897 }
6891 6898
6892 6899 return (1);
6893 6900 }
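/*
 * A worked example of the checks above: suppose a consumer holds
 * DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER and a provider was registered
 * with DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER.
 * DTRACE_PRIV_OWNER is present in the match, so the uid comparison is
 * skipped; DTRACE_PRIV_ZONEOWNER is not, so the consumer sees the
 * provider's probes only from within the provider's zone.
 */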
6894 6901
6895 6902 /*
6896 6903 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
6897 6904 * consists of input pattern strings and an ops-vector to evaluate them.
6898 6905 * This function returns >0 for match, 0 for no match, and <0 for error.
6899 6906 */
6900 6907 static int
6901 6908 dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
6902 6909 uint32_t priv, uid_t uid, zoneid_t zoneid)
6903 6910 {
6904 6911 dtrace_provider_t *pvp = prp->dtpr_provider;
6905 6912 int rv;
6906 6913
6907 6914 if (pvp->dtpv_defunct)
6908 6915 return (0);
6909 6916
6910 6917 if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
6911 6918 return (rv);
6912 6919
6913 6920 if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
6914 6921 return (rv);
6915 6922
6916 6923 if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
6917 6924 return (rv);
6918 6925
6919 6926 if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
6920 6927 return (rv);
6921 6928
6922 6929 if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
6923 6930 return (0);
6924 6931
6925 6932 return (rv);
6926 6933 }
6927 6934
6928 6935 /*
6929 6936 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
6930 6937 * interface for matching a glob pattern 'p' to an input string 's'. Unlike
6931 6938 * libc's version, the kernel version only applies to 8-bit ASCII strings.
6932 6939 * In addition, all of the recursion cases except for '*' matching have been
6933 6940 * unwound. For '*', we still implement recursive evaluation, but a depth
6934 6941 * counter is maintained and matching is aborted if we recurse too deep.
6935 6942 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
6936 6943 */
6937 6944 static int
6938 6945 dtrace_match_glob(const char *s, const char *p, int depth)
6939 6946 {
6940 6947 const char *olds;
6941 6948 char s1, c;
6942 6949 int gs;
6943 6950
6944 6951 if (depth > DTRACE_PROBEKEY_MAXDEPTH)
6945 6952 return (-1);
6946 6953
6947 6954 if (s == NULL)
6948 6955 s = ""; /* treat NULL as empty string */
6949 6956
6950 6957 top:
6951 6958 olds = s;
6952 6959 s1 = *s++;
6953 6960
6954 6961 if (p == NULL)
6955 6962 return (0);
6956 6963
6957 6964 if ((c = *p++) == '\0')
6958 6965 return (s1 == '\0');
6959 6966
6960 6967 switch (c) {
6961 6968 case '[': {
6962 6969 int ok = 0, notflag = 0;
6963 6970 char lc = '\0';
6964 6971
6965 6972 if (s1 == '\0')
6966 6973 return (0);
6967 6974
6968 6975 if (*p == '!') {
6969 6976 notflag = 1;
6970 6977 p++;
6971 6978 }
6972 6979
6973 6980 if ((c = *p++) == '\0')
6974 6981 return (0);
6975 6982
6976 6983 do {
6977 6984 if (c == '-' && lc != '\0' && *p != ']') {
6978 6985 if ((c = *p++) == '\0')
6979 6986 return (0);
6980 6987 if (c == '\\' && (c = *p++) == '\0')
6981 6988 return (0);
6982 6989
6983 6990 if (notflag) {
6984 6991 if (s1 < lc || s1 > c)
6985 6992 ok++;
6986 6993 else
6987 6994 return (0);
6988 6995 } else if (lc <= s1 && s1 <= c)
6989 6996 ok++;
6990 6997
6991 6998 } else if (c == '\\' && (c = *p++) == '\0')
6992 6999 return (0);
6993 7000
6994 7001 lc = c; /* save left-hand 'c' for next iteration */
6995 7002
6996 7003 if (notflag) {
6997 7004 if (s1 != c)
6998 7005 ok++;
6999 7006 else
7000 7007 return (0);
7001 7008 } else if (s1 == c)
7002 7009 ok++;
7003 7010
7004 7011 if ((c = *p++) == '\0')
7005 7012 return (0);
7006 7013
7007 7014 } while (c != ']');
7008 7015
7009 7016 if (ok)
7010 7017 goto top;
7011 7018
7012 7019 return (0);
7013 7020 }
7014 7021
7015 7022 case '\\':
7016 7023 if ((c = *p++) == '\0')
7017 7024 return (0);
7018 7025 /*FALLTHRU*/
7019 7026
7020 7027 default:
7021 7028 if (c != s1)
7022 7029 return (0);
7023 7030 /*FALLTHRU*/
7024 7031
7025 7032 case '?':
7026 7033 if (s1 != '\0')
7027 7034 goto top;
7028 7035 return (0);
7029 7036
7030 7037 case '*':
7031 7038 while (*p == '*')
7032 7039 p++; /* consecutive *'s are identical to a single one */
7033 7040
7034 7041 if (*p == '\0')
7035 7042 return (1);
7036 7043
7037 7044 for (s = olds; *s != '\0'; s++) {
7038 7045 if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
7039 7046 return (gs);
7040 7047 }
7041 7048
7042 7049 return (0);
7043 7050 }
7044 7051 }
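/*
 * To illustrate the semantics, a few sample evaluations:
 *
 *	dtrace_match_glob("read", "re*", 0)	=> 1
 *	dtrace_match_glob("read", "r?a[cd]", 0)	=> 1
 *	dtrace_match_glob("read", "[!r]*", 0)	=> 0
 *	dtrace_match_glob(NULL, "*", 0)		=> 1 (NULL is the empty
 *						   string, which '*' matches)
 *
 * A pattern that forces deep '*' recursion -- e.g. many '*'s separated
 * by literal characters against an almost-matching string -- is bounded
 * by DTRACE_PROBEKEY_MAXDEPTH, at which point -1 is returned,
 * distinguishing recursion failure (<0) from a simple non-match (0).
 */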
7045 7052
7046 7053 /*ARGSUSED*/
7047 7054 static int
7048 7055 dtrace_match_string(const char *s, const char *p, int depth)
7049 7056 {
7050 7057 return (s != NULL && strcmp(s, p) == 0);
7051 7058 }
7052 7059
7053 7060 /*ARGSUSED*/
7054 7061 static int
7055 7062 dtrace_match_nul(const char *s, const char *p, int depth)
7056 7063 {
7057 7064 return (1); /* always match the empty pattern */
7058 7065 }
7059 7066
7060 7067 /*ARGSUSED*/
7061 7068 static int
7062 7069 dtrace_match_nonzero(const char *s, const char *p, int depth)
7063 7070 {
7064 7071 return (s != NULL && s[0] != '\0');
7065 7072 }
7066 7073
7067 7074 static int
7068 7075 dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
7069 7076 zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
7070 7077 {
7071 7078 dtrace_probe_t template, *probe;
7072 7079 dtrace_hash_t *hash = NULL;
7073 7080 int len, rc, best = INT_MAX, nmatched = 0;
7074 7081 dtrace_id_t i;
7075 7082
7076 7083 ASSERT(MUTEX_HELD(&dtrace_lock));
7077 7084
7078 7085 /*
7079 7086 * If the probe ID is specified in the key, just lookup by ID and
7080 7087 * invoke the match callback once if a matching probe is found.
7081 7088 */
7082 7089 if (pkp->dtpk_id != DTRACE_IDNONE) {
7083 7090 if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
7084 7091 dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
7085 7092 if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
7086 7093 return (DTRACE_MATCH_FAIL);
7087 7094 nmatched++;
7088 7095 }
7089 7096 return (nmatched);
7090 7097 }
7091 7098
7092 7099 template.dtpr_mod = (char *)pkp->dtpk_mod;
7093 7100 template.dtpr_func = (char *)pkp->dtpk_func;
7094 7101 template.dtpr_name = (char *)pkp->dtpk_name;
7095 7102
7096 7103 /*
7097 7104 * We want to find the most distinctive of the module name, function
7098 7105 * name, and probe name. So for each one that is not a glob pattern or
7099 7106 * empty string, we perform a lookup in the corresponding hash and
7100 7107 * use the hash table with the fewest collisions to do our search.
7101 7108 */
7102 7109 if (pkp->dtpk_mmatch == &dtrace_match_string &&
7103 7110 (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
7104 7111 best = len;
7105 7112 hash = dtrace_bymod;
7106 7113 }
7107 7114
7108 7115 if (pkp->dtpk_fmatch == &dtrace_match_string &&
7109 7116 (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
7110 7117 best = len;
7111 7118 hash = dtrace_byfunc;
7112 7119 }
7113 7120
7114 7121 if (pkp->dtpk_nmatch == &dtrace_match_string &&
7115 7122 (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
7116 7123 best = len;
7117 7124 hash = dtrace_byname;
7118 7125 }
7119 7126
7120 7127 /*
7121 7128 * If we did not select a hash table, iterate over every probe and
7122 7129 * invoke our callback for each one that matches our input probe key.
7123 7130 */
7124 7131 if (hash == NULL) {
7125 7132 for (i = 0; i < dtrace_nprobes; i++) {
7126 7133 if ((probe = dtrace_probes[i]) == NULL ||
7127 7134 dtrace_match_probe(probe, pkp, priv, uid,
7128 7135 zoneid) <= 0)
7129 7136 continue;
7130 7137
7131 7138 nmatched++;
7132 7139
7133 7140 if ((rc = (*matched)(probe, arg)) !=
7134 7141 DTRACE_MATCH_NEXT) {
7135 7142 if (rc == DTRACE_MATCH_FAIL)
7136 7143 return (DTRACE_MATCH_FAIL);
7137 7144 break;
7138 7145 }
7139 7146 }
7140 7147
7141 7148 return (nmatched);
7142 7149 }
7143 7150
7144 7151 /*
7145 7152 * If we selected a hash table, iterate over each probe of the same key
7146 7153 * name and invoke the callback for every probe that matches the other
7147 7154 * attributes of our input probe key.
7148 7155 */
7149 7156 for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
7150 7157 probe = *(DTRACE_HASHNEXT(hash, probe))) {
7151 7158
7152 7159 if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
7153 7160 continue;
7154 7161
7155 7162 nmatched++;
7156 7163
7157 7164 if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
7158 7165 if (rc == DTRACE_MATCH_FAIL)
7159 7166 return (DTRACE_MATCH_FAIL);
7160 7167 break;
7161 7168 }
7162 7169 }
7163 7170
7164 7171 return (nmatched);
7165 7172 }
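/*
 * To make the hash-selection heuristic concrete: for the description
 * syscall::read:entry, the module component is empty (dtrace_match_nul)
 * while the function ("read") and name ("entry") components are exact
 * strings, so dtrace_hash_collisions() is consulted for dtrace_byfunc
 * and dtrace_byname and the shorter chain -- typically the probes with
 * function "read" rather than the many probes named "entry" -- is the
 * one walked.  Only a description with no exact component at all (e.g.
 * syscall:::) falls back to the linear scan of dtrace_probes[].
 */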
7166 7173
7167 7174 /*
7168 7175 * Return the match function that dtrace_match_probe() should use to compare the
7169 7176 * specified pattern with a string. For NULL or empty patterns, we select
7170 7177 * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob().
7171 7178 * For non-empty non-glob strings, we use dtrace_match_string().
7172 7179 */
7173 7180 static dtrace_probekey_f *
7174 7181 dtrace_probekey_func(const char *p)
7175 7182 {
7176 7183 char c;
7177 7184
7178 7185 if (p == NULL || *p == '\0')
7179 7186 return (&dtrace_match_nul);
7180 7187
7181 7188 while ((c = *p++) != '\0') {
7182 7189 if (c == '[' || c == '?' || c == '*' || c == '\\')
7183 7190 return (&dtrace_match_glob);
7184 7191 }
7185 7192
7186 7193 return (&dtrace_match_string);
7187 7194 }
7188 7195
7189 7196 /*
7190 7197 * Build a probe comparison key for use with dtrace_match_probe() from the
7191 7198 * given probe description. By convention, a null key only matches anchored
7192 7199 * probes: if each field is the empty string, reset dtpk_fmatch to
7193 7200 * dtrace_match_nonzero().
7194 7201 */
7195 7202 static void
7196 7203 dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
7197 7204 {
7198 7205 pkp->dtpk_prov = pdp->dtpd_provider;
7199 7206 pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);
7200 7207
7201 7208 pkp->dtpk_mod = pdp->dtpd_mod;
7202 7209 pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);
7203 7210
7204 7211 pkp->dtpk_func = pdp->dtpd_func;
7205 7212 pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);
7206 7213
7207 7214 pkp->dtpk_name = pdp->dtpd_name;
7208 7215 pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);
7209 7216
7210 7217 pkp->dtpk_id = pdp->dtpd_id;
7211 7218
7212 7219 if (pkp->dtpk_id == DTRACE_IDNONE &&
7213 7220 pkp->dtpk_pmatch == &dtrace_match_nul &&
7214 7221 pkp->dtpk_mmatch == &dtrace_match_nul &&
7215 7222 pkp->dtpk_fmatch == &dtrace_match_nul &&
7216 7223 pkp->dtpk_nmatch == &dtrace_match_nul)
7217 7224 pkp->dtpk_fmatch = &dtrace_match_nonzero;
7218 7225 }
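/*
 * For instance, the empty description (":::", with no probe ID) yields
 * a key whose four match functions are all dtrace_match_nul(); the
 * convention above then substitutes dtrace_match_nonzero() for
 * dtpk_fmatch, so such a key matches exactly the anchored probes --
 * those with a non-empty function name -- and no others.
 */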
7219 7226
7220 7227 /*
7221 7228 * DTrace Provider-to-Framework API Functions
7222 7229 *
7223 7230 * These functions implement much of the Provider-to-Framework API, as
7224 7231 * described in <sys/dtrace.h>. The parts of the API not in this section are
7225 7232 * the functions in the API for probe management (found below), and
7226 7233 * dtrace_probe() itself (found above).
7227 7234 */
7228 7235
7229 7236 /*
7230 7237 * Register the calling provider with the DTrace framework. This should
7231 7238 * generally be called by DTrace providers in their attach(9E) entry point.
7232 7239 */
7233 7240 int
7234 7241 dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
7235 7242 cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
7236 7243 {
7237 7244 dtrace_provider_t *provider;
7238 7245
7239 7246 if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
7240 7247 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7241 7248 "arguments", name ? name : "<NULL>");
7242 7249 return (EINVAL);
7243 7250 }
7244 7251
7245 7252 if (name[0] == '\0' || dtrace_badname(name)) {
7246 7253 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7247 7254 "provider name", name);
7248 7255 return (EINVAL);
7249 7256 }
7250 7257
7251 7258 if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
7252 7259 pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
7253 7260 pops->dtps_destroy == NULL ||
7254 7261 ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
7255 7262 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7256 7263 "provider ops", name);
7257 7264 return (EINVAL);
7258 7265 }
7259 7266
7260 7267 if (dtrace_badattr(&pap->dtpa_provider) ||
7261 7268 dtrace_badattr(&pap->dtpa_mod) ||
7262 7269 dtrace_badattr(&pap->dtpa_func) ||
7263 7270 dtrace_badattr(&pap->dtpa_name) ||
7264 7271 dtrace_badattr(&pap->dtpa_args)) {
7265 7272 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7266 7273 "provider attributes", name);
7267 7274 return (EINVAL);
7268 7275 }
7269 7276
7270 7277 if (priv & ~DTRACE_PRIV_ALL) {
7271 7278 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7272 7279 "privilege attributes", name);
7273 7280 return (EINVAL);
7274 7281 }
7275 7282
7276 7283 if ((priv & DTRACE_PRIV_KERNEL) &&
7277 7284 (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
7278 7285 pops->dtps_mode == NULL) {
7279 7286 cmn_err(CE_WARN, "failed to register provider '%s': need "
7280 7287 "dtps_mode() op for given privilege attributes", name);
7281 7288 return (EINVAL);
7282 7289 }
7283 7290
7284 7291 provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
7285 7292 provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
7286 7293 (void) strcpy(provider->dtpv_name, name);
7287 7294
7288 7295 provider->dtpv_attr = *pap;
7289 7296 provider->dtpv_priv.dtpp_flags = priv;
7290 7297 if (cr != NULL) {
7291 7298 provider->dtpv_priv.dtpp_uid = crgetuid(cr);
7292 7299 provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
7293 7300 }
7294 7301 provider->dtpv_pops = *pops;
7295 7302
7296 7303 if (pops->dtps_provide == NULL) {
7297 7304 ASSERT(pops->dtps_provide_module != NULL);
7298 7305 provider->dtpv_pops.dtps_provide =
7299 7306 (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
7300 7307 }
7301 7308
7302 7309 if (pops->dtps_provide_module == NULL) {
7303 7310 ASSERT(pops->dtps_provide != NULL);
7304 7311 provider->dtpv_pops.dtps_provide_module =
7305 7312 (void (*)(void *, struct modctl *))dtrace_nullop;
7306 7313 }
7307 7314
7308 7315 if (pops->dtps_suspend == NULL) {
7309 7316 ASSERT(pops->dtps_resume == NULL);
7310 7317 provider->dtpv_pops.dtps_suspend =
7311 7318 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
7312 7319 provider->dtpv_pops.dtps_resume =
7313 7320 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
7314 7321 }
7315 7322
7316 7323 provider->dtpv_arg = arg;
7317 7324 *idp = (dtrace_provider_id_t)provider;
7318 7325
7319 7326 if (pops == &dtrace_provider_ops) {
7320 7327 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
7321 7328 ASSERT(MUTEX_HELD(&dtrace_lock));
7322 7329 ASSERT(dtrace_anon.dta_enabling == NULL);
7323 7330
7324 7331 /*
7325 7332 * We make sure that the DTrace provider is at the head of
7326 7333 * the provider chain.
7327 7334 */
7328 7335 provider->dtpv_next = dtrace_provider;
7329 7336 dtrace_provider = provider;
7330 7337 return (0);
7331 7338 }
7332 7339
7333 7340 mutex_enter(&dtrace_provider_lock);
7334 7341 mutex_enter(&dtrace_lock);
7335 7342
7336 7343 /*
7337 7344 * If there is at least one provider registered, we'll add this
7338 7345 * provider after the first provider.
7339 7346 */
7340 7347 if (dtrace_provider != NULL) {
7341 7348 provider->dtpv_next = dtrace_provider->dtpv_next;
7342 7349 dtrace_provider->dtpv_next = provider;
7343 7350 } else {
7344 7351 dtrace_provider = provider;
7345 7352 }
7346 7353
7347 7354 if (dtrace_retained != NULL) {
7348 7355 dtrace_enabling_provide(provider);
7349 7356
7350 7357 /*
7351 7358 * Now we need to call dtrace_enabling_matchall() -- which
7352 7359 * will acquire cpu_lock and dtrace_lock. We therefore need
7353 7360 * to drop all of our locks before calling into it...
7354 7361 */
7355 7362 mutex_exit(&dtrace_lock);
7356 7363 mutex_exit(&dtrace_provider_lock);
7357 7364 dtrace_enabling_matchall();
7358 7365
7359 7366 return (0);
7360 7367 }
7361 7368
7362 7369 mutex_exit(&dtrace_lock);
7363 7370 mutex_exit(&dtrace_provider_lock);
7364 7371
7365 7372 return (0);
7366 7373 }
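/*
 * A minimal sketch of the registration sequence as seen from a
 * provider's attach(9E) entry point.  (The "foo" names are
 * hypothetical; field order follows dtrace_pops_t in <sys/dtrace.h>,
 * and the checks above require at least one provide op plus enable,
 * disable and destroy.)
 *
 *	static dtrace_pattr_t foo_attr = {
 *	{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING,
 *	    DTRACE_CLASS_COMMON },
 *	(...four more rows: module, function, name and args attributes)
 *	};
 *
 *	static dtrace_pops_t foo_pops = {
 *		foo_provide, NULL, foo_enable, foo_disable,
 *		NULL, NULL, NULL, NULL, NULL, foo_destroy
 *	};
 *
 *	static dtrace_provider_id_t foo_id;
 *
 *	if (dtrace_register("foo", &foo_attr, DTRACE_PRIV_KERNEL, NULL,
 *	    &foo_pops, NULL, &foo_id) != 0)
 *		return (DDI_FAILURE);
 *
 * The matching detach(9E) path calls dtrace_unregister(foo_id) and must
 * be prepared for EBUSY -- or EAGAIN for defunct providers -- as
 * described below.
 */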
7367 7374
7368 7375 /*
7369 7376 * Unregister the specified provider from the DTrace framework. This should
7370 7377 * generally be called by DTrace providers in their detach(9E) entry point.
7371 7378 */
7372 7379 int
7373 7380 dtrace_unregister(dtrace_provider_id_t id)
7374 7381 {
7375 7382 dtrace_provider_t *old = (dtrace_provider_t *)id;
7376 7383 dtrace_provider_t *prev = NULL;
7377 7384 int i, self = 0, noreap = 0;
7378 7385 dtrace_probe_t *probe, *first = NULL;
7379 7386
7380 7387 if (old->dtpv_pops.dtps_enable ==
7381 7388 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
7382 7389 /*
7383 7390 * If DTrace itself is the provider, we're called with locks
7384 7391 * already held.
7385 7392 */
7386 7393 ASSERT(old == dtrace_provider);
7387 7394 ASSERT(dtrace_devi != NULL);
7388 7395 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
7389 7396 ASSERT(MUTEX_HELD(&dtrace_lock));
7390 7397 self = 1;
7391 7398
7392 7399 if (dtrace_provider->dtpv_next != NULL) {
7393 7400 /*
7394 7401 * There's another provider here; return failure.
7395 7402 */
7396 7403 return (EBUSY);
7397 7404 }
7398 7405 } else {
7399 7406 mutex_enter(&dtrace_provider_lock);
7400 7407 mutex_enter(&mod_lock);
7401 7408 mutex_enter(&dtrace_lock);
7402 7409 }
7403 7410
7404 7411 /*
7405 7412 * If anyone has /dev/dtrace open, or if there are anonymous enabled
7406 7413 * probes, we refuse to let providers slither away, unless this
7407 7414 * provider has already been explicitly invalidated.
7408 7415 */
7409 7416 if (!old->dtpv_defunct &&
7410 7417 (dtrace_opens || (dtrace_anon.dta_state != NULL &&
7411 7418 dtrace_anon.dta_state->dts_necbs > 0))) {
7412 7419 if (!self) {
7413 7420 mutex_exit(&dtrace_lock);
7414 7421 mutex_exit(&mod_lock);
7415 7422 mutex_exit(&dtrace_provider_lock);
7416 7423 }
7417 7424 return (EBUSY);
7418 7425 }
7419 7426
7420 7427 /*
7421 7428 * Attempt to destroy the probes associated with this provider.
7422 7429 */
7423 7430 for (i = 0; i < dtrace_nprobes; i++) {
7424 7431 if ((probe = dtrace_probes[i]) == NULL)
7425 7432 continue;
7426 7433
7427 7434 if (probe->dtpr_provider != old)
7428 7435 continue;
7429 7436
7430 7437 if (probe->dtpr_ecb == NULL)
7431 7438 continue;
7432 7439
7433 7440 /*
7434 7441 * If we are trying to unregister a defunct provider, and the
7435 7442 * provider was made defunct within the interval dictated by
7436 7443 * dtrace_unregister_defunct_reap, we'll (asynchronously)
7437 7444 * attempt to reap our enablings. To denote that the provider
7438 7445 * should reattempt to unregister itself at some point in the
7439 7446 * future, we will return a differentiable error code (EAGAIN
7440 7447 * instead of EBUSY) in this case.
7441 7448 */
7442 7449 if (dtrace_gethrtime() - old->dtpv_defunct >
7443 7450 dtrace_unregister_defunct_reap)
7444 7451 noreap = 1;
7445 7452
7446 7453 if (!self) {
7447 7454 mutex_exit(&dtrace_lock);
7448 7455 mutex_exit(&mod_lock);
7449 7456 mutex_exit(&dtrace_provider_lock);
7450 7457 }
7451 7458
7452 7459 if (noreap)
7453 7460 return (EBUSY);
7454 7461
7455 7462 (void) taskq_dispatch(dtrace_taskq,
7456 7463 (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);
7457 7464
7458 7465 return (EAGAIN);
7459 7466 }
7460 7467
7461 7468 /*
7462 7469 * All of the probes for this provider are disabled; we can safely
7463 7470 * remove all of them from their hash chains and from the probe array.
7464 7471 */
7465 7472 for (i = 0; i < dtrace_nprobes; i++) {
7466 7473 if ((probe = dtrace_probes[i]) == NULL)
7467 7474 continue;
7468 7475
7469 7476 if (probe->dtpr_provider != old)
7470 7477 continue;
7471 7478
7472 7479 dtrace_probes[i] = NULL;
7473 7480
7474 7481 dtrace_hash_remove(dtrace_bymod, probe);
7475 7482 dtrace_hash_remove(dtrace_byfunc, probe);
7476 7483 dtrace_hash_remove(dtrace_byname, probe);
7477 7484
7478 7485 if (first == NULL) {
7479 7486 first = probe;
7480 7487 probe->dtpr_nextmod = NULL;
7481 7488 } else {
7482 7489 probe->dtpr_nextmod = first;
7483 7490 first = probe;
7484 7491 }
7485 7492 }
7486 7493
7487 7494 /*
7488 7495 * The provider's probes have been removed from the hash chains and
7489 7496 * from the probe array. Now issue a dtrace_sync() to be sure that
7490 7497 * everyone has cleared out from any probe array processing.
7491 7498 */
7492 7499 dtrace_sync();
7493 7500
7494 7501 for (probe = first; probe != NULL; probe = first) {
7495 7502 first = probe->dtpr_nextmod;
7496 7503
7497 7504 old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
7498 7505 probe->dtpr_arg);
7499 7506 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
7500 7507 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
7501 7508 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
7502 7509 vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
7503 7510 kmem_free(probe, sizeof (dtrace_probe_t));
7504 7511 }
7505 7512
7506 7513 if ((prev = dtrace_provider) == old) {
7507 7514 ASSERT(self || dtrace_devi == NULL);
7508 7515 ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
7509 7516 dtrace_provider = old->dtpv_next;
7510 7517 } else {
7511 7518 while (prev != NULL && prev->dtpv_next != old)
7512 7519 prev = prev->dtpv_next;
7513 7520
7514 7521 if (prev == NULL) {
7515 7522 panic("attempt to unregister non-existent "
7516 7523 "dtrace provider %p\n", (void *)id);
7517 7524 }
7518 7525
7519 7526 prev->dtpv_next = old->dtpv_next;
7520 7527 }
7521 7528
7522 7529 if (!self) {
7523 7530 mutex_exit(&dtrace_lock);
7524 7531 mutex_exit(&mod_lock);
7525 7532 mutex_exit(&dtrace_provider_lock);
7526 7533 }
7527 7534
7528 7535 kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
7529 7536 kmem_free(old, sizeof (dtrace_provider_t));
7530 7537
7531 7538 return (0);
7532 7539 }
7533 7540
7534 7541 /*
7535 7542 * Invalidate the specified provider. All subsequent probe lookups for the
7536 7543 * specified provider will fail, but its probes will not be removed.
7537 7544 */
7538 7545 void
7539 7546 dtrace_invalidate(dtrace_provider_id_t id)
7540 7547 {
7541 7548 dtrace_provider_t *pvp = (dtrace_provider_t *)id;
7542 7549
7543 7550 ASSERT(pvp->dtpv_pops.dtps_enable !=
7544 7551 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
7545 7552
7546 7553 mutex_enter(&dtrace_provider_lock);
7547 7554 mutex_enter(&dtrace_lock);
7548 7555
7549 7556 pvp->dtpv_defunct = dtrace_gethrtime();
7550 7557
7551 7558 mutex_exit(&dtrace_lock);
7552 7559 mutex_exit(&dtrace_provider_lock);
7553 7560 }
7554 7561
7555 7562 /*
7556 7563 * Indicate whether or not DTrace has attached.
7557 7564 */
7558 7565 int
7559 7566 dtrace_attached(void)
7560 7567 {
7561 7568 /*
7562 7569 * dtrace_provider will be non-NULL iff the DTrace driver has
7563 7570 * attached. (It's non-NULL because DTrace is always itself a
7564 7571 * provider.)
7565 7572 */
7566 7573 return (dtrace_provider != NULL);
7567 7574 }
7568 7575
7569 7576 /*
7570 7577 * Remove all the unenabled probes for the given provider. This function is
7571 7578 * not unlike dtrace_unregister(), except that it doesn't remove the provider
7572 7579 * -- just as many of its associated probes as it can.
7573 7580 */
7574 7581 int
7575 7582 dtrace_condense(dtrace_provider_id_t id)
7576 7583 {
7577 7584 dtrace_provider_t *prov = (dtrace_provider_t *)id;
7578 7585 int i;
7579 7586 dtrace_probe_t *probe;
7580 7587
7581 7588 /*
7582 7589 * Make sure this isn't the dtrace provider itself.
7583 7590 */
7584 7591 ASSERT(prov->dtpv_pops.dtps_enable !=
7585 7592 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
7586 7593
7587 7594 mutex_enter(&dtrace_provider_lock);
7588 7595 mutex_enter(&dtrace_lock);
7589 7596
7590 7597 /*
7591 7598 * Attempt to destroy the probes associated with this provider.
7592 7599 */
7593 7600 for (i = 0; i < dtrace_nprobes; i++) {
7594 7601 if ((probe = dtrace_probes[i]) == NULL)
7595 7602 continue;
7596 7603
7597 7604 if (probe->dtpr_provider != prov)
7598 7605 continue;
7599 7606
7600 7607 if (probe->dtpr_ecb != NULL)
7601 7608 continue;
7602 7609
7603 7610 dtrace_probes[i] = NULL;
7604 7611
7605 7612 dtrace_hash_remove(dtrace_bymod, probe);
7606 7613 dtrace_hash_remove(dtrace_byfunc, probe);
7607 7614 dtrace_hash_remove(dtrace_byname, probe);
7608 7615
7609 7616 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
7610 7617 probe->dtpr_arg);
7611 7618 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
7612 7619 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
7613 7620 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
7614 7621 kmem_free(probe, sizeof (dtrace_probe_t));
7615 7622 vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
7616 7623 }
7617 7624
7618 7625 mutex_exit(&dtrace_lock);
7619 7626 mutex_exit(&dtrace_provider_lock);
7620 7627
7621 7628 return (0);
7622 7629 }
7623 7630
7624 7631 /*
7625 7632 * DTrace Probe Management Functions
7626 7633 *
7627 7634 * The functions in this section perform the DTrace probe management,
7628 7635 * including functions to create probes, look-up probes, and call into the
7629 7636 * providers to request that probes be provided. Some of these functions are
7630 7637 * in the Provider-to-Framework API; these functions can be identified by the
7631 7638 * fact that they are not declared "static".
7632 7639 */
7633 7640
7634 7641 /*
7635 7642 * Create a probe with the specified module name, function name, and name.
7636 7643 */
7637 7644 dtrace_id_t
7638 7645 dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
7639 7646 const char *func, const char *name, int aframes, void *arg)
7640 7647 {
7641 7648 dtrace_probe_t *probe, **probes;
7642 7649 dtrace_provider_t *provider = (dtrace_provider_t *)prov;
7643 7650 dtrace_id_t id;
7644 7651
7645 7652 if (provider == dtrace_provider) {
7646 7653 ASSERT(MUTEX_HELD(&dtrace_lock));
7647 7654 } else {
7648 7655 mutex_enter(&dtrace_lock);
7649 7656 }
7650 7657
7651 7658 id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
7652 7659 VM_BESTFIT | VM_SLEEP);
7653 7660 probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);
7654 7661
7655 7662 probe->dtpr_id = id;
7656 7663 probe->dtpr_gen = dtrace_probegen++;
7657 7664 probe->dtpr_mod = dtrace_strdup(mod);
7658 7665 probe->dtpr_func = dtrace_strdup(func);
7659 7666 probe->dtpr_name = dtrace_strdup(name);
7660 7667 probe->dtpr_arg = arg;
7661 7668 probe->dtpr_aframes = aframes;
7662 7669 probe->dtpr_provider = provider;
7663 7670
7664 7671 dtrace_hash_add(dtrace_bymod, probe);
7665 7672 dtrace_hash_add(dtrace_byfunc, probe);
7666 7673 dtrace_hash_add(dtrace_byname, probe);
7667 7674
7668 7675 if (id - 1 >= dtrace_nprobes) {
7669 7676 size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
7670 7677 size_t nsize = osize << 1;
7671 7678
7672 7679 if (nsize == 0) {
7673 7680 ASSERT(osize == 0);
7674 7681 ASSERT(dtrace_probes == NULL);
7675 7682 nsize = sizeof (dtrace_probe_t *);
7676 7683 }
7677 7684
7678 7685 probes = kmem_zalloc(nsize, KM_SLEEP);
7679 7686
7680 7687 if (dtrace_probes == NULL) {
7681 7688 ASSERT(osize == 0);
7682 7689 dtrace_probes = probes;
7683 7690 dtrace_nprobes = 1;
7684 7691 } else {
7685 7692 dtrace_probe_t **oprobes = dtrace_probes;
7686 7693
7687 7694 bcopy(oprobes, probes, osize);
7688 7695 dtrace_membar_producer();
7689 7696 dtrace_probes = probes;
7690 7697
7691 7698 dtrace_sync();
7692 7699
7693 7700 /*
7694 7701 * All CPUs are now seeing the new probes array; we can
7695 7702 * safely free the old array.
7696 7703 */
7697 7704 kmem_free(oprobes, osize);
7698 7705 dtrace_nprobes <<= 1;
7699 7706 }
7700 7707
7701 7708 ASSERT(id - 1 < dtrace_nprobes);
7702 7709 }
7703 7710
7704 7711 ASSERT(dtrace_probes[id - 1] == NULL);
7705 7712 dtrace_probes[id - 1] = probe;
7706 7713
7707 7714 if (provider != dtrace_provider)
7708 7715 mutex_exit(&dtrace_lock);
7709 7716
7710 7717 return (id);
7711 7718 }
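/*
 * A provider's dtps_provide() implementation conventionally guards
 * against re-creating its probes by consulting dtrace_probe_lookup()
 * first.  A sketch, continuing the hypothetical "foo" provider:
 *
 *	static void
 *	foo_provide(void *arg, const dtrace_probedesc_t *desc)
 *	{
 *		if (dtrace_probe_lookup(foo_id, NULL, "foo_func",
 *		    "entry") != 0)
 *			return;
 *
 *		(void) dtrace_probe_create(foo_id, NULL, "foo_func",
 *		    "entry", 0, NULL);
 *	}
 *
 * Note that dtrace_probe_lookup() returns 0 ("not found") rather than
 * an ID on failure; valid probe IDs begin at 1, as the id - 1 indexing
 * above reflects.
 */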
7712 7719
7713 7720 static dtrace_probe_t *
7714 7721 dtrace_probe_lookup_id(dtrace_id_t id)
7715 7722 {
7716 7723 ASSERT(MUTEX_HELD(&dtrace_lock));
7717 7724
7718 7725 if (id == 0 || id > dtrace_nprobes)
7719 7726 return (NULL);
7720 7727
7721 7728 return (dtrace_probes[id - 1]);
7722 7729 }
7723 7730
7724 7731 static int
7725 7732 dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
7726 7733 {
7727 7734 *((dtrace_id_t *)arg) = probe->dtpr_id;
7728 7735
7729 7736 return (DTRACE_MATCH_DONE);
7730 7737 }
7731 7738
7732 7739 /*
7733 7740 * Look up a probe based on provider and one or more of module name, function
7734 7741 * name and probe name.
7735 7742 */
7736 7743 dtrace_id_t
7737 7744 dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
7738 7745 const char *func, const char *name)
7739 7746 {
7740 7747 dtrace_probekey_t pkey;
7741 7748 dtrace_id_t id;
7742 7749 int match;
7743 7750
7744 7751 pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
7745 7752 pkey.dtpk_pmatch = &dtrace_match_string;
7746 7753 pkey.dtpk_mod = mod;
7747 7754 pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
7748 7755 pkey.dtpk_func = func;
7749 7756 pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
7750 7757 pkey.dtpk_name = name;
7751 7758 pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
7752 7759 pkey.dtpk_id = DTRACE_IDNONE;
7753 7760
7754 7761 mutex_enter(&dtrace_lock);
7755 7762 match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
7756 7763 dtrace_probe_lookup_match, &id);
7757 7764 mutex_exit(&dtrace_lock);
7758 7765
7759 7766 ASSERT(match == 1 || match == 0);
7760 7767 return (match ? id : 0);
7761 7768 }
7762 7769
7763 7770 /*
7764 7771 * Returns the probe argument associated with the specified probe.
7765 7772 */
7766 7773 void *
7767 7774 dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
7768 7775 {
7769 7776 dtrace_probe_t *probe;
7770 7777 void *rval = NULL;
7771 7778
7772 7779 mutex_enter(&dtrace_lock);
7773 7780
7774 7781 if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
7775 7782 probe->dtpr_provider == (dtrace_provider_t *)id)
7776 7783 rval = probe->dtpr_arg;
7777 7784
7778 7785 mutex_exit(&dtrace_lock);
7779 7786
7780 7787 return (rval);
7781 7788 }
7782 7789
7783 7790 /*
7784 7791 * Copy a probe into a probe description.
7785 7792 */
7786 7793 static void
7787 7794 dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
7788 7795 {
7789 7796 bzero(pdp, sizeof (dtrace_probedesc_t));
7790 7797 pdp->dtpd_id = prp->dtpr_id;
7791 7798
7792 7799 (void) strncpy(pdp->dtpd_provider,
7793 7800 prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1);
7794 7801
7795 7802 (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1);
7796 7803 (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1);
7797 7804 (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1);
7798 7805 }
7799 7806
7800 7807 /*
7801 7808 * Called to indicate that a probe -- or probes -- should be provided by a
7802 7809 * specified provider. If the specified description is NULL, the provider will
7803 7810 * be told to provide all of its probes. (This is done whenever a new
7804 7811 * consumer comes along, or whenever a retained enabling is to be matched.) If
7805 7812 * the specified description is non-NULL, the provider is given the
7806 7813 * opportunity to dynamically provide the specified probe, allowing providers
7807 7814 * to support the creation of probes on-the-fly. (So-called _autocreated_
7808 7815 * probes.) If the provider is NULL, the operations will be applied to all
7809 7816 * providers; if the provider is non-NULL the operations will only be applied
7810 7817 * to the specified provider. The dtrace_provider_lock must be held, and the
7811 7818 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
7812 7819 * will need to grab the dtrace_lock when it reenters the framework through
7813 7820 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
7814 7821 */
7815 7822 static void
7816 7823 dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
7817 7824 {
7818 7825 struct modctl *ctl;
7819 7826 int all = 0;
7820 7827
7821 7828 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
7822 7829
7823 7830 if (prv == NULL) {
7824 7831 all = 1;
7825 7832 prv = dtrace_provider;
7826 7833 }
7827 7834
7828 7835 do {
7829 7836 /*
7830 7837 * First, call the blanket provide operation.
7831 7838 */
7832 7839 prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);
7833 7840
7834 7841 /*
7835 7842 * Now call the per-module provide operation. We will grab
7836 7843 * mod_lock to prevent the list from being modified. Note
7837 7844 * that this also prevents the mod_busy bits from changing.
7838 7845 * (mod_busy can only be changed with mod_lock held.)
7839 7846 */
7840 7847 mutex_enter(&mod_lock);
7841 7848
7842 7849 ctl = &modules;
7843 7850 do {
7844 7851 if (ctl->mod_busy || ctl->mod_mp == NULL)
7845 7852 continue;
7846 7853
7847 7854 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
7848 7855
7849 7856 } while ((ctl = ctl->mod_next) != &modules);
7850 7857
7851 7858 mutex_exit(&mod_lock);
7852 7859 } while (all && (prv = prv->dtpv_next) != NULL);
7853 7860 }
7854 7861
7855 7862 /*
7856 7863 * Iterate over each probe, and call the Framework-to-Provider API function
7857 7864 * denoted by offs.
7858 7865 */
7859 7866 static void
7860 7867 dtrace_probe_foreach(uintptr_t offs)
7861 7868 {
7862 7869 dtrace_provider_t *prov;
7863 7870 void (*func)(void *, dtrace_id_t, void *);
7864 7871 dtrace_probe_t *probe;
7865 7872 dtrace_icookie_t cookie;
7866 7873 int i;
7867 7874
7868 7875 /*
7869 7876 * We disable interrupts to walk through the probe array. This is
7870 7877 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
7871 7878 * won't see stale data.
7872 7879 */
7873 7880 cookie = dtrace_interrupt_disable();
7874 7881
7875 7882 for (i = 0; i < dtrace_nprobes; i++) {
7876 7883 if ((probe = dtrace_probes[i]) == NULL)
7877 7884 continue;
7878 7885
7879 7886 if (probe->dtpr_ecb == NULL) {
7880 7887 /*
7881 7888 * This probe isn't enabled -- don't call the function.
7882 7889 */
7883 7890 continue;
7884 7891 }
7885 7892
7886 7893 prov = probe->dtpr_provider;
7887 7894 func = *((void(**)(void *, dtrace_id_t, void *))
7888 7895 ((uintptr_t)&prov->dtpv_pops + offs));
7889 7896
7890 7897 func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
7891 7898 }
7892 7899
7893 7900 dtrace_interrupt_enable(cookie);
7894 7901 }
7895 7902
7896 7903 static int
7897 7904 dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
7898 7905 {
7899 7906 dtrace_probekey_t pkey;
7900 7907 uint32_t priv;
7901 7908 uid_t uid;
7902 7909 zoneid_t zoneid;
7903 7910
7904 7911 ASSERT(MUTEX_HELD(&dtrace_lock));
7905 7912 dtrace_ecb_create_cache = NULL;
7906 7913
7907 7914 if (desc == NULL) {
7908 7915 /*
7909 7916 * If we're passed a NULL description, we're being asked to
7910 7917 * create an ECB with a NULL probe.
7911 7918 */
7912 7919 (void) dtrace_ecb_create_enable(NULL, enab);
7913 7920 return (0);
7914 7921 }
7915 7922
7916 7923 dtrace_probekey(desc, &pkey);
7917 7924 dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
7918 7925 &priv, &uid, &zoneid);
7919 7926
7920 7927 return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
7921 7928 enab));
7922 7929 }
7923 7930
7924 7931 /*
7925 7932 * DTrace Helper Provider Functions
7926 7933 */
7927 7934 static void
7928 7935 dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
7929 7936 {
7930 7937 attr->dtat_name = DOF_ATTR_NAME(dofattr);
7931 7938 attr->dtat_data = DOF_ATTR_DATA(dofattr);
7932 7939 attr->dtat_class = DOF_ATTR_CLASS(dofattr);
7933 7940 }
7934 7941
7935 7942 static void
7936 7943 dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
7937 7944 const dof_provider_t *dofprov, char *strtab)
7938 7945 {
7939 7946 hprov->dthpv_provname = strtab + dofprov->dofpv_name;
7940 7947 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
7941 7948 dofprov->dofpv_provattr);
7942 7949 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
7943 7950 dofprov->dofpv_modattr);
7944 7951 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
7945 7952 dofprov->dofpv_funcattr);
7946 7953 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
7947 7954 dofprov->dofpv_nameattr);
7948 7955 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
7949 7956 dofprov->dofpv_argsattr);
7950 7957 }
7951 7958
7952 7959 static void
7953 7960 dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
7954 7961 {
7955 7962 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
7956 7963 dof_hdr_t *dof = (dof_hdr_t *)daddr;
7957 7964 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
7958 7965 dof_provider_t *provider;
7959 7966 dof_probe_t *probe;
7960 7967 uint32_t *off, *enoff;
7961 7968 uint8_t *arg;
7962 7969 char *strtab;
7963 7970 uint_t i, nprobes;
7964 7971 dtrace_helper_provdesc_t dhpv;
7965 7972 dtrace_helper_probedesc_t dhpb;
7966 7973 dtrace_meta_t *meta = dtrace_meta_pid;
7967 7974 dtrace_mops_t *mops = &meta->dtm_mops;
7968 7975 void *parg;
7969 7976
7970 7977 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
7971 7978 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7972 7979 provider->dofpv_strtab * dof->dofh_secsize);
7973 7980 prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7974 7981 provider->dofpv_probes * dof->dofh_secsize);
7975 7982 arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7976 7983 provider->dofpv_prargs * dof->dofh_secsize);
7977 7984 off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7978 7985 provider->dofpv_proffs * dof->dofh_secsize);
7979 7986
7980 7987 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
7981 7988 off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
7982 7989 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
7983 7990 enoff = NULL;
7984 7991
7985 7992 /*
7986 7993 * See dtrace_helper_provider_validate().
7987 7994 */
7988 7995 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
7989 7996 provider->dofpv_prenoffs != DOF_SECT_NONE) {
7990 7997 enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7991 7998 provider->dofpv_prenoffs * dof->dofh_secsize);
7992 7999 enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
7993 8000 }
7994 8001
7995 8002 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
7996 8003
7997 8004 /*
7998 8005 * Create the provider.
7999 8006 */
8000 8007 dtrace_dofprov2hprov(&dhpv, provider, strtab);
8001 8008
8002 8009 if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
8003 8010 return;
8004 8011
8005 8012 meta->dtm_count++;
8006 8013
8007 8014 /*
8008 8015 * Create the probes.
8009 8016 */
8010 8017 for (i = 0; i < nprobes; i++) {
8011 8018 probe = (dof_probe_t *)(uintptr_t)(daddr +
8012 8019 prb_sec->dofs_offset + i * prb_sec->dofs_entsize);
8013 8020
8014 8021 dhpb.dthpb_mod = dhp->dofhp_mod;
8015 8022 dhpb.dthpb_func = strtab + probe->dofpr_func;
8016 8023 dhpb.dthpb_name = strtab + probe->dofpr_name;
8017 8024 dhpb.dthpb_base = probe->dofpr_addr;
8018 8025 dhpb.dthpb_offs = off + probe->dofpr_offidx;
8019 8026 dhpb.dthpb_noffs = probe->dofpr_noffs;
8020 8027 if (enoff != NULL) {
8021 8028 dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
8022 8029 dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
8023 8030 } else {
8024 8031 dhpb.dthpb_enoffs = NULL;
8025 8032 dhpb.dthpb_nenoffs = 0;
8026 8033 }
8027 8034 dhpb.dthpb_args = arg + probe->dofpr_argidx;
8028 8035 dhpb.dthpb_nargc = probe->dofpr_nargc;
8029 8036 dhpb.dthpb_xargc = probe->dofpr_xargc;
8030 8037 dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
8031 8038 dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;
8032 8039
8033 8040 mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
8034 8041 }
8035 8042 }
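/*
 * Put another way: a DOF provider section is a small graph of section
 * indices.  dofpv_strtab, dofpv_probes, dofpv_prargs, dofpv_proffs (and
 * optionally dofpv_prenoffs) each name another section -- string table,
 * probe array, argument mappings, offsets and is-enabled offsets,
 * respectively -- located as daddr + dofh_secoff + index * dofh_secsize.
 * These references are sanity-checked in advance by
 * dtrace_helper_provider_validate(), which is why they can be
 * dereferenced here without further checks.
 */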
8036 8043
8037 8044 static void
8038 8045 dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
8039 8046 {
8040 8047 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8041 8048 dof_hdr_t *dof = (dof_hdr_t *)daddr;
8042 8049 int i;
8043 8050
8044 8051 ASSERT(MUTEX_HELD(&dtrace_meta_lock));
8045 8052
8046 8053 for (i = 0; i < dof->dofh_secnum; i++) {
8047 8054 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
8048 8055 dof->dofh_secoff + i * dof->dofh_secsize);
8049 8056
8050 8057 if (sec->dofs_type != DOF_SECT_PROVIDER)
8051 8058 continue;
8052 8059
8053 8060 dtrace_helper_provide_one(dhp, sec, pid);
8054 8061 }
8055 8062
8056 8063 /*
8057 8064 * We may have just created probes, so we must now rematch against
8058 8065 * any retained enablings. Note that this call will acquire both
8059 8066 * cpu_lock and dtrace_lock; the fact that we are holding
8060 8067 * dtrace_meta_lock now is what defines the ordering with respect to
8061 8068 * these three locks.
8062 8069 */
8063 8070 dtrace_enabling_matchall();
8064 8071 }
8065 8072
8066 8073 static void
8067 8074 dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
8068 8075 {
8069 8076 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8070 8077 dof_hdr_t *dof = (dof_hdr_t *)daddr;
8071 8078 dof_sec_t *str_sec;
8072 8079 dof_provider_t *provider;
8073 8080 char *strtab;
8074 8081 dtrace_helper_provdesc_t dhpv;
8075 8082 dtrace_meta_t *meta = dtrace_meta_pid;
8076 8083 dtrace_mops_t *mops = &meta->dtm_mops;
8077 8084
8078 8085 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
8079 8086 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8080 8087 provider->dofpv_strtab * dof->dofh_secsize);
8081 8088
8082 8089 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
8083 8090
8084 8091 /*
8085 8092 * Create the provider.
8086 8093 */
8087 8094 dtrace_dofprov2hprov(&dhpv, provider, strtab);
8088 8095
8089 8096 mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);
8090 8097
8091 8098 meta->dtm_count--;
8092 8099 }
8093 8100
8094 8101 static void
8095 8102 dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
8096 8103 {
8097 8104 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8098 8105 dof_hdr_t *dof = (dof_hdr_t *)daddr;
8099 8106 int i;
8100 8107
8101 8108 ASSERT(MUTEX_HELD(&dtrace_meta_lock));
8102 8109
8103 8110 for (i = 0; i < dof->dofh_secnum; i++) {
8104 8111 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
8105 8112 dof->dofh_secoff + i * dof->dofh_secsize);
8106 8113
8107 8114 if (sec->dofs_type != DOF_SECT_PROVIDER)
8108 8115 continue;
8109 8116
8110 8117 dtrace_helper_provider_remove_one(dhp, sec, pid);
8111 8118 }
8112 8119 }
8113 8120
8114 8121 /*
8115 8122 * DTrace Meta Provider-to-Framework API Functions
8116 8123 *
8117 8124 * These functions implement the Meta Provider-to-Framework API, as described
8118 8125 * in <sys/dtrace.h>.
8119 8126 */
8120 8127 int
8121 8128 dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
8122 8129 dtrace_meta_provider_id_t *idp)
8123 8130 {
8124 8131 dtrace_meta_t *meta;
8125 8132 dtrace_helpers_t *help, *next;
8126 8133 int i;
8127 8134
8128 8135 *idp = DTRACE_METAPROVNONE;
8129 8136
8130 8137 /*
8131 8138 * We strictly don't need the name, but we hold onto it for
8132 8139 * debuggability. All hail error queues!
8133 8140 */
8134 8141 if (name == NULL) {
8135 8142 cmn_err(CE_WARN, "failed to register meta-provider: "
8136 8143 "invalid name");
8137 8144 return (EINVAL);
8138 8145 }
8139 8146
8140 8147 if (mops == NULL ||
8141 8148 mops->dtms_create_probe == NULL ||
8142 8149 mops->dtms_provide_pid == NULL ||
8143 8150 mops->dtms_remove_pid == NULL) {
8144 8151 cmn_err(CE_WARN, "failed to register meta-provider %s: "
8145 8152 "invalid ops", name);
8146 8153 return (EINVAL);
8147 8154 }
8148 8155
8149 8156 meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
8150 8157 meta->dtm_mops = *mops;
8151 8158 meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
8152 8159 (void) strcpy(meta->dtm_name, name);
8153 8160 meta->dtm_arg = arg;
8154 8161
8155 8162 mutex_enter(&dtrace_meta_lock);
8156 8163 mutex_enter(&dtrace_lock);
8157 8164
8158 8165 if (dtrace_meta_pid != NULL) {
8159 8166 mutex_exit(&dtrace_lock);
8160 8167 mutex_exit(&dtrace_meta_lock);
8161 8168 cmn_err(CE_WARN, "failed to register meta-provider %s: "
8162 8169 "user-land meta-provider exists", name);
8163 8170 kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
8164 8171 kmem_free(meta, sizeof (dtrace_meta_t));
8165 8172 return (EINVAL);
8166 8173 }
8167 8174
8168 8175 dtrace_meta_pid = meta;
8169 8176 *idp = (dtrace_meta_provider_id_t)meta;
8170 8177
8171 8178 /*
8172 8179 * If there are providers and probes ready to go, pass them
8173 8180 * off to the new meta provider now.
8174 8181 */
8175 8182
8176 8183 help = dtrace_deferred_pid;
8177 8184 dtrace_deferred_pid = NULL;
8178 8185
8179 8186 mutex_exit(&dtrace_lock);
8180 8187
8181 8188 while (help != NULL) {
8182 8189 for (i = 0; i < help->dthps_nprovs; i++) {
8183 8190 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
8184 8191 help->dthps_pid);
8185 8192 }
8186 8193
8187 8194 next = help->dthps_next;
8188 8195 help->dthps_next = NULL;
8189 8196 help->dthps_prev = NULL;
8190 8197 help->dthps_deferred = 0;
8191 8198 help = next;
8192 8199 }
8193 8200
8194 8201 mutex_exit(&dtrace_meta_lock);
8195 8202
8196 8203 return (0);
8197 8204 }
8198 8205
8199 8206 int
8200 8207 dtrace_meta_unregister(dtrace_meta_provider_id_t id)
8201 8208 {
8202 8209 dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;
8203 8210
8204 8211 mutex_enter(&dtrace_meta_lock);
8205 8212 mutex_enter(&dtrace_lock);
8206 8213
8207 8214 if (old == dtrace_meta_pid) {
8208 8215 pp = &dtrace_meta_pid;
8209 8216 } else {
8210 8217 panic("attempt to unregister non-existent "
8211 8218 "dtrace meta-provider %p\n", (void *)old);
8212 8219 }
8213 8220
8214 8221 if (old->dtm_count != 0) {
8215 8222 mutex_exit(&dtrace_lock);
8216 8223 mutex_exit(&dtrace_meta_lock);
8217 8224 return (EBUSY);
8218 8225 }
8219 8226
8220 8227 *pp = NULL;
8221 8228
8222 8229 mutex_exit(&dtrace_lock);
8223 8230 mutex_exit(&dtrace_meta_lock);
8224 8231
8225 8232 kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
8226 8233 kmem_free(old, sizeof (dtrace_meta_t));
8227 8234
8228 8235 return (0);
8229 8236 }
8230 8237
8231 8238
8232 8239 /*
8233 8240 * DTrace DIF Object Functions
8234 8241 */
8235 8242 static int
8236 8243 dtrace_difo_err(uint_t pc, const char *format, ...)
8237 8244 {
8238 8245 if (dtrace_err_verbose) {
8239 8246 va_list alist;
8240 8247
8241 8248 (void) uprintf("dtrace DIF object error: [%u]: ", pc);
8242 8249 va_start(alist, format);
8243 8250 (void) vuprintf(format, alist);
8244 8251 va_end(alist);
8245 8252 }
8246 8253
8247 8254 #ifdef DTRACE_ERRDEBUG
8248 8255 dtrace_errdebug(format);
8249 8256 #endif
8250 8257 return (1);
8251 8258 }
8252 8259
8253 8260 /*
8254 8261 * Validate a DTrace DIF object by checking the IR instructions. The following
8255 8262 * rules are currently enforced by dtrace_difo_validate():
8256 8263 *
8257 8264 * 1. Each instruction must have a valid opcode
8258 8265 * 2. Each register, string, variable, or subroutine reference must be valid
8259 8266 * 3. No instruction can modify register %r0 (must be zero)
8260 8267 * 4. All instruction reserved bits must be set to zero
8261 8268 * 5. The last instruction must be a "ret" instruction
8262 8269 * 6. All branch targets must reference a valid instruction _after_ the branch
8263 8270 */
8264 8271 static int
8265 8272 dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
8266 8273 cred_t *cr)
8267 8274 {
8268 8275 int err = 0, i;
8269 8276 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
8270 8277 int kcheckload;
8271 8278 uint_t pc;
8272 8279
8273 8280 kcheckload = cr == NULL ||
8274 8281 (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0;
8275 8282
8276 8283 dp->dtdo_destructive = 0;
8277 8284
8278 8285 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
8279 8286 dif_instr_t instr = dp->dtdo_buf[pc];
8280 8287
8281 8288 uint_t r1 = DIF_INSTR_R1(instr);
8282 8289 uint_t r2 = DIF_INSTR_R2(instr);
8283 8290 uint_t rd = DIF_INSTR_RD(instr);
8284 8291 uint_t rs = DIF_INSTR_RS(instr);
8285 8292 uint_t label = DIF_INSTR_LABEL(instr);
8286 8293 uint_t v = DIF_INSTR_VAR(instr);
8287 8294 uint_t subr = DIF_INSTR_SUBR(instr);
8288 8295 uint_t type = DIF_INSTR_TYPE(instr);
8289 8296 uint_t op = DIF_INSTR_OP(instr);
8290 8297
8291 8298 switch (op) {
8292 8299 case DIF_OP_OR:
8293 8300 case DIF_OP_XOR:
8294 8301 case DIF_OP_AND:
8295 8302 case DIF_OP_SLL:
8296 8303 case DIF_OP_SRL:
8297 8304 case DIF_OP_SRA:
8298 8305 case DIF_OP_SUB:
8299 8306 case DIF_OP_ADD:
8300 8307 case DIF_OP_MUL:
8301 8308 case DIF_OP_SDIV:
8302 8309 case DIF_OP_UDIV:
8303 8310 case DIF_OP_SREM:
8304 8311 case DIF_OP_UREM:
8305 8312 case DIF_OP_COPYS:
8306 8313 if (r1 >= nregs)
8307 8314 err += efunc(pc, "invalid register %u\n", r1);
8308 8315 if (r2 >= nregs)
8309 8316 err += efunc(pc, "invalid register %u\n", r2);
8310 8317 if (rd >= nregs)
8311 8318 err += efunc(pc, "invalid register %u\n", rd);
8312 8319 if (rd == 0)
8313 8320 err += efunc(pc, "cannot write to %r0\n");
8314 8321 break;
8315 8322 case DIF_OP_NOT:
8316 8323 case DIF_OP_MOV:
8317 8324 case DIF_OP_ALLOCS:
8318 8325 if (r1 >= nregs)
8319 8326 err += efunc(pc, "invalid register %u\n", r1);
8320 8327 if (r2 != 0)
8321 8328 err += efunc(pc, "non-zero reserved bits\n");
8322 8329 if (rd >= nregs)
8323 8330 err += efunc(pc, "invalid register %u\n", rd);
8324 8331 if (rd == 0)
8325 8332 err += efunc(pc, "cannot write to %r0\n");
8326 8333 break;
8327 8334 case DIF_OP_LDSB:
8328 8335 case DIF_OP_LDSH:
8329 8336 case DIF_OP_LDSW:
8330 8337 case DIF_OP_LDUB:
8331 8338 case DIF_OP_LDUH:
8332 8339 case DIF_OP_LDUW:
8333 8340 case DIF_OP_LDX:
8334 8341 if (r1 >= nregs)
8335 8342 err += efunc(pc, "invalid register %u\n", r1);
8336 8343 if (r2 != 0)
8337 8344 err += efunc(pc, "non-zero reserved bits\n");
8338 8345 if (rd >= nregs)
8339 8346 err += efunc(pc, "invalid register %u\n", rd);
8340 8347 if (rd == 0)
8341 8348 err += efunc(pc, "cannot write to %r0\n");
8342 8349 if (kcheckload)
8343 8350 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
8344 8351 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
8345 8352 break;
8346 8353 case DIF_OP_RLDSB:
8347 8354 case DIF_OP_RLDSH:
8348 8355 case DIF_OP_RLDSW:
8349 8356 case DIF_OP_RLDUB:
8350 8357 case DIF_OP_RLDUH:
8351 8358 case DIF_OP_RLDUW:
8352 8359 case DIF_OP_RLDX:
8353 8360 if (r1 >= nregs)
8354 8361 err += efunc(pc, "invalid register %u\n", r1);
8355 8362 if (r2 != 0)
8356 8363 err += efunc(pc, "non-zero reserved bits\n");
8357 8364 if (rd >= nregs)
8358 8365 err += efunc(pc, "invalid register %u\n", rd);
8359 8366 if (rd == 0)
8360 8367 err += efunc(pc, "cannot write to %r0\n");
8361 8368 break;
8362 8369 case DIF_OP_ULDSB:
8363 8370 case DIF_OP_ULDSH:
8364 8371 case DIF_OP_ULDSW:
8365 8372 case DIF_OP_ULDUB:
8366 8373 case DIF_OP_ULDUH:
8367 8374 case DIF_OP_ULDUW:
8368 8375 case DIF_OP_ULDX:
8369 8376 if (r1 >= nregs)
8370 8377 err += efunc(pc, "invalid register %u\n", r1);
8371 8378 if (r2 != 0)
8372 8379 err += efunc(pc, "non-zero reserved bits\n");
8373 8380 if (rd >= nregs)
8374 8381 err += efunc(pc, "invalid register %u\n", rd);
8375 8382 if (rd == 0)
8376 8383 err += efunc(pc, "cannot write to %r0\n");
8377 8384 break;
8378 8385 case DIF_OP_STB:
8379 8386 case DIF_OP_STH:
8380 8387 case DIF_OP_STW:
8381 8388 case DIF_OP_STX:
8382 8389 if (r1 >= nregs)
8383 8390 err += efunc(pc, "invalid register %u\n", r1);
8384 8391 if (r2 != 0)
8385 8392 err += efunc(pc, "non-zero reserved bits\n");
8386 8393 if (rd >= nregs)
8387 8394 err += efunc(pc, "invalid register %u\n", rd);
8388 8395 if (rd == 0)
8389 8396 err += efunc(pc, "cannot write to 0 address\n");
8390 8397 break;
8391 8398 case DIF_OP_CMP:
8392 8399 case DIF_OP_SCMP:
8393 8400 if (r1 >= nregs)
8394 8401 err += efunc(pc, "invalid register %u\n", r1);
8395 8402 if (r2 >= nregs)
8396 8403 err += efunc(pc, "invalid register %u\n", r2);
8397 8404 if (rd != 0)
8398 8405 err += efunc(pc, "non-zero reserved bits\n");
8399 8406 break;
8400 8407 case DIF_OP_TST:
8401 8408 if (r1 >= nregs)
8402 8409 err += efunc(pc, "invalid register %u\n", r1);
8403 8410 if (r2 != 0 || rd != 0)
8404 8411 err += efunc(pc, "non-zero reserved bits\n");
8405 8412 break;
8406 8413 case DIF_OP_BA:
8407 8414 case DIF_OP_BE:
8408 8415 case DIF_OP_BNE:
8409 8416 case DIF_OP_BG:
8410 8417 case DIF_OP_BGU:
8411 8418 case DIF_OP_BGE:
8412 8419 case DIF_OP_BGEU:
8413 8420 case DIF_OP_BL:
8414 8421 case DIF_OP_BLU:
8415 8422 case DIF_OP_BLE:
8416 8423 case DIF_OP_BLEU:
8417 8424 if (label >= dp->dtdo_len) {
8418 8425 err += efunc(pc, "invalid branch target %u\n",
8419 8426 label);
8420 8427 }
8421 8428 if (label <= pc) {
8422 8429 err += efunc(pc, "backward branch to %u\n",
8423 8430 label);
8424 8431 }
8425 8432 break;
8426 8433 case DIF_OP_RET:
8427 8434 if (r1 != 0 || r2 != 0)
8428 8435 err += efunc(pc, "non-zero reserved bits\n");
8429 8436 if (rd >= nregs)
8430 8437 err += efunc(pc, "invalid register %u\n", rd);
8431 8438 break;
8432 8439 case DIF_OP_NOP:
8433 8440 case DIF_OP_POPTS:
8434 8441 case DIF_OP_FLUSHTS:
8435 8442 if (r1 != 0 || r2 != 0 || rd != 0)
8436 8443 err += efunc(pc, "non-zero reserved bits\n");
8437 8444 break;
8438 8445 case DIF_OP_SETX:
8439 8446 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
8440 8447 err += efunc(pc, "invalid integer ref %u\n",
8441 8448 DIF_INSTR_INTEGER(instr));
8442 8449 }
8443 8450 if (rd >= nregs)
8444 8451 err += efunc(pc, "invalid register %u\n", rd);
8445 8452 if (rd == 0)
8446 8453 err += efunc(pc, "cannot write to %r0\n");
8447 8454 break;
8448 8455 case DIF_OP_SETS:
8449 8456 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
8450 8457 err += efunc(pc, "invalid string ref %u\n",
8451 8458 DIF_INSTR_STRING(instr));
8452 8459 }
8453 8460 if (rd >= nregs)
8454 8461 err += efunc(pc, "invalid register %u\n", rd);
8455 8462 if (rd == 0)
8456 8463 err += efunc(pc, "cannot write to %r0\n");
8457 8464 break;
8458 8465 case DIF_OP_LDGA:
8459 8466 case DIF_OP_LDTA:
8460 8467 if (r1 > DIF_VAR_ARRAY_MAX)
8461 8468 err += efunc(pc, "invalid array %u\n", r1);
8462 8469 if (r2 >= nregs)
8463 8470 err += efunc(pc, "invalid register %u\n", r2);
8464 8471 if (rd >= nregs)
8465 8472 err += efunc(pc, "invalid register %u\n", rd);
8466 8473 if (rd == 0)
8467 8474 err += efunc(pc, "cannot write to %r0\n");
8468 8475 break;
8469 8476 case DIF_OP_LDGS:
8470 8477 case DIF_OP_LDTS:
8471 8478 case DIF_OP_LDLS:
8472 8479 case DIF_OP_LDGAA:
8473 8480 case DIF_OP_LDTAA:
8474 8481 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
8475 8482 err += efunc(pc, "invalid variable %u\n", v);
8476 8483 if (rd >= nregs)
8477 8484 err += efunc(pc, "invalid register %u\n", rd);
8478 8485 if (rd == 0)
8479 8486 err += efunc(pc, "cannot write to %r0\n");
8480 8487 break;
8481 8488 case DIF_OP_STGS:
8482 8489 case DIF_OP_STTS:
8483 8490 case DIF_OP_STLS:
8484 8491 case DIF_OP_STGAA:
8485 8492 case DIF_OP_STTAA:
8486 8493 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
8487 8494 err += efunc(pc, "invalid variable %u\n", v);
8488 8495 if (rs >= nregs)
8489 8496 				err += efunc(pc, "invalid register %u\n", rs);
8490 8497 break;
8491 8498 case DIF_OP_CALL:
8492 8499 if (subr > DIF_SUBR_MAX)
8493 8500 err += efunc(pc, "invalid subr %u\n", subr);
8494 8501 if (rd >= nregs)
8495 8502 err += efunc(pc, "invalid register %u\n", rd);
8496 8503 if (rd == 0)
8497 8504 err += efunc(pc, "cannot write to %r0\n");
8498 8505
8499 8506 if (subr == DIF_SUBR_COPYOUT ||
8500 8507 subr == DIF_SUBR_COPYOUTSTR) {
8501 8508 dp->dtdo_destructive = 1;
8502 8509 }
8503 8510
8504 8511 if (subr == DIF_SUBR_GETF) {
8505 8512 /*
8506 8513 * If we have a getf() we need to record that
8507 8514 * in our state. Note that our state can be
8508 8515 * NULL if this is a helper -- but in that
8509 8516 * case, the call to getf() is itself illegal,
8510 8517 * and will be caught (slightly later) when
8511 8518 * the helper is validated.
8512 8519 */
8513 8520 if (vstate->dtvs_state != NULL)
8514 8521 vstate->dtvs_state->dts_getf++;
8515 8522 }
8516 8523
8517 8524 break;
8518 8525 case DIF_OP_PUSHTR:
8519 8526 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
8520 8527 err += efunc(pc, "invalid ref type %u\n", type);
8521 8528 if (r2 >= nregs)
8522 8529 err += efunc(pc, "invalid register %u\n", r2);
8523 8530 if (rs >= nregs)
8524 8531 err += efunc(pc, "invalid register %u\n", rs);
8525 8532 break;
8526 8533 case DIF_OP_PUSHTV:
8527 8534 if (type != DIF_TYPE_CTF)
8528 8535 err += efunc(pc, "invalid val type %u\n", type);
8529 8536 if (r2 >= nregs)
8530 8537 err += efunc(pc, "invalid register %u\n", r2);
8531 8538 if (rs >= nregs)
8532 8539 err += efunc(pc, "invalid register %u\n", rs);
8533 8540 break;
8534 8541 default:
8535 8542 err += efunc(pc, "invalid opcode %u\n",
8536 8543 DIF_INSTR_OP(instr));
8537 8544 }
8538 8545 }
8539 8546
8540 8547 if (dp->dtdo_len != 0 &&
8541 8548 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
8542 8549 err += efunc(dp->dtdo_len - 1,
8543 8550 "expected 'ret' as last DIF instruction\n");
8544 8551 }
8545 8552
8546 8553 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) {
8547 8554 /*
8548 8555 * If we're not returning by reference, the size must be either
8549 8556 * 0 or the size of one of the base types.
8550 8557 */
8551 8558 switch (dp->dtdo_rtype.dtdt_size) {
8552 8559 case 0:
8553 8560 case sizeof (uint8_t):
8554 8561 case sizeof (uint16_t):
8555 8562 case sizeof (uint32_t):
8556 8563 case sizeof (uint64_t):
8557 8564 break;
8558 8565
8559 8566 default:
8560 8567 err += efunc(dp->dtdo_len - 1, "bad return size\n");
8561 8568 }
8562 8569 }
8563 8570
8564 8571 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
8565 8572 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
8566 8573 dtrace_diftype_t *vt, *et;
8567 8574 uint_t id, ndx;
8568 8575
8569 8576 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
8570 8577 v->dtdv_scope != DIFV_SCOPE_THREAD &&
8571 8578 v->dtdv_scope != DIFV_SCOPE_LOCAL) {
8572 8579 err += efunc(i, "unrecognized variable scope %d\n",
8573 8580 v->dtdv_scope);
8574 8581 break;
8575 8582 }
8576 8583
8577 8584 if (v->dtdv_kind != DIFV_KIND_ARRAY &&
8578 8585 v->dtdv_kind != DIFV_KIND_SCALAR) {
8579 8586 err += efunc(i, "unrecognized variable type %d\n",
8580 8587 v->dtdv_kind);
8581 8588 break;
8582 8589 }
8583 8590
8584 8591 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
8585 8592 err += efunc(i, "%d exceeds variable id limit\n", id);
8586 8593 break;
8587 8594 }
8588 8595
8589 8596 if (id < DIF_VAR_OTHER_UBASE)
8590 8597 continue;
8591 8598
8592 8599 /*
8593 8600 * For user-defined variables, we need to check that this
8594 8601 * definition is identical to any previous definition that we
8595 8602 * encountered.
8596 8603 */
8597 8604 ndx = id - DIF_VAR_OTHER_UBASE;
8598 8605
8599 8606 switch (v->dtdv_scope) {
8600 8607 case DIFV_SCOPE_GLOBAL:
8601 8608 if (ndx < vstate->dtvs_nglobals) {
8602 8609 dtrace_statvar_t *svar;
8603 8610
8604 8611 if ((svar = vstate->dtvs_globals[ndx]) != NULL)
8605 8612 existing = &svar->dtsv_var;
8606 8613 }
8607 8614
8608 8615 break;
8609 8616
8610 8617 case DIFV_SCOPE_THREAD:
8611 8618 if (ndx < vstate->dtvs_ntlocals)
8612 8619 existing = &vstate->dtvs_tlocals[ndx];
8613 8620 break;
8614 8621
8615 8622 case DIFV_SCOPE_LOCAL:
8616 8623 if (ndx < vstate->dtvs_nlocals) {
8617 8624 dtrace_statvar_t *svar;
8618 8625
8619 8626 if ((svar = vstate->dtvs_locals[ndx]) != NULL)
8620 8627 existing = &svar->dtsv_var;
8621 8628 }
8622 8629
8623 8630 break;
8624 8631 }
8625 8632
8626 8633 vt = &v->dtdv_type;
8627 8634
8628 8635 if (vt->dtdt_flags & DIF_TF_BYREF) {
8629 8636 if (vt->dtdt_size == 0) {
8630 8637 err += efunc(i, "zero-sized variable\n");
8631 8638 break;
8632 8639 }
8633 8640
8634 8641 if (v->dtdv_scope == DIFV_SCOPE_GLOBAL &&
8635 8642 vt->dtdt_size > dtrace_global_maxsize) {
8636 8643 err += efunc(i, "oversized by-ref global\n");
8637 8644 break;
8638 8645 }
8639 8646 }
8640 8647
8641 8648 if (existing == NULL || existing->dtdv_id == 0)
8642 8649 continue;
8643 8650
8644 8651 ASSERT(existing->dtdv_id == v->dtdv_id);
8645 8652 ASSERT(existing->dtdv_scope == v->dtdv_scope);
8646 8653
8647 8654 if (existing->dtdv_kind != v->dtdv_kind)
8648 8655 err += efunc(i, "%d changed variable kind\n", id);
8649 8656
8650 8657 et = &existing->dtdv_type;
8651 8658
8652 8659 if (vt->dtdt_flags != et->dtdt_flags) {
8653 8660 err += efunc(i, "%d changed variable type flags\n", id);
8654 8661 break;
8655 8662 }
8656 8663
8657 8664 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
8658 8665 err += efunc(i, "%d changed variable type size\n", id);
8659 8666 break;
8660 8667 }
8661 8668 }
8662 8669
8663 8670 return (err);
8664 8671 }
8665 8672
8666 8673 /*
8667 8674  * Validate a DTrace DIF object that is to be used as a helper. Helpers
8668 8675 * are much more constrained than normal DIFOs. Specifically, they may
8669 8676 * not:
8670 8677 *
8671 8678 * 1. Make calls to subroutines other than copyin(), copyinstr() or
8672 8679 * miscellaneous string routines
8673 8680 * 2. Access DTrace variables other than the args[] array, and the
8674 8681 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
8675 8682 * 3. Have thread-local variables.
8676 8683 * 4. Have dynamic variables.
8677 8684 */
8678 8685 static int
8679 8686 dtrace_difo_validate_helper(dtrace_difo_t *dp)
8680 8687 {
8681 8688 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
8682 8689 int err = 0;
8683 8690 uint_t pc;
8684 8691
8685 8692 for (pc = 0; pc < dp->dtdo_len; pc++) {
8686 8693 dif_instr_t instr = dp->dtdo_buf[pc];
8687 8694
8688 8695 uint_t v = DIF_INSTR_VAR(instr);
8689 8696 uint_t subr = DIF_INSTR_SUBR(instr);
8690 8697 uint_t op = DIF_INSTR_OP(instr);
8691 8698
8692 8699 switch (op) {
8693 8700 case DIF_OP_OR:
8694 8701 case DIF_OP_XOR:
8695 8702 case DIF_OP_AND:
8696 8703 case DIF_OP_SLL:
8697 8704 case DIF_OP_SRL:
8698 8705 case DIF_OP_SRA:
8699 8706 case DIF_OP_SUB:
8700 8707 case DIF_OP_ADD:
8701 8708 case DIF_OP_MUL:
8702 8709 case DIF_OP_SDIV:
8703 8710 case DIF_OP_UDIV:
8704 8711 case DIF_OP_SREM:
8705 8712 case DIF_OP_UREM:
8706 8713 case DIF_OP_COPYS:
8707 8714 case DIF_OP_NOT:
8708 8715 case DIF_OP_MOV:
8709 8716 case DIF_OP_RLDSB:
8710 8717 case DIF_OP_RLDSH:
8711 8718 case DIF_OP_RLDSW:
8712 8719 case DIF_OP_RLDUB:
8713 8720 case DIF_OP_RLDUH:
8714 8721 case DIF_OP_RLDUW:
8715 8722 case DIF_OP_RLDX:
8716 8723 case DIF_OP_ULDSB:
8717 8724 case DIF_OP_ULDSH:
8718 8725 case DIF_OP_ULDSW:
8719 8726 case DIF_OP_ULDUB:
8720 8727 case DIF_OP_ULDUH:
8721 8728 case DIF_OP_ULDUW:
8722 8729 case DIF_OP_ULDX:
8723 8730 case DIF_OP_STB:
8724 8731 case DIF_OP_STH:
8725 8732 case DIF_OP_STW:
8726 8733 case DIF_OP_STX:
8727 8734 case DIF_OP_ALLOCS:
8728 8735 case DIF_OP_CMP:
8729 8736 case DIF_OP_SCMP:
8730 8737 case DIF_OP_TST:
8731 8738 case DIF_OP_BA:
8732 8739 case DIF_OP_BE:
8733 8740 case DIF_OP_BNE:
8734 8741 case DIF_OP_BG:
8735 8742 case DIF_OP_BGU:
8736 8743 case DIF_OP_BGE:
8737 8744 case DIF_OP_BGEU:
8738 8745 case DIF_OP_BL:
8739 8746 case DIF_OP_BLU:
8740 8747 case DIF_OP_BLE:
8741 8748 case DIF_OP_BLEU:
8742 8749 case DIF_OP_RET:
8743 8750 case DIF_OP_NOP:
8744 8751 case DIF_OP_POPTS:
8745 8752 case DIF_OP_FLUSHTS:
8746 8753 case DIF_OP_SETX:
8747 8754 case DIF_OP_SETS:
8748 8755 case DIF_OP_LDGA:
8749 8756 case DIF_OP_LDLS:
8750 8757 case DIF_OP_STGS:
8751 8758 case DIF_OP_STLS:
8752 8759 case DIF_OP_PUSHTR:
8753 8760 case DIF_OP_PUSHTV:
8754 8761 break;
8755 8762
8756 8763 case DIF_OP_LDGS:
8757 8764 if (v >= DIF_VAR_OTHER_UBASE)
8758 8765 break;
8759 8766
8760 8767 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
8761 8768 break;
8762 8769
8763 8770 if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
8764 8771 v == DIF_VAR_PPID || v == DIF_VAR_TID ||
8765 8772 v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
8766 8773 v == DIF_VAR_UID || v == DIF_VAR_GID)
8767 8774 break;
8768 8775
8769 8776 err += efunc(pc, "illegal variable %u\n", v);
8770 8777 break;
8771 8778
8772 8779 case DIF_OP_LDTA:
8773 8780 case DIF_OP_LDTS:
8774 8781 case DIF_OP_LDGAA:
8775 8782 case DIF_OP_LDTAA:
8776 8783 err += efunc(pc, "illegal dynamic variable load\n");
8777 8784 break;
8778 8785
8779 8786 case DIF_OP_STTS:
8780 8787 case DIF_OP_STGAA:
8781 8788 case DIF_OP_STTAA:
8782 8789 err += efunc(pc, "illegal dynamic variable store\n");
8783 8790 break;
8784 8791
8785 8792 case DIF_OP_CALL:
8786 8793 if (subr == DIF_SUBR_ALLOCA ||
8787 8794 subr == DIF_SUBR_BCOPY ||
8788 8795 subr == DIF_SUBR_COPYIN ||
8789 8796 subr == DIF_SUBR_COPYINTO ||
8790 8797 subr == DIF_SUBR_COPYINSTR ||
8791 8798 subr == DIF_SUBR_INDEX ||
8792 8799 subr == DIF_SUBR_INET_NTOA ||
8793 8800 subr == DIF_SUBR_INET_NTOA6 ||
8794 8801 subr == DIF_SUBR_INET_NTOP ||
8795 8802 subr == DIF_SUBR_LLTOSTR ||
8796 8803 subr == DIF_SUBR_RINDEX ||
8797 8804 subr == DIF_SUBR_STRCHR ||
8798 8805 subr == DIF_SUBR_STRJOIN ||
8799 8806 subr == DIF_SUBR_STRRCHR ||
8800 8807 subr == DIF_SUBR_STRSTR ||
8801 8808 subr == DIF_SUBR_HTONS ||
8802 8809 subr == DIF_SUBR_HTONL ||
8803 8810 subr == DIF_SUBR_HTONLL ||
8804 8811 subr == DIF_SUBR_NTOHS ||
8805 8812 subr == DIF_SUBR_NTOHL ||
8806 8813 subr == DIF_SUBR_NTOHLL)
8807 8814 break;
8808 8815
8809 8816 err += efunc(pc, "invalid subr %u\n", subr);
8810 8817 break;
8811 8818
8812 8819 default:
8813 8820 err += efunc(pc, "invalid opcode %u\n",
8814 8821 DIF_INSTR_OP(instr));
8815 8822 }
8816 8823 }
8817 8824
8818 8825 return (err);
8819 8826 }
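
/*
 * A minimal user-space sketch of the allow-list pattern that
 * dtrace_difo_validate_helper() applies above: walk an instruction
 * stream and reject anything not explicitly permitted.  The opcode
 * names and the report() callback are illustrative stand-ins, not
 * DTrace definitions.
 */
#include <stdio.h>

enum { OP_ADD, OP_LOAD_DYNVAR, OP_CALL_COPYIN, OP_CALL_COPYOUT };

static int
report(unsigned pc, const char *msg)
{
	(void) fprintf(stderr, "pc %u: %s", pc, msg);
	return (1);
}

static int
validate(const unsigned char *prog, unsigned len)
{
	int err = 0;
	unsigned pc;

	for (pc = 0; pc < len; pc++) {
		switch (prog[pc]) {
		case OP_ADD:		/* pure arithmetic: always safe */
		case OP_CALL_COPYIN:	/* reads user memory: permitted */
			break;
		case OP_LOAD_DYNVAR:	/* dynamic variables: forbidden */
			err += report(pc, "illegal dynamic variable load\n");
			break;
		case OP_CALL_COPYOUT:	/* destructive: forbidden */
			err += report(pc, "invalid subr\n");
			break;
		default:
			err += report(pc, "invalid opcode\n");
		}
	}
	return (err);	/* 0 means the program validated cleanly */
}

int
main(void)
{
	unsigned char prog[] = { OP_ADD, OP_CALL_COPYIN, OP_LOAD_DYNVAR };
	return (validate(prog, sizeof (prog)) != 0);
}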
8820 8827
8821 8828 /*
8822 8829 * Returns 1 if the expression in the DIF object can be cached on a per-thread
8823 8830 * basis; 0 if not.
8824 8831 */
8825 8832 static int
8826 8833 dtrace_difo_cacheable(dtrace_difo_t *dp)
8827 8834 {
8828 8835 int i;
8829 8836
8830 8837 if (dp == NULL)
8831 8838 return (0);
8832 8839
8833 8840 for (i = 0; i < dp->dtdo_varlen; i++) {
8834 8841 dtrace_difv_t *v = &dp->dtdo_vartab[i];
8835 8842
8836 8843 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
8837 8844 continue;
8838 8845
8839 8846 switch (v->dtdv_id) {
8840 8847 case DIF_VAR_CURTHREAD:
8841 8848 case DIF_VAR_PID:
8842 8849 case DIF_VAR_TID:
8843 8850 case DIF_VAR_EXECNAME:
8844 8851 case DIF_VAR_ZONENAME:
8845 8852 break;
8846 8853
8847 8854 default:
8848 8855 return (0);
8849 8856 }
8850 8857 }
8851 8858
8852 8859 /*
8853 8860 * This DIF object may be cacheable. Now we need to look for any
8854 8861 * array loading instructions, any memory loading instructions, or
8855 8862 * any stores to thread-local variables.
8856 8863 */
8857 8864 for (i = 0; i < dp->dtdo_len; i++) {
8858 8865 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);
8859 8866
8860 8867 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
8861 8868 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
8862 8869 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
8863 8870 op == DIF_OP_LDGA || op == DIF_OP_STTS)
8864 8871 return (0);
8865 8872 }
8866 8873
8867 8874 return (1);
8868 8875 }
8869 8876
8870 8877 static void
8871 8878 dtrace_difo_hold(dtrace_difo_t *dp)
8872 8879 {
8873 8880 int i;
8874 8881
8875 8882 ASSERT(MUTEX_HELD(&dtrace_lock));
8876 8883
8877 8884 dp->dtdo_refcnt++;
8878 8885 ASSERT(dp->dtdo_refcnt != 0);
8879 8886
8880 8887 /*
8881 8888 * We need to check this DIF object for references to the variable
8882 8889 * DIF_VAR_VTIMESTAMP.
8883 8890 */
8884 8891 for (i = 0; i < dp->dtdo_varlen; i++) {
8885 8892 dtrace_difv_t *v = &dp->dtdo_vartab[i];
8886 8893
8887 8894 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
8888 8895 continue;
8889 8896
8890 8897 if (dtrace_vtime_references++ == 0)
8891 8898 dtrace_vtime_enable();
8892 8899 }
8893 8900 }
8894 8901
8895 8902 /*
8896 8903 * This routine calculates the dynamic variable chunksize for a given DIF
8897 8904 * object. The calculation is not fool-proof, and can probably be tricked by
8898 8905 * malicious DIF -- but it works for all compiler-generated DIF. Because this
8899 8906 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
8900 8907 * if a dynamic variable size exceeds the chunksize.
8901 8908 */
8902 8909 static void
8903 8910 dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
8904 8911 {
8905 8912 uint64_t sval;
8906 8913 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
8907 8914 const dif_instr_t *text = dp->dtdo_buf;
8908 8915 uint_t pc, srd = 0;
8909 8916 uint_t ttop = 0;
8910 8917 size_t size, ksize;
8911 8918 uint_t id, i;
8912 8919
8913 8920 for (pc = 0; pc < dp->dtdo_len; pc++) {
8914 8921 dif_instr_t instr = text[pc];
8915 8922 uint_t op = DIF_INSTR_OP(instr);
8916 8923 uint_t rd = DIF_INSTR_RD(instr);
8917 8924 uint_t r1 = DIF_INSTR_R1(instr);
8918 8925 uint_t nkeys = 0;
8919 8926 uchar_t scope;
8920 8927
8921 8928 dtrace_key_t *key = tupregs;
8922 8929
8923 8930 switch (op) {
8924 8931 case DIF_OP_SETX:
8925 8932 sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
8926 8933 srd = rd;
8927 8934 continue;
8928 8935
8929 8936 case DIF_OP_STTS:
8930 8937 key = &tupregs[DIF_DTR_NREGS];
8931 8938 key[0].dttk_size = 0;
8932 8939 key[1].dttk_size = 0;
8933 8940 nkeys = 2;
8934 8941 scope = DIFV_SCOPE_THREAD;
8935 8942 break;
8936 8943
8937 8944 case DIF_OP_STGAA:
8938 8945 case DIF_OP_STTAA:
8939 8946 nkeys = ttop;
8940 8947
8941 8948 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
8942 8949 key[nkeys++].dttk_size = 0;
8943 8950
8944 8951 key[nkeys++].dttk_size = 0;
8945 8952
8946 8953 if (op == DIF_OP_STTAA) {
8947 8954 scope = DIFV_SCOPE_THREAD;
8948 8955 } else {
8949 8956 scope = DIFV_SCOPE_GLOBAL;
8950 8957 }
8951 8958
8952 8959 break;
8953 8960
8954 8961 case DIF_OP_PUSHTR:
8955 8962 if (ttop == DIF_DTR_NREGS)
8956 8963 return;
8957 8964
8958 8965 if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
8959 8966 /*
8960 8967 * If the register for the size of the "pushtr"
8961 8968 * is %r0 (or the value is 0) and the type is
8962 8969 * a string, we'll use the system-wide default
8963 8970 * string size.
8964 8971 */
8965 8972 tupregs[ttop++].dttk_size =
8966 8973 dtrace_strsize_default;
8967 8974 } else {
8968 8975 if (srd == 0)
8969 8976 return;
8970 8977
8971 8978 tupregs[ttop++].dttk_size = sval;
8972 8979 }
8973 8980
8974 8981 break;
8975 8982
8976 8983 case DIF_OP_PUSHTV:
8977 8984 if (ttop == DIF_DTR_NREGS)
8978 8985 return;
8979 8986
8980 8987 tupregs[ttop++].dttk_size = 0;
8981 8988 break;
8982 8989
8983 8990 case DIF_OP_FLUSHTS:
8984 8991 ttop = 0;
8985 8992 break;
8986 8993
8987 8994 case DIF_OP_POPTS:
8988 8995 if (ttop != 0)
8989 8996 ttop--;
8990 8997 break;
8991 8998 }
8992 8999
8993 9000 sval = 0;
8994 9001 srd = 0;
8995 9002
8996 9003 if (nkeys == 0)
8997 9004 continue;
8998 9005
8999 9006 /*
9000 9007 * We have a dynamic variable allocation; calculate its size.
9001 9008 */
9002 9009 for (ksize = 0, i = 0; i < nkeys; i++)
9003 9010 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
9004 9011
9005 9012 size = sizeof (dtrace_dynvar_t);
9006 9013 size += sizeof (dtrace_key_t) * (nkeys - 1);
9007 9014 size += ksize;
9008 9015
9009 9016 /*
9010 9017 * Now we need to determine the size of the stored data.
9011 9018 */
9012 9019 id = DIF_INSTR_VAR(instr);
9013 9020
9014 9021 for (i = 0; i < dp->dtdo_varlen; i++) {
9015 9022 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9016 9023
9017 9024 if (v->dtdv_id == id && v->dtdv_scope == scope) {
9018 9025 size += v->dtdv_type.dtdt_size;
9019 9026 break;
9020 9027 }
9021 9028 }
9022 9029
9023 9030 if (i == dp->dtdo_varlen)
9024 9031 return;
9025 9032
9026 9033 /*
9027 9034 * We have the size. If this is larger than the chunk size
9028 9035 * for our dynamic variable state, reset the chunk size.
9029 9036 */
9030 9037 size = P2ROUNDUP(size, sizeof (uint64_t));
9031 9038
9032 9039 if (size > vstate->dtvs_dynvars.dtds_chunksize)
9033 9040 vstate->dtvs_dynvars.dtds_chunksize = size;
9034 9041 }
9035 9042 }
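
/*
 * A stand-alone sketch of the chunk-size arithmetic above.  P2ROUNDUP
 * is reproduced from <sys/sysmacros.h>; DYNVAR_HDR and KEY_SLOT are
 * assumed placeholder values for sizeof (dtrace_dynvar_t) and
 * sizeof (dtrace_key_t), which vary by build.
 */
#include <stdio.h>
#include <stdint.h>

#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))

#define	DYNVAR_HDR	64	/* stand-in for sizeof (dtrace_dynvar_t) */
#define	KEY_SLOT	16	/* stand-in for sizeof (dtrace_key_t) */

int
main(void)
{
	uint64_t keys[] = { 13, 256 };	/* tuple key sizes, in bytes */
	unsigned i, nkeys = sizeof (keys) / sizeof (keys[0]);
	uint64_t ksize = 0, size, datasize = 24;

	/* round each key up to an 8-byte boundary, as above */
	for (i = 0; i < nkeys; i++)
		ksize += P2ROUNDUP(keys[i], sizeof (uint64_t));

	/* header + (nkeys - 1) extra key slots + key data + stored data */
	size = DYNVAR_HDR + KEY_SLOT * (uint64_t)(nkeys - 1) +
	    ksize + datasize;
	size = P2ROUNDUP(size, sizeof (uint64_t));

	(void) printf("chunksize must be at least %llu bytes\n",
	    (unsigned long long)size);
	return (0);
}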
9036 9043
9037 9044 static void
9038 9045 dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9039 9046 {
9040 9047 int i, oldsvars, osz, nsz, otlocals, ntlocals;
9041 9048 uint_t id;
9042 9049
9043 9050 ASSERT(MUTEX_HELD(&dtrace_lock));
9044 9051 ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);
9045 9052
9046 9053 for (i = 0; i < dp->dtdo_varlen; i++) {
9047 9054 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9048 9055 dtrace_statvar_t *svar, ***svarp;
9049 9056 size_t dsize = 0;
9050 9057 uint8_t scope = v->dtdv_scope;
9051 9058 int *np;
9052 9059
9053 9060 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
9054 9061 continue;
9055 9062
9056 9063 id -= DIF_VAR_OTHER_UBASE;
9057 9064
9058 9065 switch (scope) {
9059 9066 case DIFV_SCOPE_THREAD:
9060 9067 while (id >= (otlocals = vstate->dtvs_ntlocals)) {
9061 9068 dtrace_difv_t *tlocals;
9062 9069
9063 9070 if ((ntlocals = (otlocals << 1)) == 0)
9064 9071 ntlocals = 1;
9065 9072
9066 9073 osz = otlocals * sizeof (dtrace_difv_t);
9067 9074 nsz = ntlocals * sizeof (dtrace_difv_t);
9068 9075
9069 9076 tlocals = kmem_zalloc(nsz, KM_SLEEP);
9070 9077
9071 9078 if (osz != 0) {
9072 9079 bcopy(vstate->dtvs_tlocals,
9073 9080 tlocals, osz);
9074 9081 kmem_free(vstate->dtvs_tlocals, osz);
9075 9082 }
9076 9083
9077 9084 vstate->dtvs_tlocals = tlocals;
9078 9085 vstate->dtvs_ntlocals = ntlocals;
9079 9086 }
9080 9087
9081 9088 vstate->dtvs_tlocals[id] = *v;
9082 9089 continue;
9083 9090
9084 9091 case DIFV_SCOPE_LOCAL:
9085 9092 np = &vstate->dtvs_nlocals;
9086 9093 svarp = &vstate->dtvs_locals;
9087 9094
9088 9095 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
9089 9096 dsize = NCPU * (v->dtdv_type.dtdt_size +
9090 9097 sizeof (uint64_t));
9091 9098 else
9092 9099 dsize = NCPU * sizeof (uint64_t);
9093 9100
9094 9101 break;
9095 9102
9096 9103 case DIFV_SCOPE_GLOBAL:
9097 9104 np = &vstate->dtvs_nglobals;
9098 9105 svarp = &vstate->dtvs_globals;
9099 9106
9100 9107 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
9101 9108 dsize = v->dtdv_type.dtdt_size +
9102 9109 sizeof (uint64_t);
9103 9110
9104 9111 break;
9105 9112
9106 9113 default:
9107 9114 ASSERT(0);
9108 9115 }
9109 9116
9110 9117 while (id >= (oldsvars = *np)) {
9111 9118 dtrace_statvar_t **statics;
9112 9119 int newsvars, oldsize, newsize;
9113 9120
9114 9121 if ((newsvars = (oldsvars << 1)) == 0)
9115 9122 newsvars = 1;
9116 9123
9117 9124 oldsize = oldsvars * sizeof (dtrace_statvar_t *);
9118 9125 newsize = newsvars * sizeof (dtrace_statvar_t *);
9119 9126
9120 9127 statics = kmem_zalloc(newsize, KM_SLEEP);
9121 9128
9122 9129 if (oldsize != 0) {
9123 9130 bcopy(*svarp, statics, oldsize);
9124 9131 kmem_free(*svarp, oldsize);
9125 9132 }
9126 9133
9127 9134 *svarp = statics;
9128 9135 *np = newsvars;
9129 9136 }
9130 9137
9131 9138 if ((svar = (*svarp)[id]) == NULL) {
9132 9139 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
9133 9140 svar->dtsv_var = *v;
9134 9141
9135 9142 if ((svar->dtsv_size = dsize) != 0) {
9136 9143 svar->dtsv_data = (uint64_t)(uintptr_t)
9137 9144 kmem_zalloc(dsize, KM_SLEEP);
9138 9145 }
9139 9146
9140 9147 (*svarp)[id] = svar;
9141 9148 }
9142 9149
9143 9150 svar->dtsv_refcnt++;
9144 9151 }
9145 9152
9146 9153 dtrace_difo_chunksize(dp, vstate);
9147 9154 dtrace_difo_hold(dp);
9148 9155 }
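
/*
 * dtrace_difo_init() grows its per-scope variable arrays by doubling
 * until the requested index fits, copying and freeing the old
 * allocation each time.  A minimal sketch of that idiom, with calloc()
 * standing in for kmem_zalloc() and error handling elided:
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int *
grow_until(int *arr, int *np, int id)
{
	while (id >= *np) {
		int oldn = *np, newn = oldn << 1;
		int *new;

		if (newn == 0)
			newn = 1;	/* first allocation */

		new = calloc(newn, sizeof (int));
		if (oldn != 0) {
			memcpy(new, arr, oldn * sizeof (int));
			free(arr);	/* the kernel frees the old copy too */
		}
		arr = new;
		*np = newn;
	}
	return (arr);
}

int
main(void)
{
	int *arr = NULL, n = 0;

	arr = grow_until(arr, &n, 9);	/* doubles 0->1->2->4->8->16 */
	(void) printf("capacity is now %d\n", n);
	free(arr);
	return (0);
}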
9149 9156
9150 9157 static dtrace_difo_t *
9151 9158 dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9152 9159 {
9153 9160 dtrace_difo_t *new;
9154 9161 size_t sz;
9155 9162
9156 9163 ASSERT(dp->dtdo_buf != NULL);
9157 9164 ASSERT(dp->dtdo_refcnt != 0);
9158 9165
9159 9166 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
9160 9167
9161 9168 ASSERT(dp->dtdo_buf != NULL);
9162 9169 sz = dp->dtdo_len * sizeof (dif_instr_t);
9163 9170 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
9164 9171 bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
9165 9172 new->dtdo_len = dp->dtdo_len;
9166 9173
9167 9174 if (dp->dtdo_strtab != NULL) {
9168 9175 ASSERT(dp->dtdo_strlen != 0);
9169 9176 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
9170 9177 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
9171 9178 new->dtdo_strlen = dp->dtdo_strlen;
9172 9179 }
9173 9180
9174 9181 if (dp->dtdo_inttab != NULL) {
9175 9182 ASSERT(dp->dtdo_intlen != 0);
9176 9183 sz = dp->dtdo_intlen * sizeof (uint64_t);
9177 9184 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
9178 9185 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
9179 9186 new->dtdo_intlen = dp->dtdo_intlen;
9180 9187 }
9181 9188
9182 9189 if (dp->dtdo_vartab != NULL) {
9183 9190 ASSERT(dp->dtdo_varlen != 0);
9184 9191 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
9185 9192 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
9186 9193 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
9187 9194 new->dtdo_varlen = dp->dtdo_varlen;
9188 9195 }
9189 9196
9190 9197 dtrace_difo_init(new, vstate);
9191 9198 return (new);
9192 9199 }
9193 9200
9194 9201 static void
9195 9202 dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9196 9203 {
9197 9204 int i;
9198 9205
9199 9206 ASSERT(dp->dtdo_refcnt == 0);
9200 9207
9201 9208 for (i = 0; i < dp->dtdo_varlen; i++) {
9202 9209 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9203 9210 dtrace_statvar_t *svar, **svarp;
9204 9211 uint_t id;
9205 9212 uint8_t scope = v->dtdv_scope;
9206 9213 int *np;
9207 9214
9208 9215 switch (scope) {
9209 9216 case DIFV_SCOPE_THREAD:
9210 9217 continue;
9211 9218
9212 9219 case DIFV_SCOPE_LOCAL:
9213 9220 np = &vstate->dtvs_nlocals;
9214 9221 svarp = vstate->dtvs_locals;
9215 9222 break;
9216 9223
9217 9224 case DIFV_SCOPE_GLOBAL:
9218 9225 np = &vstate->dtvs_nglobals;
9219 9226 svarp = vstate->dtvs_globals;
9220 9227 break;
9221 9228
9222 9229 default:
9223 9230 ASSERT(0);
9224 9231 }
9225 9232
9226 9233 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
9227 9234 continue;
9228 9235
9229 9236 id -= DIF_VAR_OTHER_UBASE;
9230 9237 ASSERT(id < *np);
9231 9238
9232 9239 svar = svarp[id];
9233 9240 ASSERT(svar != NULL);
9234 9241 ASSERT(svar->dtsv_refcnt > 0);
9235 9242
9236 9243 if (--svar->dtsv_refcnt > 0)
9237 9244 continue;
9238 9245
9239 9246 if (svar->dtsv_size != 0) {
9240 9247 ASSERT(svar->dtsv_data != NULL);
9241 9248 kmem_free((void *)(uintptr_t)svar->dtsv_data,
9242 9249 svar->dtsv_size);
9243 9250 }
9244 9251
9245 9252 kmem_free(svar, sizeof (dtrace_statvar_t));
9246 9253 svarp[id] = NULL;
9247 9254 }
9248 9255
9249 9256 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
9250 9257 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
9251 9258 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
9252 9259 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
9253 9260
9254 9261 kmem_free(dp, sizeof (dtrace_difo_t));
9255 9262 }
9256 9263
9257 9264 static void
9258 9265 dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9259 9266 {
9260 9267 int i;
9261 9268
9262 9269 ASSERT(MUTEX_HELD(&dtrace_lock));
9263 9270 ASSERT(dp->dtdo_refcnt != 0);
9264 9271
9265 9272 for (i = 0; i < dp->dtdo_varlen; i++) {
9266 9273 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9267 9274
9268 9275 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
9269 9276 continue;
9270 9277
9271 9278 ASSERT(dtrace_vtime_references > 0);
9272 9279 if (--dtrace_vtime_references == 0)
9273 9280 dtrace_vtime_disable();
9274 9281 }
9275 9282
9276 9283 if (--dp->dtdo_refcnt == 0)
9277 9284 dtrace_difo_destroy(dp, vstate);
9278 9285 }
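
/*
 * Beyond the plain reference count, dtrace_difo_hold() and
 * dtrace_difo_release() keep a global count of DIFOs that reference
 * the vtimestamp variable, so the costly virtual-time accounting runs
 * only while someone needs it.  The enable-on-first-hold /
 * disable-on-last-release idiom, stand-alone (printf() stands in for
 * the enable/disable calls):
 */
#include <stdio.h>

static int vtime_references;

static void
vtime_hold(void)
{
	if (vtime_references++ == 0)
		(void) printf("enable\n");	/* dtrace_vtime_enable() */
}

static void
vtime_release(void)
{
	if (--vtime_references == 0)
		(void) printf("disable\n");	/* dtrace_vtime_disable() */
}

int
main(void)
{
	vtime_hold();
	vtime_hold();		/* second reference: no transition */
	vtime_release();
	vtime_release();	/* last reference: disable */
	return (0);
}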
9279 9286
9280 9287 /*
9281 9288 * DTrace Format Functions
9282 9289 */
9283 9290 static uint16_t
9284 9291 dtrace_format_add(dtrace_state_t *state, char *str)
9285 9292 {
9286 9293 char *fmt, **new;
9287 9294 uint16_t ndx, len = strlen(str) + 1;
9288 9295
9289 9296 fmt = kmem_zalloc(len, KM_SLEEP);
9290 9297 bcopy(str, fmt, len);
9291 9298
9292 9299 for (ndx = 0; ndx < state->dts_nformats; ndx++) {
9293 9300 if (state->dts_formats[ndx] == NULL) {
9294 9301 state->dts_formats[ndx] = fmt;
9295 9302 return (ndx + 1);
9296 9303 }
9297 9304 }
9298 9305
9299 9306 if (state->dts_nformats == USHRT_MAX) {
9300 9307 /*
9301 9308 * This is only likely if a denial-of-service attack is being
9302 9309 * attempted. As such, it's okay to fail silently here.
9303 9310 */
9304 9311 kmem_free(fmt, len);
9305 9312 return (0);
9306 9313 }
9307 9314
9308 9315 /*
9309 9316 * For simplicity, we always resize the formats array to be exactly the
9310 9317 * number of formats.
9311 9318 */
9312 9319 ndx = state->dts_nformats++;
9313 9320 new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);
9314 9321
9315 9322 if (state->dts_formats != NULL) {
9316 9323 ASSERT(ndx != 0);
9317 9324 bcopy(state->dts_formats, new, ndx * sizeof (char *));
9318 9325 kmem_free(state->dts_formats, ndx * sizeof (char *));
9319 9326 }
9320 9327
9321 9328 state->dts_formats = new;
9322 9329 state->dts_formats[ndx] = fmt;
9323 9330
9324 9331 return (ndx + 1);
9325 9332 }
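
/*
 * dtrace_format_add() hands out 1-based handles so that 0 can mean
 * "no format": it reuses the first NULL slot and otherwise grows the
 * table by exactly one entry.  A user-space sketch of the same scheme
 * (strdup()/realloc() stand in for the kmem copies; error handling
 * elided):
 */
#include <stdlib.h>
#include <string.h>

static char **slots;
static unsigned nslots;

static unsigned
handle_add(const char *str)		/* returns the 1-based handle */
{
	unsigned ndx;

	for (ndx = 0; ndx < nslots; ndx++) {
		if (slots[ndx] == NULL) {
			slots[ndx] = strdup(str);
			return (ndx + 1);
		}
	}

	slots = realloc(slots, (nslots + 1) * sizeof (char *));
	slots[nslots] = strdup(str);
	return (++nslots);
}

static void
handle_remove(unsigned h)
{
	free(slots[h - 1]);
	slots[h - 1] = NULL;	/* slot becomes reusable */
}

int
main(void)
{
	unsigned a = handle_add("%d bytes\n");

	(void) handle_add("%s\n");
	handle_remove(a);
	return (handle_add("%x\n") == a ? 0 : 1);	/* reuses handle 1 */
}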
9326 9333
9327 9334 static void
9328 9335 dtrace_format_remove(dtrace_state_t *state, uint16_t format)
9329 9336 {
9330 9337 char *fmt;
9331 9338
9332 9339 ASSERT(state->dts_formats != NULL);
9333 9340 ASSERT(format <= state->dts_nformats);
9334 9341 ASSERT(state->dts_formats[format - 1] != NULL);
9335 9342
9336 9343 fmt = state->dts_formats[format - 1];
9337 9344 kmem_free(fmt, strlen(fmt) + 1);
9338 9345 state->dts_formats[format - 1] = NULL;
9339 9346 }
9340 9347
9341 9348 static void
9342 9349 dtrace_format_destroy(dtrace_state_t *state)
9343 9350 {
9344 9351 int i;
9345 9352
9346 9353 if (state->dts_nformats == 0) {
9347 9354 ASSERT(state->dts_formats == NULL);
9348 9355 return;
9349 9356 }
9350 9357
9351 9358 ASSERT(state->dts_formats != NULL);
9352 9359
9353 9360 for (i = 0; i < state->dts_nformats; i++) {
9354 9361 char *fmt = state->dts_formats[i];
9355 9362
9356 9363 if (fmt == NULL)
9357 9364 continue;
9358 9365
9359 9366 kmem_free(fmt, strlen(fmt) + 1);
9360 9367 }
9361 9368
9362 9369 kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
9363 9370 state->dts_nformats = 0;
9364 9371 state->dts_formats = NULL;
9365 9372 }
9366 9373
9367 9374 /*
9368 9375 * DTrace Predicate Functions
9369 9376 */
9370 9377 static dtrace_predicate_t *
9371 9378 dtrace_predicate_create(dtrace_difo_t *dp)
9372 9379 {
9373 9380 dtrace_predicate_t *pred;
9374 9381
9375 9382 ASSERT(MUTEX_HELD(&dtrace_lock));
9376 9383 ASSERT(dp->dtdo_refcnt != 0);
9377 9384
9378 9385 pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
9379 9386 pred->dtp_difo = dp;
9380 9387 pred->dtp_refcnt = 1;
9381 9388
9382 9389 if (!dtrace_difo_cacheable(dp))
9383 9390 return (pred);
9384 9391
9385 9392 if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
9386 9393 /*
9387 9394 * This is only theoretically possible -- we have had 2^32
9388 9395 * cacheable predicates on this machine. We cannot allow any
9389 9396 * more predicates to become cacheable: as unlikely as it is,
9390 9397 * there may be a thread caching a (now stale) predicate cache
9391 9398 * ID. (N.B.: the temptation is being successfully resisted to
9392 9399 * have this cmn_err() "Holy shit -- we executed this code!")
9393 9400 */
9394 9401 return (pred);
9395 9402 }
9396 9403
9397 9404 pred->dtp_cacheid = dtrace_predcache_id++;
9398 9405
9399 9406 return (pred);
9400 9407 }
9401 9408
9402 9409 static void
9403 9410 dtrace_predicate_hold(dtrace_predicate_t *pred)
9404 9411 {
9405 9412 ASSERT(MUTEX_HELD(&dtrace_lock));
9406 9413 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
9407 9414 ASSERT(pred->dtp_refcnt > 0);
9408 9415
9409 9416 pred->dtp_refcnt++;
9410 9417 }
9411 9418
9412 9419 static void
9413 9420 dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
9414 9421 {
9415 9422 dtrace_difo_t *dp = pred->dtp_difo;
9416 9423
9417 9424 ASSERT(MUTEX_HELD(&dtrace_lock));
9418 9425 ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
9419 9426 ASSERT(pred->dtp_refcnt > 0);
9420 9427
9421 9428 if (--pred->dtp_refcnt == 0) {
9422 9429 dtrace_difo_release(pred->dtp_difo, vstate);
9423 9430 kmem_free(pred, sizeof (dtrace_predicate_t));
9424 9431 }
9425 9432 }
9426 9433
9427 9434 /*
9428 9435 * DTrace Action Description Functions
9429 9436 */
9430 9437 static dtrace_actdesc_t *
9431 9438 dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
9432 9439 uint64_t uarg, uint64_t arg)
9433 9440 {
9434 9441 dtrace_actdesc_t *act;
9435 9442
9436 9443 ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
9437 9444 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));
9438 9445
9439 9446 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
9440 9447 act->dtad_kind = kind;
9441 9448 act->dtad_ntuple = ntuple;
9442 9449 act->dtad_uarg = uarg;
9443 9450 act->dtad_arg = arg;
9444 9451 act->dtad_refcnt = 1;
9445 9452
9446 9453 return (act);
9447 9454 }
9448 9455
9449 9456 static void
9450 9457 dtrace_actdesc_hold(dtrace_actdesc_t *act)
9451 9458 {
9452 9459 ASSERT(act->dtad_refcnt >= 1);
9453 9460 act->dtad_refcnt++;
9454 9461 }
9455 9462
9456 9463 static void
9457 9464 dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
9458 9465 {
9459 9466 dtrace_actkind_t kind = act->dtad_kind;
9460 9467 dtrace_difo_t *dp;
9461 9468
9462 9469 ASSERT(act->dtad_refcnt >= 1);
9463 9470
9464 9471 if (--act->dtad_refcnt != 0)
9465 9472 return;
9466 9473
9467 9474 if ((dp = act->dtad_difo) != NULL)
9468 9475 dtrace_difo_release(dp, vstate);
9469 9476
9470 9477 if (DTRACEACT_ISPRINTFLIKE(kind)) {
9471 9478 char *str = (char *)(uintptr_t)act->dtad_arg;
9472 9479
9473 9480 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
9474 9481 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));
9475 9482
9476 9483 if (str != NULL)
9477 9484 kmem_free(str, strlen(str) + 1);
9478 9485 }
9479 9486
9480 9487 kmem_free(act, sizeof (dtrace_actdesc_t));
9481 9488 }
9482 9489
9483 9490 /*
9484 9491 * DTrace ECB Functions
9485 9492 */
9486 9493 static dtrace_ecb_t *
9487 9494 dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
9488 9495 {
9489 9496 dtrace_ecb_t *ecb;
9490 9497 dtrace_epid_t epid;
9491 9498
9492 9499 ASSERT(MUTEX_HELD(&dtrace_lock));
9493 9500
9494 9501 ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
9495 9502 ecb->dte_predicate = NULL;
9496 9503 ecb->dte_probe = probe;
9497 9504
9498 9505 /*
9499 9506 * The default size is the size of the default action: recording
9500 9507 * the epid.
9501 9508 */
9502 9509 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
9503 9510 ecb->dte_alignment = sizeof (dtrace_epid_t);
9504 9511
9505 9512 epid = state->dts_epid++;
9506 9513
9507 9514 if (epid - 1 >= state->dts_necbs) {
9508 9515 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
9509 9516 int necbs = state->dts_necbs << 1;
9510 9517
9511 9518 ASSERT(epid == state->dts_necbs + 1);
9512 9519
9513 9520 if (necbs == 0) {
9514 9521 ASSERT(oecbs == NULL);
9515 9522 necbs = 1;
9516 9523 }
9517 9524
9518 9525 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);
9519 9526
9520 9527 if (oecbs != NULL)
9521 9528 bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));
9522 9529
9523 9530 dtrace_membar_producer();
9524 9531 state->dts_ecbs = ecbs;
9525 9532
9526 9533 if (oecbs != NULL) {
9527 9534 /*
9528 9535 * If this state is active, we must dtrace_sync()
9529 9536 * before we can free the old dts_ecbs array: we're
9530 9537 * coming in hot, and there may be active ring
9531 9538 * buffer processing (which indexes into the dts_ecbs
9532 9539 * array) on another CPU.
9533 9540 */
9534 9541 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
9535 9542 dtrace_sync();
9536 9543
9537 9544 kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
9538 9545 }
9539 9546
9540 9547 dtrace_membar_producer();
9541 9548 state->dts_necbs = necbs;
9542 9549 }
9543 9550
9544 9551 ecb->dte_state = state;
9545 9552
9546 9553 ASSERT(state->dts_ecbs[epid - 1] == NULL);
9547 9554 dtrace_membar_producer();
9548 9555 state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;
9549 9556
9550 9557 return (ecb);
9551 9558 }
9552 9559
9553 9560 static int
9554 9561 dtrace_ecb_enable(dtrace_ecb_t *ecb)
9555 9562 {
9556 9563 dtrace_probe_t *probe = ecb->dte_probe;
9557 9564
9558 9565 ASSERT(MUTEX_HELD(&cpu_lock));
9559 9566 ASSERT(MUTEX_HELD(&dtrace_lock));
9560 9567 ASSERT(ecb->dte_next == NULL);
9561 9568
9562 9569 if (probe == NULL) {
9563 9570 /*
9564 9571 * This is the NULL probe -- there's nothing to do.
9565 9572 */
9566 9573 return (0);
9567 9574 }
9568 9575
9569 9576 if (probe->dtpr_ecb == NULL) {
9570 9577 dtrace_provider_t *prov = probe->dtpr_provider;
9571 9578
9572 9579 /*
9573 9580 * We're the first ECB on this probe.
9574 9581 */
9575 9582 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;
9576 9583
9577 9584 if (ecb->dte_predicate != NULL)
9578 9585 probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
9579 9586
9580 9587 return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
9581 9588 probe->dtpr_id, probe->dtpr_arg));
9582 9589 } else {
9583 9590 /*
9584 9591 * This probe is already active. Swing the last pointer to
9585 9592 * point to the new ECB, and issue a dtrace_sync() to assure
9586 9593 * that all CPUs have seen the change.
9587 9594 */
9588 9595 ASSERT(probe->dtpr_ecb_last != NULL);
9589 9596 probe->dtpr_ecb_last->dte_next = ecb;
9590 9597 probe->dtpr_ecb_last = ecb;
9591 9598 probe->dtpr_predcache = 0;
9592 9599
9593 9600 dtrace_sync();
9594 9601 return (0);
9595 9602 }
9596 9603 }
9597 9604
9598 9605 static void
9599 9606 dtrace_ecb_resize(dtrace_ecb_t *ecb)
9600 9607 {
9601 9608 uint32_t maxalign = sizeof (dtrace_epid_t);
9602 9609 uint32_t align = sizeof (uint8_t), offs, diff;
9603 9610 dtrace_action_t *act;
9604 9611 int wastuple = 0;
9605 9612 uint32_t aggbase = UINT32_MAX;
9606 9613 dtrace_state_t *state = ecb->dte_state;
9607 9614
9608 9615 /*
9609 9616 * If we record anything, we always record the epid. (And we always
9610 9617 * record it first.)
9611 9618 */
9612 9619 offs = sizeof (dtrace_epid_t);
9613 9620 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
9614 9621
9615 9622 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
9616 9623 dtrace_recdesc_t *rec = &act->dta_rec;
9617 9624
9618 9625 if ((align = rec->dtrd_alignment) > maxalign)
9619 9626 maxalign = align;
9620 9627
9621 9628 if (!wastuple && act->dta_intuple) {
9622 9629 /*
9623 9630 * This is the first record in a tuple. Align the
9624 9631 * offset to be at offset 4 in an 8-byte aligned
9625 9632 * block.
9626 9633 */
9627 9634 diff = offs + sizeof (dtrace_aggid_t);
9628 9635
9629 9636 if (diff = (diff & (sizeof (uint64_t) - 1)))
9630 9637 offs += sizeof (uint64_t) - diff;
9631 9638
9632 9639 aggbase = offs - sizeof (dtrace_aggid_t);
9633 9640 ASSERT(!(aggbase & (sizeof (uint64_t) - 1)));
9634 9641 }
9635 9642
9636 9643 /*LINTED*/
9637 9644 if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) {
9638 9645 /*
9639 9646 * The current offset is not properly aligned; align it.
9640 9647 */
9641 9648 offs += align - diff;
9642 9649 }
9643 9650
9644 9651 rec->dtrd_offset = offs;
9645 9652
9646 9653 if (offs + rec->dtrd_size > ecb->dte_needed) {
9647 9654 ecb->dte_needed = offs + rec->dtrd_size;
9648 9655
9649 9656 if (ecb->dte_needed > state->dts_needed)
9650 9657 state->dts_needed = ecb->dte_needed;
9651 9658 }
9652 9659
9653 9660 if (DTRACEACT_ISAGG(act->dta_kind)) {
9654 9661 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
9655 9662 dtrace_action_t *first = agg->dtag_first, *prev;
9656 9663
9657 9664 ASSERT(rec->dtrd_size != 0 && first != NULL);
9658 9665 ASSERT(wastuple);
9659 9666 ASSERT(aggbase != UINT32_MAX);
9660 9667
9661 9668 agg->dtag_base = aggbase;
9662 9669
9663 9670 while ((prev = first->dta_prev) != NULL &&
9664 9671 DTRACEACT_ISAGG(prev->dta_kind)) {
9665 9672 agg = (dtrace_aggregation_t *)prev;
9666 9673 first = agg->dtag_first;
9667 9674 }
9668 9675
9669 9676 if (prev != NULL) {
9670 9677 offs = prev->dta_rec.dtrd_offset +
9671 9678 prev->dta_rec.dtrd_size;
9672 9679 } else {
9673 9680 offs = sizeof (dtrace_epid_t);
9674 9681 }
9675 9682 wastuple = 0;
9676 9683 } else {
9677 9684 if (!act->dta_intuple)
9678 9685 ecb->dte_size = offs + rec->dtrd_size;
9679 9686
9680 9687 offs += rec->dtrd_size;
9681 9688 }
9682 9689
9683 9690 wastuple = act->dta_intuple;
9684 9691 }
9685 9692
9686 9693 if ((act = ecb->dte_action) != NULL &&
9687 9694 !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
9688 9695 ecb->dte_size == sizeof (dtrace_epid_t)) {
9689 9696 /*
9690 9697 * If the size is still sizeof (dtrace_epid_t), then all
9691 9698 * actions store no data; set the size to 0.
9692 9699 */
9693 9700 ecb->dte_alignment = maxalign;
9694 9701 ecb->dte_size = 0;
9695 9702
9696 9703 /*
9697 9704 * If the needed space is still sizeof (dtrace_epid_t), then
9698 9705 * all actions need no additional space; set the needed
9699 9706 * size to 0.
9700 9707 */
9701 9708 if (ecb->dte_needed == sizeof (dtrace_epid_t))
9702 9709 ecb->dte_needed = 0;
9703 9710
9704 9711 return;
9705 9712 }
9706 9713
9707 9714 /*
9708 9715 * Set our alignment, and make sure that the dte_size and dte_needed
9709 9716 * are aligned to the size of an EPID.
9710 9717 */
9711 9718 ecb->dte_alignment = maxalign;
9712 9719 ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) &
9713 9720 ~(sizeof (dtrace_epid_t) - 1);
9714 9721 ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) &
9715 9722 ~(sizeof (dtrace_epid_t) - 1);
9716 9723 ASSERT(ecb->dte_size <= ecb->dte_needed);
9717 9724 }
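
/*
 * The final statements of dtrace_ecb_resize() round dte_size and
 * dte_needed up to a multiple of sizeof (dtrace_epid_t) using the
 * classic power-of-two trick (x + (a - 1)) & ~(a - 1).  Demonstrated
 * stand-alone:
 */
#include <assert.h>
#include <stdint.h>

static uint32_t
roundup_pow2(uint32_t x, uint32_t a)	/* a must be a power of two */
{
	return ((x + (a - 1)) & ~(a - 1));
}

int
main(void)
{
	assert(roundup_pow2(0, 4) == 0);
	assert(roundup_pow2(1, 4) == 4);
	assert(roundup_pow2(4, 4) == 4);
	assert(roundup_pow2(13, 8) == 16);
	return (0);
}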
9718 9725
9719 9726 static dtrace_action_t *
9720 9727 dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
9721 9728 {
9722 9729 dtrace_aggregation_t *agg;
9723 9730 size_t size = sizeof (uint64_t);
9724 9731 int ntuple = desc->dtad_ntuple;
9725 9732 dtrace_action_t *act;
9726 9733 dtrace_recdesc_t *frec;
9727 9734 dtrace_aggid_t aggid;
9728 9735 dtrace_state_t *state = ecb->dte_state;
9729 9736
9730 9737 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
9731 9738 agg->dtag_ecb = ecb;
9732 9739
9733 9740 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));
9734 9741
9735 9742 switch (desc->dtad_kind) {
9736 9743 case DTRACEAGG_MIN:
9737 9744 agg->dtag_initial = INT64_MAX;
9738 9745 agg->dtag_aggregate = dtrace_aggregate_min;
9739 9746 break;
9740 9747
9741 9748 case DTRACEAGG_MAX:
9742 9749 agg->dtag_initial = INT64_MIN;
9743 9750 agg->dtag_aggregate = dtrace_aggregate_max;
9744 9751 break;
9745 9752
9746 9753 case DTRACEAGG_COUNT:
9747 9754 agg->dtag_aggregate = dtrace_aggregate_count;
9748 9755 break;
9749 9756
9750 9757 case DTRACEAGG_QUANTIZE:
9751 9758 agg->dtag_aggregate = dtrace_aggregate_quantize;
9752 9759 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
9753 9760 sizeof (uint64_t);
9754 9761 break;
9755 9762
9756 9763 case DTRACEAGG_LQUANTIZE: {
9757 9764 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
9758 9765 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);
9759 9766
9760 9767 agg->dtag_initial = desc->dtad_arg;
9761 9768 agg->dtag_aggregate = dtrace_aggregate_lquantize;
9762 9769
9763 9770 if (step == 0 || levels == 0)
9764 9771 goto err;
9765 9772
9766 9773 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
9767 9774 break;
9768 9775 }
9769 9776
9770 9777 case DTRACEAGG_LLQUANTIZE: {
9771 9778 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
9772 9779 uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
9773 9780 uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
9774 9781 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
9775 9782 int64_t v;
9776 9783
9777 9784 agg->dtag_initial = desc->dtad_arg;
9778 9785 agg->dtag_aggregate = dtrace_aggregate_llquantize;
9779 9786
9780 9787 if (factor < 2 || low >= high || nsteps < factor)
9781 9788 goto err;
9782 9789
9783 9790 /*
9784 9791 * Now check that the number of steps evenly divides a power
9785 9792 * of the factor. (This assures both integer bucket size and
9786 9793 * linearity within each magnitude.)
9787 9794 */
9788 9795 for (v = factor; v < nsteps; v *= factor)
9789 9796 continue;
9790 9797
9791 9798 if ((v % nsteps) || (nsteps % factor))
9792 9799 goto err;
9793 9800
9794 9801 size = (dtrace_aggregate_llquantize_bucket(factor,
9795 9802 low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
9796 9803 break;
9797 9804 }
9798 9805
9799 9806 case DTRACEAGG_AVG:
9800 9807 agg->dtag_aggregate = dtrace_aggregate_avg;
9801 9808 size = sizeof (uint64_t) * 2;
9802 9809 break;
9803 9810
9804 9811 case DTRACEAGG_STDDEV:
9805 9812 agg->dtag_aggregate = dtrace_aggregate_stddev;
9806 9813 size = sizeof (uint64_t) * 4;
9807 9814 break;
9808 9815
9809 9816 case DTRACEAGG_SUM:
9810 9817 agg->dtag_aggregate = dtrace_aggregate_sum;
9811 9818 break;
9812 9819
9813 9820 default:
9814 9821 goto err;
9815 9822 }
9816 9823
9817 9824 agg->dtag_action.dta_rec.dtrd_size = size;
9818 9825
9819 9826 if (ntuple == 0)
9820 9827 goto err;
9821 9828
9822 9829 /*
9823 9830 * We must make sure that we have enough actions for the n-tuple.
9824 9831 */
9825 9832 for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
9826 9833 if (DTRACEACT_ISAGG(act->dta_kind))
9827 9834 break;
9828 9835
9829 9836 if (--ntuple == 0) {
9830 9837 /*
9831 9838 * This is the action with which our n-tuple begins.
9832 9839 */
9833 9840 agg->dtag_first = act;
9834 9841 goto success;
9835 9842 }
9836 9843 }
9837 9844
9838 9845 /*
9839 9846 * This n-tuple is short by ntuple elements. Return failure.
9840 9847 */
9841 9848 ASSERT(ntuple != 0);
9842 9849 err:
9843 9850 kmem_free(agg, sizeof (dtrace_aggregation_t));
9844 9851 return (NULL);
9845 9852
9846 9853 success:
9847 9854 /*
9848 9855 * If the last action in the tuple has a size of zero, it's actually
9849 9856 * an expression argument for the aggregating action.
9850 9857 */
9851 9858 ASSERT(ecb->dte_action_last != NULL);
9852 9859 act = ecb->dte_action_last;
9853 9860
9854 9861 if (act->dta_kind == DTRACEACT_DIFEXPR) {
9855 9862 ASSERT(act->dta_difo != NULL);
9856 9863
9857 9864 if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
9858 9865 agg->dtag_hasarg = 1;
9859 9866 }
9860 9867
9861 9868 /*
9862 9869 * We need to allocate an id for this aggregation.
9863 9870 */
9864 9871 aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
9865 9872 VM_BESTFIT | VM_SLEEP);
9866 9873
9867 9874 if (aggid - 1 >= state->dts_naggregations) {
9868 9875 dtrace_aggregation_t **oaggs = state->dts_aggregations;
9869 9876 dtrace_aggregation_t **aggs;
9870 9877 int naggs = state->dts_naggregations << 1;
9871 9878 int onaggs = state->dts_naggregations;
9872 9879
9873 9880 ASSERT(aggid == state->dts_naggregations + 1);
9874 9881
9875 9882 if (naggs == 0) {
9876 9883 ASSERT(oaggs == NULL);
9877 9884 naggs = 1;
9878 9885 }
9879 9886
9880 9887 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);
9881 9888
9882 9889 if (oaggs != NULL) {
9883 9890 bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
9884 9891 kmem_free(oaggs, onaggs * sizeof (*aggs));
9885 9892 }
9886 9893
9887 9894 state->dts_aggregations = aggs;
9888 9895 state->dts_naggregations = naggs;
9889 9896 }
9890 9897
9891 9898 ASSERT(state->dts_aggregations[aggid - 1] == NULL);
9892 9899 state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;
9893 9900
9894 9901 frec = &agg->dtag_first->dta_rec;
9895 9902 if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
9896 9903 frec->dtrd_alignment = sizeof (dtrace_aggid_t);
9897 9904
9898 9905 for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
9899 9906 ASSERT(!act->dta_intuple);
9900 9907 act->dta_intuple = 1;
9901 9908 }
9902 9909
9903 9910 return (&agg->dtag_action);
9904 9911 }
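
/*
 * A stand-alone rendering of the llquantize() parameter check above:
 * nsteps must evenly divide some power of factor, and factor must
 * divide nsteps, which together guarantee integer bucket sizes and
 * linear spacing within each order of magnitude.  The function mirrors
 * the guards in dtrace_ecb_aggregation_create(); low and high enter
 * only through the range check.
 */
#include <stdio.h>
#include <stdint.h>

static int
llquantize_params_ok(uint16_t factor, uint16_t low, uint16_t high,
    uint16_t nsteps)
{
	int64_t v;

	if (factor < 2 || low >= high || nsteps < factor)
		return (0);

	/* find the first power of factor that is >= nsteps */
	for (v = factor; v < nsteps; v *= factor)
		continue;

	return ((v % nsteps) == 0 && (nsteps % factor) == 0);
}

int
main(void)
{
	(void) printf("factor 10, nsteps 20: %s\n",
	    llquantize_params_ok(10, 0, 6, 20) ? "ok" : "rejected");
	(void) printf("factor 10, nsteps 30: %s\n",
	    llquantize_params_ok(10, 0, 6, 30) ? "ok" : "rejected");
	return (0);
}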
9905 9912
9906 9913 static void
9907 9914 dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
9908 9915 {
9909 9916 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
9910 9917 dtrace_state_t *state = ecb->dte_state;
9911 9918 dtrace_aggid_t aggid = agg->dtag_id;
9912 9919
9913 9920 ASSERT(DTRACEACT_ISAGG(act->dta_kind));
9914 9921 vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);
9915 9922
9916 9923 ASSERT(state->dts_aggregations[aggid - 1] == agg);
9917 9924 state->dts_aggregations[aggid - 1] = NULL;
9918 9925
9919 9926 kmem_free(agg, sizeof (dtrace_aggregation_t));
9920 9927 }
9921 9928
9922 9929 static int
9923 9930 dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
9924 9931 {
9925 9932 dtrace_action_t *action, *last;
9926 9933 dtrace_difo_t *dp = desc->dtad_difo;
9927 9934 uint32_t size = 0, align = sizeof (uint8_t), mask;
9928 9935 uint16_t format = 0;
9929 9936 dtrace_recdesc_t *rec;
9930 9937 dtrace_state_t *state = ecb->dte_state;
9931 9938 dtrace_optval_t *opt = state->dts_options, nframes, strsize;
9932 9939 uint64_t arg = desc->dtad_arg;
9933 9940
9934 9941 ASSERT(MUTEX_HELD(&dtrace_lock));
9935 9942 ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);
9936 9943
9937 9944 if (DTRACEACT_ISAGG(desc->dtad_kind)) {
9938 9945 /*
9939 9946 * If this is an aggregating action, there must be neither
9940 9947 * a speculate nor a commit on the action chain.
9941 9948 */
9942 9949 dtrace_action_t *act;
9943 9950
9944 9951 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
9945 9952 if (act->dta_kind == DTRACEACT_COMMIT)
9946 9953 return (EINVAL);
9947 9954
9948 9955 if (act->dta_kind == DTRACEACT_SPECULATE)
9949 9956 return (EINVAL);
9950 9957 }
9951 9958
9952 9959 action = dtrace_ecb_aggregation_create(ecb, desc);
9953 9960
9954 9961 if (action == NULL)
9955 9962 return (EINVAL);
9956 9963 } else {
9957 9964 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
9958 9965 (desc->dtad_kind == DTRACEACT_DIFEXPR &&
9959 9966 dp != NULL && dp->dtdo_destructive)) {
9960 9967 state->dts_destructive = 1;
9961 9968 }
9962 9969
9963 9970 switch (desc->dtad_kind) {
9964 9971 case DTRACEACT_PRINTF:
9965 9972 case DTRACEACT_PRINTA:
9966 9973 case DTRACEACT_SYSTEM:
9967 9974 case DTRACEACT_FREOPEN:
9968 9975 case DTRACEACT_DIFEXPR:
9969 9976 /*
9970 9977 * We know that our arg is a string -- turn it into a
9971 9978 * format.
9972 9979 */
9973 9980 if (arg == NULL) {
9974 9981 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
9975 9982 desc->dtad_kind == DTRACEACT_DIFEXPR);
9976 9983 format = 0;
9977 9984 } else {
9978 9985 ASSERT(arg != NULL);
9979 9986 ASSERT(arg > KERNELBASE);
9980 9987 format = dtrace_format_add(state,
9981 9988 (char *)(uintptr_t)arg);
9982 9989 }
9983 9990
9984 9991 /*FALLTHROUGH*/
9985 9992 case DTRACEACT_LIBACT:
9986 9993 case DTRACEACT_TRACEMEM:
9987 9994 case DTRACEACT_TRACEMEM_DYNSIZE:
9988 9995 if (dp == NULL)
9989 9996 return (EINVAL);
9990 9997
9991 9998 if ((size = dp->dtdo_rtype.dtdt_size) != 0)
9992 9999 break;
9993 10000
9994 10001 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
9995 10002 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
9996 10003 return (EINVAL);
9997 10004
9998 10005 size = opt[DTRACEOPT_STRSIZE];
9999 10006 }
10000 10007
10001 10008 break;
10002 10009
10003 10010 case DTRACEACT_STACK:
10004 10011 if ((nframes = arg) == 0) {
10005 10012 nframes = opt[DTRACEOPT_STACKFRAMES];
10006 10013 ASSERT(nframes > 0);
10007 10014 arg = nframes;
10008 10015 }
10009 10016
10010 10017 size = nframes * sizeof (pc_t);
10011 10018 break;
10012 10019
10013 10020 case DTRACEACT_JSTACK:
10014 10021 if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
10015 10022 strsize = opt[DTRACEOPT_JSTACKSTRSIZE];
10016 10023
10017 10024 if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
10018 10025 nframes = opt[DTRACEOPT_JSTACKFRAMES];
10019 10026
10020 10027 arg = DTRACE_USTACK_ARG(nframes, strsize);
10021 10028
10022 10029 /*FALLTHROUGH*/
10023 10030 case DTRACEACT_USTACK:
10024 10031 if (desc->dtad_kind != DTRACEACT_JSTACK &&
10025 10032 (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
10026 10033 strsize = DTRACE_USTACK_STRSIZE(arg);
10027 10034 nframes = opt[DTRACEOPT_USTACKFRAMES];
10028 10035 ASSERT(nframes > 0);
10029 10036 arg = DTRACE_USTACK_ARG(nframes, strsize);
10030 10037 }
10031 10038
10032 10039 /*
10033 10040 * Save a slot for the pid.
10034 10041 */
10035 10042 size = (nframes + 1) * sizeof (uint64_t);
10036 10043 size += DTRACE_USTACK_STRSIZE(arg);
10037 10044 size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));
10038 10045
10039 10046 break;
10040 10047
10041 10048 case DTRACEACT_SYM:
10042 10049 case DTRACEACT_MOD:
10043 10050 if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
10044 10051 sizeof (uint64_t)) ||
10045 10052 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10046 10053 return (EINVAL);
10047 10054 break;
10048 10055
10049 10056 case DTRACEACT_USYM:
10050 10057 case DTRACEACT_UMOD:
10051 10058 case DTRACEACT_UADDR:
10052 10059 if (dp == NULL ||
10053 10060 (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
10054 10061 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10055 10062 return (EINVAL);
10056 10063
10057 10064 /*
10058 10065 * We have a slot for the pid, plus a slot for the
10059 10066 * argument. To keep things simple (aligned with
10060 10067 * bitness-neutral sizing), we store each as a 64-bit
10061 10068 * quantity.
10062 10069 */
10063 10070 size = 2 * sizeof (uint64_t);
10064 10071 break;
10065 10072
10066 10073 case DTRACEACT_STOP:
10067 10074 case DTRACEACT_BREAKPOINT:
10068 10075 case DTRACEACT_PANIC:
10069 10076 break;
10070 10077
10071 10078 case DTRACEACT_CHILL:
10072 10079 case DTRACEACT_DISCARD:
10073 10080 case DTRACEACT_RAISE:
10074 10081 if (dp == NULL)
10075 10082 return (EINVAL);
10076 10083 break;
10077 10084
10078 10085 case DTRACEACT_EXIT:
10079 10086 if (dp == NULL ||
10080 10087 (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
10081 10088 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10082 10089 return (EINVAL);
10083 10090 break;
10084 10091
10085 10092 case DTRACEACT_SPECULATE:
10086 10093 if (ecb->dte_size > sizeof (dtrace_epid_t))
10087 10094 return (EINVAL);
10088 10095
10089 10096 if (dp == NULL)
10090 10097 return (EINVAL);
10091 10098
10092 10099 state->dts_speculates = 1;
10093 10100 break;
10094 10101
10095 10102 case DTRACEACT_COMMIT: {
10096 10103 dtrace_action_t *act = ecb->dte_action;
10097 10104
10098 10105 for (; act != NULL; act = act->dta_next) {
10099 10106 if (act->dta_kind == DTRACEACT_COMMIT)
10100 10107 return (EINVAL);
10101 10108 }
10102 10109
10103 10110 if (dp == NULL)
10104 10111 return (EINVAL);
10105 10112 break;
10106 10113 }
10107 10114
10108 10115 default:
10109 10116 return (EINVAL);
10110 10117 }
10111 10118
10112 10119 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
10113 10120 /*
10114 10121 * If this is a data-storing action or a speculate,
10115 10122 * we must be sure that there isn't a commit on the
10116 10123 * action chain.
10117 10124 */
10118 10125 dtrace_action_t *act = ecb->dte_action;
10119 10126
10120 10127 for (; act != NULL; act = act->dta_next) {
10121 10128 if (act->dta_kind == DTRACEACT_COMMIT)
10122 10129 return (EINVAL);
10123 10130 }
10124 10131 }
10125 10132
10126 10133 action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
10127 10134 action->dta_rec.dtrd_size = size;
10128 10135 }
10129 10136
10130 10137 action->dta_refcnt = 1;
10131 10138 rec = &action->dta_rec;
10132 10139 size = rec->dtrd_size;
10133 10140
10134 10141 for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
10135 10142 if (!(size & mask)) {
10136 10143 align = mask + 1;
10137 10144 break;
10138 10145 }
10139 10146 }
10140 10147
10141 10148 action->dta_kind = desc->dtad_kind;
10142 10149
10143 10150 if ((action->dta_difo = dp) != NULL)
10144 10151 dtrace_difo_hold(dp);
10145 10152
10146 10153 rec->dtrd_action = action->dta_kind;
10147 10154 rec->dtrd_arg = arg;
10148 10155 rec->dtrd_uarg = desc->dtad_uarg;
10149 10156 rec->dtrd_alignment = (uint16_t)align;
10150 10157 rec->dtrd_format = format;
10151 10158
10152 10159 if ((last = ecb->dte_action_last) != NULL) {
10153 10160 ASSERT(ecb->dte_action != NULL);
10154 10161 action->dta_prev = last;
10155 10162 last->dta_next = action;
10156 10163 } else {
10157 10164 ASSERT(ecb->dte_action == NULL);
10158 10165 ecb->dte_action = action;
10159 10166 }
10160 10167
10161 10168 ecb->dte_action_last = action;
10162 10169
10163 10170 return (0);
10164 10171 }
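
/*
 * The mask loop in dtrace_ecb_action_add() derives a record's
 * alignment from its size: the result is the largest power of two
 * (capped at sizeof (uint64_t)) that divides the size, falling back to
 * byte alignment.  Extracted and exercised stand-alone:
 */
#include <assert.h>
#include <stdint.h>

static uint32_t
align_for_size(uint32_t size)
{
	uint32_t align = sizeof (uint8_t), mask;

	for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0;
	    mask >>= 1) {
		if (!(size & mask)) {
			align = mask + 1;
			break;
		}
	}
	return (align);
}

int
main(void)
{
	assert(align_for_size(16) == 8);
	assert(align_for_size(12) == 4);
	assert(align_for_size(6) == 2);
	assert(align_for_size(5) == 1);
	assert(align_for_size(0) == 1);	/* zero-sized: stays byte-aligned */
	return (0);
}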
10165 10172
10166 10173 static void
10167 10174 dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
10168 10175 {
10169 10176 dtrace_action_t *act = ecb->dte_action, *next;
10170 10177 dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
10171 10178 dtrace_difo_t *dp;
10172 10179 uint16_t format;
10173 10180
10174 10181 if (act != NULL && act->dta_refcnt > 1) {
10175 10182 ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
10176 10183 act->dta_refcnt--;
10177 10184 } else {
10178 10185 for (; act != NULL; act = next) {
10179 10186 next = act->dta_next;
10180 10187 ASSERT(next != NULL || act == ecb->dte_action_last);
10181 10188 ASSERT(act->dta_refcnt == 1);
10182 10189
10183 10190 if ((format = act->dta_rec.dtrd_format) != 0)
10184 10191 dtrace_format_remove(ecb->dte_state, format);
10185 10192
10186 10193 if ((dp = act->dta_difo) != NULL)
10187 10194 dtrace_difo_release(dp, vstate);
10188 10195
10189 10196 if (DTRACEACT_ISAGG(act->dta_kind)) {
10190 10197 dtrace_ecb_aggregation_destroy(ecb, act);
10191 10198 } else {
10192 10199 kmem_free(act, sizeof (dtrace_action_t));
10193 10200 }
10194 10201 }
10195 10202 }
10196 10203
10197 10204 ecb->dte_action = NULL;
10198 10205 ecb->dte_action_last = NULL;
10199 10206 ecb->dte_size = sizeof (dtrace_epid_t);
10200 10207 }
10201 10208
10202 10209 static void
10203 10210 dtrace_ecb_disable(dtrace_ecb_t *ecb)
10204 10211 {
10205 10212 /*
10206 10213 * We disable the ECB by removing it from its probe.
10207 10214 */
10208 10215 dtrace_ecb_t *pecb, *prev = NULL;
10209 10216 dtrace_probe_t *probe = ecb->dte_probe;
10210 10217
10211 10218 ASSERT(MUTEX_HELD(&dtrace_lock));
10212 10219
10213 10220 if (probe == NULL) {
10214 10221 /*
10215 10222 * This is the NULL probe; there is nothing to disable.
10216 10223 */
10217 10224 return;
10218 10225 }
10219 10226
10220 10227 for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
10221 10228 if (pecb == ecb)
10222 10229 break;
10223 10230 prev = pecb;
10224 10231 }
10225 10232
10226 10233 ASSERT(pecb != NULL);
10227 10234
10228 10235 if (prev == NULL) {
10229 10236 probe->dtpr_ecb = ecb->dte_next;
10230 10237 } else {
10231 10238 prev->dte_next = ecb->dte_next;
10232 10239 }
10233 10240
10234 10241 if (ecb == probe->dtpr_ecb_last) {
10235 10242 ASSERT(ecb->dte_next == NULL);
10236 10243 probe->dtpr_ecb_last = prev;
10237 10244 }
10238 10245
10239 10246 /*
10240 10247 * The ECB has been disconnected from the probe; now sync to assure
10241 10248 * that all CPUs have seen the change before returning.
10242 10249 */
10243 10250 dtrace_sync();
10244 10251
10245 10252 if (probe->dtpr_ecb == NULL) {
10246 10253 /*
10247 10254 * That was the last ECB on the probe; clear the predicate
10248 10255 * cache ID for the probe, disable it and sync one more time
10249 10256 * to assure that we'll never hit it again.
10250 10257 */
10251 10258 dtrace_provider_t *prov = probe->dtpr_provider;
10252 10259
10253 10260 ASSERT(ecb->dte_next == NULL);
10254 10261 ASSERT(probe->dtpr_ecb_last == NULL);
10255 10262 probe->dtpr_predcache = DTRACE_CACHEIDNONE;
10256 10263 prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
10257 10264 probe->dtpr_id, probe->dtpr_arg);
10258 10265 dtrace_sync();
10259 10266 } else {
10260 10267 /*
10261 10268 * There is at least one ECB remaining on the probe. If there
10262 10269 * is _exactly_ one, set the probe's predicate cache ID to be
10263 10270 * the predicate cache ID of the remaining ECB.
10264 10271 */
10265 10272 ASSERT(probe->dtpr_ecb_last != NULL);
10266 10273 ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);
10267 10274
10268 10275 if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
10269 10276 dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;
10270 10277
10271 10278 ASSERT(probe->dtpr_ecb->dte_next == NULL);
10272 10279
10273 10280 if (p != NULL)
10274 10281 probe->dtpr_predcache = p->dtp_cacheid;
10275 10282 }
10276 10283
10277 10284 ecb->dte_next = NULL;
10278 10285 }
10279 10286 }
10280 10287
10281 10288 static void
10282 10289 dtrace_ecb_destroy(dtrace_ecb_t *ecb)
10283 10290 {
10284 10291 dtrace_state_t *state = ecb->dte_state;
10285 10292 dtrace_vstate_t *vstate = &state->dts_vstate;
10286 10293 dtrace_predicate_t *pred;
10287 10294 dtrace_epid_t epid = ecb->dte_epid;
10288 10295
10289 10296 ASSERT(MUTEX_HELD(&dtrace_lock));
10290 10297 ASSERT(ecb->dte_next == NULL);
10291 10298 ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);
10292 10299
10293 10300 if ((pred = ecb->dte_predicate) != NULL)
10294 10301 dtrace_predicate_release(pred, vstate);
10295 10302
10296 10303 dtrace_ecb_action_remove(ecb);
10297 10304
10298 10305 ASSERT(state->dts_ecbs[epid - 1] == ecb);
10299 10306 state->dts_ecbs[epid - 1] = NULL;
10300 10307
10301 10308 kmem_free(ecb, sizeof (dtrace_ecb_t));
10302 10309 }
10303 10310
10304 10311 static dtrace_ecb_t *
10305 10312 dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
10306 10313 dtrace_enabling_t *enab)
10307 10314 {
10308 10315 dtrace_ecb_t *ecb;
10309 10316 dtrace_predicate_t *pred;
10310 10317 dtrace_actdesc_t *act;
10311 10318 dtrace_provider_t *prov;
10312 10319 dtrace_ecbdesc_t *desc = enab->dten_current;
10313 10320
10314 10321 ASSERT(MUTEX_HELD(&dtrace_lock));
10315 10322 ASSERT(state != NULL);
10316 10323
10317 10324 ecb = dtrace_ecb_add(state, probe);
10318 10325 ecb->dte_uarg = desc->dted_uarg;
10319 10326
10320 10327 if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
10321 10328 dtrace_predicate_hold(pred);
10322 10329 ecb->dte_predicate = pred;
10323 10330 }
10324 10331
10325 10332 if (probe != NULL) {
10326 10333 /*
10327 10334 * If the provider shows more leg than the consumer is old
10328 10335 * enough to see, we need to enable the appropriate implicit
10329 10336 * predicate bits to prevent the ecb from activating at
10330 10337 * revealing times.
10331 10338 *
10332 10339 * Providers specifying DTRACE_PRIV_USER at register time
10333 10340 * are stating that they need the /proc-style privilege
10334 10341 * model to be enforced, and this is what DTRACE_COND_OWNER
10335 10342 * and DTRACE_COND_ZONEOWNER will then do at probe time.
10336 10343 */
10337 10344 prov = probe->dtpr_provider;
10338 10345 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
10339 10346 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
10340 10347 ecb->dte_cond |= DTRACE_COND_OWNER;
10341 10348
10342 10349 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
10343 10350 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
10344 10351 ecb->dte_cond |= DTRACE_COND_ZONEOWNER;
10345 10352
10346 10353 /*
10347 10354 * If the provider shows us kernel innards and the user
10348 10355 * is lacking sufficient privilege, enable the
10349 10356 * DTRACE_COND_USERMODE implicit predicate.
10350 10357 */
10351 10358 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
10352 10359 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
10353 10360 ecb->dte_cond |= DTRACE_COND_USERMODE;
10354 10361 }
10355 10362
10356 10363 if (dtrace_ecb_create_cache != NULL) {
10357 10364 /*
10358 10365 * If we have a cached ecb, we'll use its action list instead
10359 10366 * of creating our own (saving both time and space).
10360 10367 */
10361 10368 dtrace_ecb_t *cached = dtrace_ecb_create_cache;
10362 10369 dtrace_action_t *act = cached->dte_action;
10363 10370
10364 10371 if (act != NULL) {
10365 10372 ASSERT(act->dta_refcnt > 0);
10366 10373 act->dta_refcnt++;
10367 10374 ecb->dte_action = act;
10368 10375 ecb->dte_action_last = cached->dte_action_last;
10369 10376 ecb->dte_needed = cached->dte_needed;
10370 10377 ecb->dte_size = cached->dte_size;
10371 10378 ecb->dte_alignment = cached->dte_alignment;
10372 10379 }
10373 10380
10374 10381 return (ecb);
10375 10382 }
10376 10383
10377 10384 for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
10378 10385 if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
10379 10386 dtrace_ecb_destroy(ecb);
10380 10387 return (NULL);
10381 10388 }
10382 10389 }
10383 10390
10384 10391 dtrace_ecb_resize(ecb);
10385 10392
10386 10393 return (dtrace_ecb_create_cache = ecb);
10387 10394 }
10388 10395
10389 10396 static int
10390 10397 dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
10391 10398 {
10392 10399 dtrace_ecb_t *ecb;
10393 10400 dtrace_enabling_t *enab = arg;
10394 10401 dtrace_state_t *state = enab->dten_vstate->dtvs_state;
10395 10402
10396 10403 ASSERT(state != NULL);
10397 10404
10398 10405 if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
10399 10406 /*
10400 10407 * This probe was created in a generation for which this
10401 10408 * enabling has previously created ECBs; we don't want to
10402 10409 * enable it again, so just kick out.
10403 10410 */
10404 10411 return (DTRACE_MATCH_NEXT);
10405 10412 }
10406 10413
10407 10414 if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
10408 10415 return (DTRACE_MATCH_DONE);
10409 10416
10410 10417 if (dtrace_ecb_enable(ecb) < 0)
10411 10418 return (DTRACE_MATCH_FAIL);
10412 10419
10413 10420 return (DTRACE_MATCH_NEXT);
10414 10421 }
10415 10422
10416 10423 static dtrace_ecb_t *
10417 10424 dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
10418 10425 {
10419 10426 dtrace_ecb_t *ecb;
10420 10427
10421 10428 ASSERT(MUTEX_HELD(&dtrace_lock));
10422 10429
10423 10430 if (id == 0 || id > state->dts_necbs)
10424 10431 return (NULL);
10425 10432
10426 10433 ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
10427 10434 ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);
10428 10435
10429 10436 return (state->dts_ecbs[id - 1]);
10430 10437 }
10431 10438
10432 10439 static dtrace_aggregation_t *
10433 10440 dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
10434 10441 {
10435 10442 dtrace_aggregation_t *agg;
10436 10443
10437 10444 ASSERT(MUTEX_HELD(&dtrace_lock));
10438 10445
10439 10446 if (id == 0 || id > state->dts_naggregations)
10440 10447 return (NULL);
10441 10448
10442 10449 ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
10443 10450 ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
10444 10451 agg->dtag_id == id);
10445 10452
10446 10453 return (state->dts_aggregations[id - 1]);
10447 10454 }
10448 10455
10449 10456 /*
10450 10457 * DTrace Buffer Functions
10451 10458 *
10452 10459 * The following functions manipulate DTrace buffers. Most of these functions
10453 10460 * are called in the context of establishing or processing consumer state;
10454 10461 * exceptions are explicitly noted.
10455 10462 */
10456 10463
10457 10464 /*
10458 10465 * Note: called from cross call context. This function switches the two
10459 10466 * buffers on a given CPU. The atomicity of this operation is assured by
10460 10467 * disabling interrupts while the actual switch takes place; the disabling of
10461 10468 * interrupts serializes the execution with any execution of dtrace_probe() on
10462 10469 * the same CPU.
10463 10470 */
10464 10471 static void
10465 10472 dtrace_buffer_switch(dtrace_buffer_t *buf)
10466 10473 {
10467 10474 caddr_t tomax = buf->dtb_tomax;
10468 10475 caddr_t xamot = buf->dtb_xamot;
10469 10476 dtrace_icookie_t cookie;
10470 10477 hrtime_t now = dtrace_gethrtime();
10471 10478
10472 10479 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
10473 10480 ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
10474 10481
10475 10482 cookie = dtrace_interrupt_disable();
10476 10483 buf->dtb_tomax = xamot;
10477 10484 buf->dtb_xamot = tomax;
10478 10485 buf->dtb_xamot_drops = buf->dtb_drops;
10479 10486 buf->dtb_xamot_offset = buf->dtb_offset;
10480 10487 buf->dtb_xamot_errors = buf->dtb_errors;
10481 10488 buf->dtb_xamot_flags = buf->dtb_flags;
10482 10489 buf->dtb_offset = 0;
10483 10490 buf->dtb_drops = 0;
10484 10491 buf->dtb_errors = 0;
10485 10492 buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
10486 10493 buf->dtb_interval = now - buf->dtb_switched;
10487 10494 buf->dtb_switched = now;
10488 10495 dtrace_interrupt_enable(cookie);
10489 10496 }
10490 10497
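/*
 * A minimal sketch (names assumed) of how the switch above is driven
 * from non-probe context: the caller cross calls onto the target CPU
 * so that dtrace_buffer_switch() executes there, where its interrupt
 * disabling serializes it against any dtrace_probe() on that CPU.
 */
static void
dtrace_buffer_switch_sketch(dtrace_state_t *state, processorid_t cpu)
{
	dtrace_buffer_t *buf = &state->dts_buffer[cpu];

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

	dtrace_xcall(cpu, (dtrace_xcall_t)dtrace_buffer_switch, buf);
}
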
10491 10498 /*
10492 10499 * Note: called from cross call context. This function activates a buffer
10493 10500 * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation
10494 10501 * is guaranteed by the disabling of interrupts.
10495 10502 */
10496 10503 static void
10497 10504 dtrace_buffer_activate(dtrace_state_t *state)
10498 10505 {
10499 10506 dtrace_buffer_t *buf;
10500 10507 dtrace_icookie_t cookie = dtrace_interrupt_disable();
10501 10508
10502 10509 buf = &state->dts_buffer[CPU->cpu_id];
10503 10510
10504 10511 if (buf->dtb_tomax != NULL) {
10505 10512 /*
10506 10513 * We might like to assert that the buffer is marked inactive,
10507 10514 * but this isn't necessarily true: the buffer for the CPU
10508 10515 * that processes the BEGIN probe has its buffer activated
10509 10516 		 * manually. In this case, we take the (harmless) action
10510 10517 		 * of re-clearing the INACTIVE bit.
10511 10518 */
10512 10519 buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
10513 10520 }
10514 10521
10515 10522 dtrace_interrupt_enable(cookie);
10516 10523 }
10517 10524
10518 10525 static int
10519 10526 dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
10520 10527 processorid_t cpu, int *factor)
10521 10528 {
10522 10529 cpu_t *cp;
10523 10530 dtrace_buffer_t *buf;
10524 10531 int allocated = 0, desired = 0;
10525 10532
10526 10533 ASSERT(MUTEX_HELD(&cpu_lock));
10527 10534 ASSERT(MUTEX_HELD(&dtrace_lock));
10528 10535
10529 10536 *factor = 1;
10530 10537
10531 10538 if (size > dtrace_nonroot_maxsize &&
10532 10539 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
10533 10540 return (EFBIG);
10534 10541
10535 10542 cp = cpu_list;
10536 10543
10537 10544 do {
10538 10545 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
10539 10546 continue;
10540 10547
10541 10548 buf = &bufs[cp->cpu_id];
10542 10549
10543 10550 /*
10544 10551 * If there is already a buffer allocated for this CPU, it
10545 10552 * is only possible that this is a DR event. In this case,
10546 10553 * the buffer size must match our specified size.
10547 10554 */
10548 10555 if (buf->dtb_tomax != NULL) {
10549 10556 ASSERT(buf->dtb_size == size);
10550 10557 continue;
10551 10558 }
10552 10559
10553 10560 ASSERT(buf->dtb_xamot == NULL);
10554 10561
10555 10562 if ((buf->dtb_tomax = kmem_zalloc(size,
10556 10563 KM_NOSLEEP | KM_NORMALPRI)) == NULL)
10557 10564 goto err;
10558 10565
10559 10566 buf->dtb_size = size;
10560 10567 buf->dtb_flags = flags;
10561 10568 buf->dtb_offset = 0;
10562 10569 buf->dtb_drops = 0;
10563 10570
10564 10571 if (flags & DTRACEBUF_NOSWITCH)
10565 10572 continue;
10566 10573
10567 10574 if ((buf->dtb_xamot = kmem_zalloc(size,
10568 10575 KM_NOSLEEP | KM_NORMALPRI)) == NULL)
10569 10576 goto err;
10570 10577 } while ((cp = cp->cpu_next) != cpu_list);
10571 10578
10572 10579 return (0);
10573 10580
10574 10581 err:
10575 10582 cp = cpu_list;
10576 10583
10577 10584 do {
10578 10585 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
10579 10586 continue;
10580 10587
10581 10588 buf = &bufs[cp->cpu_id];
10582 10589 desired += 2;
10583 10590
10584 10591 if (buf->dtb_xamot != NULL) {
10585 10592 ASSERT(buf->dtb_tomax != NULL);
10586 10593 ASSERT(buf->dtb_size == size);
10587 10594 kmem_free(buf->dtb_xamot, size);
10588 10595 allocated++;
10589 10596 }
10590 10597
10591 10598 if (buf->dtb_tomax != NULL) {
10592 10599 ASSERT(buf->dtb_size == size);
10593 10600 kmem_free(buf->dtb_tomax, size);
10594 10601 allocated++;
10595 10602 }
10596 10603
10597 10604 buf->dtb_tomax = NULL;
10598 10605 buf->dtb_xamot = NULL;
10599 10606 buf->dtb_size = 0;
10600 10607 } while ((cp = cp->cpu_next) != cpu_list);
10601 10608
10602 10609 *factor = desired / (allocated > 0 ? allocated : 1);
10603 10610
10604 10611 return (ENOMEM);
10605 10612 }
10606 10613
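/*
 * A sketch (under assumed names and policy) of how a caller can use
 * the factor returned above on ENOMEM: shrink the per-CPU request
 * geometrically and retry until the allocation succeeds or the request
 * becomes too small to be useful.
 */
static int
dtrace_buffer_alloc_retry_sketch(dtrace_buffer_t *bufs, size_t size,
    int flags, processorid_t cpu)
{
	int err, factor;

	for (;;) {
		if ((err = dtrace_buffer_alloc(bufs, size, flags,
		    cpu, &factor)) != ENOMEM)
			return (err);

		/*
		 * Divide by the reported factor (halving at minimum);
		 * the one-page floor is an assumed cutoff.
		 */
		size /= (factor > 1 ? factor : 2);

		if (size < PAGESIZE)
			return (ENOMEM);
	}
}
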
10607 10614 /*
10608 10615 * Note: called from probe context. This function just increments the drop
10609 10616 * count on a buffer. It has been made a function to allow for the
10610 10617 * possibility of understanding the source of mysterious drop counts. (A
10611 10618 * problem for which one may be particularly disappointed that DTrace cannot
10612 10619 * be used to understand DTrace.)
10613 10620 */
10614 10621 static void
10615 10622 dtrace_buffer_drop(dtrace_buffer_t *buf)
10616 10623 {
10617 10624 buf->dtb_drops++;
10618 10625 }
10619 10626
10620 10627 /*
10621 10628 * Note: called from probe context. This function is called to reserve space
10622 10629 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the
10623 10630 * mstate. Returns the new offset in the buffer, or a negative value if an
10624 10631 * error has occurred.
10625 10632 */
10626 10633 static intptr_t
10627 10634 dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
10628 10635 dtrace_state_t *state, dtrace_mstate_t *mstate)
10629 10636 {
10630 10637 intptr_t offs = buf->dtb_offset, soffs;
10631 10638 intptr_t woffs;
10632 10639 caddr_t tomax;
10633 10640 size_t total;
10634 10641
10635 10642 if (buf->dtb_flags & DTRACEBUF_INACTIVE)
10636 10643 return (-1);
10637 10644
10638 10645 if ((tomax = buf->dtb_tomax) == NULL) {
10639 10646 dtrace_buffer_drop(buf);
10640 10647 return (-1);
10641 10648 }
10642 10649
10643 10650 if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
10644 10651 while (offs & (align - 1)) {
10645 10652 /*
10646 10653 * Assert that our alignment is off by a number which
10647 10654 * is itself sizeof (uint32_t) aligned.
10648 10655 */
10649 10656 ASSERT(!((align - (offs & (align - 1))) &
10650 10657 (sizeof (uint32_t) - 1)));
10651 10658 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
10652 10659 offs += sizeof (uint32_t);
10653 10660 }
10654 10661
10655 10662 if ((soffs = offs + needed) > buf->dtb_size) {
10656 10663 dtrace_buffer_drop(buf);
10657 10664 return (-1);
10658 10665 }
10659 10666
10660 10667 if (mstate == NULL)
10661 10668 return (offs);
10662 10669
10663 10670 mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
10664 10671 mstate->dtms_scratch_size = buf->dtb_size - soffs;
10665 10672 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
10666 10673
10667 10674 return (offs);
10668 10675 }
10669 10676
10670 10677 if (buf->dtb_flags & DTRACEBUF_FILL) {
10671 10678 if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
10672 10679 (buf->dtb_flags & DTRACEBUF_FULL))
10673 10680 return (-1);
10674 10681 goto out;
10675 10682 }
10676 10683
10677 10684 total = needed + (offs & (align - 1));
10678 10685
10679 10686 /*
10680 10687 * For a ring buffer, life is quite a bit more complicated. Before
10681 10688 * we can store any padding, we need to adjust our wrapping offset.
10682 10689 * (If we've never before wrapped or we're not about to, no adjustment
10683 10690 * is required.)
10684 10691 */
10685 10692 if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
10686 10693 offs + total > buf->dtb_size) {
10687 10694 woffs = buf->dtb_xamot_offset;
10688 10695
10689 10696 if (offs + total > buf->dtb_size) {
10690 10697 /*
10691 10698 * We can't fit in the end of the buffer. First, a
10692 10699 * sanity check that we can fit in the buffer at all.
10693 10700 */
10694 10701 if (total > buf->dtb_size) {
10695 10702 dtrace_buffer_drop(buf);
10696 10703 return (-1);
10697 10704 }
10698 10705
10699 10706 /*
10700 10707 * We're going to be storing at the top of the buffer,
10701 10708 * so now we need to deal with the wrapped offset. We
10702 10709 * only reset our wrapped offset to 0 if it is
10703 10710 * currently greater than the current offset. If it
10704 10711 * is less than the current offset, it is because a
10705 10712 * previous allocation induced a wrap -- but the
10706 10713 * allocation didn't subsequently take the space due
10707 10714 * to an error or false predicate evaluation. In this
10708 10715 * case, we'll just leave the wrapped offset alone: if
10709 10716 * the wrapped offset hasn't been advanced far enough
10710 10717 * for this allocation, it will be adjusted in the
10711 10718 * lower loop.
10712 10719 */
10713 10720 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
10714 10721 if (woffs >= offs)
10715 10722 woffs = 0;
10716 10723 } else {
10717 10724 woffs = 0;
10718 10725 }
10719 10726
10720 10727 /*
10721 10728 * Now we know that we're going to be storing to the
10722 10729 * top of the buffer and that there is room for us
10723 10730 * there. We need to clear the buffer from the current
10724 10731 * offset to the end (there may be old gunk there).
10725 10732 */
10726 10733 while (offs < buf->dtb_size)
10727 10734 tomax[offs++] = 0;
10728 10735
10729 10736 /*
10730 10737 * We need to set our offset to zero. And because we
10731 10738 * are wrapping, we need to set the bit indicating as
10732 10739 * much. We can also adjust our needed space back
10733 10740 * down to the space required by the ECB -- we know
10734 10741 * that the top of the buffer is aligned.
10735 10742 */
10736 10743 offs = 0;
10737 10744 total = needed;
10738 10745 buf->dtb_flags |= DTRACEBUF_WRAPPED;
10739 10746 } else {
10740 10747 /*
10741 10748 * There is room for us in the buffer, so we simply
10742 10749 * need to check the wrapped offset.
10743 10750 */
10744 10751 if (woffs < offs) {
10745 10752 /*
10746 10753 * The wrapped offset is less than the offset.
10747 10754 * This can happen if we allocated buffer space
10748 10755 * that induced a wrap, but then we didn't
10749 10756 * subsequently take the space due to an error
10750 10757 * or false predicate evaluation. This is
10751 10758 * okay; we know that _this_ allocation isn't
10752 10759 * going to induce a wrap. We still can't
10753 10760 * reset the wrapped offset to be zero,
10754 10761 * however: the space may have been trashed in
10755 10762 * the previous failed probe attempt. But at
10756 10763 * least the wrapped offset doesn't need to
10757 10764 * be adjusted at all...
10758 10765 */
10759 10766 goto out;
10760 10767 }
10761 10768 }
10762 10769
10763 10770 while (offs + total > woffs) {
10764 10771 dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
10765 10772 size_t size;
10766 10773
10767 10774 if (epid == DTRACE_EPIDNONE) {
10768 10775 size = sizeof (uint32_t);
10769 10776 } else {
10770 10777 ASSERT(epid <= state->dts_necbs);
10771 10778 ASSERT(state->dts_ecbs[epid - 1] != NULL);
10772 10779
10773 10780 size = state->dts_ecbs[epid - 1]->dte_size;
10774 10781 }
10775 10782
10776 10783 ASSERT(woffs + size <= buf->dtb_size);
10777 10784 ASSERT(size != 0);
10778 10785
10779 10786 if (woffs + size == buf->dtb_size) {
10780 10787 /*
10781 10788 * We've reached the end of the buffer; we want
10782 10789 * to set the wrapped offset to 0 and break
10783 10790 * out. However, if the offs is 0, then we're
10784 10791 * in a strange edge-condition: the amount of
10785 10792 * space that we want to reserve plus the size
10786 10793 * of the record that we're overwriting is
10787 10794 * greater than the size of the buffer. This
10788 10795 * is problematic because if we reserve the
10789 10796 * space but subsequently don't consume it (due
10790 10797 * to a failed predicate or error) the wrapped
10791 10798 * offset will be 0 -- yet the EPID at offset 0
10792 10799 * will not be committed. This situation is
10793 10800 * relatively easy to deal with: if we're in
10794 10801 * this case, the buffer is indistinguishable
10795 10802 * from one that hasn't wrapped; we need only
10796 10803 * finish the job by clearing the wrapped bit,
10797 10804 * explicitly setting the offset to be 0, and
10798 10805 * zero'ing out the old data in the buffer.
10799 10806 */
10800 10807 if (offs == 0) {
10801 10808 buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
10802 10809 buf->dtb_offset = 0;
10803 10810 woffs = total;
10804 10811
10805 10812 while (woffs < buf->dtb_size)
10806 10813 tomax[woffs++] = 0;
10807 10814 }
10808 10815
10809 10816 woffs = 0;
10810 10817 break;
10811 10818 }
10812 10819
10813 10820 woffs += size;
10814 10821 }
10815 10822
10816 10823 /*
10817 10824 * We have a wrapped offset. It may be that the wrapped offset
10818 10825 * has become zero -- that's okay.
10819 10826 */
10820 10827 buf->dtb_xamot_offset = woffs;
10821 10828 }
10822 10829
10823 10830 out:
10824 10831 /*
10825 10832 * Now we can plow the buffer with any necessary padding.
10826 10833 */
10827 10834 while (offs & (align - 1)) {
10828 10835 /*
10829 10836 * Assert that our alignment is off by a number which
10830 10837 * is itself sizeof (uint32_t) aligned.
10831 10838 */
10832 10839 ASSERT(!((align - (offs & (align - 1))) &
10833 10840 (sizeof (uint32_t) - 1)));
10834 10841 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
10835 10842 offs += sizeof (uint32_t);
10836 10843 }
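
	/*
	 * Worked example of the padding above: with offs = 20, align = 8
	 * and needed = 12, one 32-bit DTRACE_EPIDNONE pad is stored at
	 * offset 20 (bringing offs to 24) and the record then occupies
	 * [24, 36).  Consumers skip DTRACE_EPIDNONE words as they walk
	 * the buffer, so padding is never mistaken for a record.
	 */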
10837 10844
10838 10845 if (buf->dtb_flags & DTRACEBUF_FILL) {
10839 10846 if (offs + needed > buf->dtb_size - state->dts_reserve) {
10840 10847 buf->dtb_flags |= DTRACEBUF_FULL;
10841 10848 return (-1);
10842 10849 }
10843 10850 }
10844 10851
10845 10852 if (mstate == NULL)
10846 10853 return (offs);
10847 10854
10848 10855 /*
10849 10856 * For ring buffers and fill buffers, the scratch space is always
10850 10857 * the inactive buffer.
10851 10858 */
10852 10859 mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
10853 10860 mstate->dtms_scratch_size = buf->dtb_size;
10854 10861 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
10855 10862
10856 10863 return (offs);
10857 10864 }
10858 10865
10859 10866 static void
10860 10867 dtrace_buffer_polish(dtrace_buffer_t *buf)
10861 10868 {
10862 10869 ASSERT(buf->dtb_flags & DTRACEBUF_RING);
10863 10870 ASSERT(MUTEX_HELD(&dtrace_lock));
10864 10871
10865 10872 if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
10866 10873 return;
10867 10874
10868 10875 /*
10869 10876 * We need to polish the ring buffer. There are three cases:
10870 10877 *
10871 10878 * - The first (and presumably most common) is that there is no gap
10872 10879 * between the buffer offset and the wrapped offset. In this case,
10873 10880 * there is nothing in the buffer that isn't valid data; we can
10874 10881 * mark the buffer as polished and return.
10875 10882 *
10876 10883 * - The second (less common than the first but still more common
10877 10884 * than the third) is that there is a gap between the buffer offset
10878 10885 * and the wrapped offset, and the wrapped offset is larger than the
10879 10886 * buffer offset. This can happen because of an alignment issue, or
10880 10887 * can happen because of a call to dtrace_buffer_reserve() that
10881 10888 * didn't subsequently consume the buffer space. In this case,
10882 10889 * we need to zero the data from the buffer offset to the wrapped
10883 10890 * offset.
10884 10891 *
10885 10892 * - The third (and least common) is that there is a gap between the
10886 10893 * buffer offset and the wrapped offset, but the wrapped offset is
10887 10894 * _less_ than the buffer offset. This can only happen because a
10888 10895 * call to dtrace_buffer_reserve() induced a wrap, but the space
10889 10896 * was not subsequently consumed. In this case, we need to zero the
10890 10897 * space from the offset to the end of the buffer _and_ from the
10891 10898 * top of the buffer to the wrapped offset.
10892 10899 */
10893 10900 if (buf->dtb_offset < buf->dtb_xamot_offset) {
10894 10901 bzero(buf->dtb_tomax + buf->dtb_offset,
10895 10902 buf->dtb_xamot_offset - buf->dtb_offset);
10896 10903 }
10897 10904
10898 10905 if (buf->dtb_offset > buf->dtb_xamot_offset) {
10899 10906 bzero(buf->dtb_tomax + buf->dtb_offset,
10900 10907 buf->dtb_size - buf->dtb_offset);
10901 10908 bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
10902 10909 }
10903 10910 }
10904 10911
10905 10912 /*
10906 10913 * This routine determines if data generated at the specified time has likely
10907 10914 * been entirely consumed at user-level. This routine is called to determine
10908 10915 * if an ECB on a defunct probe (but for an active enabling) can be safely
10909 10916 * disabled and destroyed.
10910 10917 */
10911 10918 static int
10912 10919 dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when)
10913 10920 {
10914 10921 int i;
10915 10922
10916 10923 for (i = 0; i < NCPU; i++) {
10917 10924 dtrace_buffer_t *buf = &bufs[i];
10918 10925
10919 10926 if (buf->dtb_size == 0)
10920 10927 continue;
10921 10928
10922 10929 if (buf->dtb_flags & DTRACEBUF_RING)
10923 10930 return (0);
10924 10931
10925 10932 if (!buf->dtb_switched && buf->dtb_offset != 0)
10926 10933 return (0);
10927 10934
10928 10935 if (buf->dtb_switched - buf->dtb_interval < when)
10929 10936 return (0);
10930 10937 }
10931 10938
10932 10939 return (1);
10933 10940 }
10934 10941
10935 10942 static void
10936 10943 dtrace_buffer_free(dtrace_buffer_t *bufs)
10937 10944 {
10938 10945 int i;
10939 10946
10940 10947 for (i = 0; i < NCPU; i++) {
10941 10948 dtrace_buffer_t *buf = &bufs[i];
10942 10949
10943 10950 if (buf->dtb_tomax == NULL) {
10944 10951 ASSERT(buf->dtb_xamot == NULL);
10945 10952 ASSERT(buf->dtb_size == 0);
10946 10953 continue;
10947 10954 }
10948 10955
10949 10956 if (buf->dtb_xamot != NULL) {
10950 10957 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
10951 10958 kmem_free(buf->dtb_xamot, buf->dtb_size);
10952 10959 }
10953 10960
10954 10961 kmem_free(buf->dtb_tomax, buf->dtb_size);
10955 10962 buf->dtb_size = 0;
10956 10963 buf->dtb_tomax = NULL;
10957 10964 buf->dtb_xamot = NULL;
10958 10965 }
10959 10966 }
10960 10967
10961 10968 /*
10962 10969 * DTrace Enabling Functions
10963 10970 */
10964 10971 static dtrace_enabling_t *
10965 10972 dtrace_enabling_create(dtrace_vstate_t *vstate)
10966 10973 {
10967 10974 dtrace_enabling_t *enab;
10968 10975
10969 10976 enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
10970 10977 enab->dten_vstate = vstate;
10971 10978
10972 10979 return (enab);
10973 10980 }
10974 10981
10975 10982 static void
10976 10983 dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
10977 10984 {
10978 10985 dtrace_ecbdesc_t **ndesc;
10979 10986 size_t osize, nsize;
10980 10987
10981 10988 /*
10982 10989 * We can't add to enablings after we've enabled them, or after we've
10983 10990 * retained them.
10984 10991 */
10985 10992 ASSERT(enab->dten_probegen == 0);
10986 10993 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
10987 10994
10988 10995 if (enab->dten_ndesc < enab->dten_maxdesc) {
10989 10996 enab->dten_desc[enab->dten_ndesc++] = ecb;
10990 10997 return;
10991 10998 }
10992 10999
10993 11000 	osize = enab->dten_maxdesc * sizeof (dtrace_ecbdesc_t *);
10994 11001
10995 11002 if (enab->dten_maxdesc == 0) {
10996 11003 enab->dten_maxdesc = 1;
10997 11004 } else {
10998 11005 enab->dten_maxdesc <<= 1;
10999 11006 }
11000 11007
11001 11008 ASSERT(enab->dten_ndesc < enab->dten_maxdesc);
11002 11009
11003 11010 	nsize = enab->dten_maxdesc * sizeof (dtrace_ecbdesc_t *);
11004 11011 ndesc = kmem_zalloc(nsize, KM_SLEEP);
11005 11012 bcopy(enab->dten_desc, ndesc, osize);
11006 11013 kmem_free(enab->dten_desc, osize);
11007 11014
11008 11015 enab->dten_desc = ndesc;
11009 11016 enab->dten_desc[enab->dten_ndesc++] = ecb;
11010 11017 }
11011 11018
11012 11019 static void
11013 11020 dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
11014 11021 dtrace_probedesc_t *pd)
11015 11022 {
11016 11023 dtrace_ecbdesc_t *new;
11017 11024 dtrace_predicate_t *pred;
11018 11025 dtrace_actdesc_t *act;
11019 11026
11020 11027 /*
11021 11028 * We're going to create a new ECB description that matches the
11022 11029 * specified ECB in every way, but has the specified probe description.
11023 11030 */
11024 11031 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
11025 11032
11026 11033 if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
11027 11034 dtrace_predicate_hold(pred);
11028 11035
11029 11036 for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
11030 11037 dtrace_actdesc_hold(act);
11031 11038
11032 11039 new->dted_action = ecb->dted_action;
11033 11040 new->dted_pred = ecb->dted_pred;
11034 11041 new->dted_probe = *pd;
11035 11042 new->dted_uarg = ecb->dted_uarg;
11036 11043
11037 11044 dtrace_enabling_add(enab, new);
11038 11045 }
11039 11046
11040 11047 static void
11041 11048 dtrace_enabling_dump(dtrace_enabling_t *enab)
11042 11049 {
11043 11050 int i;
11044 11051
11045 11052 for (i = 0; i < enab->dten_ndesc; i++) {
11046 11053 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;
11047 11054
11048 11055 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
11049 11056 desc->dtpd_provider, desc->dtpd_mod,
11050 11057 desc->dtpd_func, desc->dtpd_name);
11051 11058 }
11052 11059 }
11053 11060
11054 11061 static void
11055 11062 dtrace_enabling_destroy(dtrace_enabling_t *enab)
11056 11063 {
11057 11064 int i;
11058 11065 dtrace_ecbdesc_t *ep;
11059 11066 dtrace_vstate_t *vstate = enab->dten_vstate;
11060 11067
11061 11068 ASSERT(MUTEX_HELD(&dtrace_lock));
11062 11069
11063 11070 for (i = 0; i < enab->dten_ndesc; i++) {
11064 11071 dtrace_actdesc_t *act, *next;
11065 11072 dtrace_predicate_t *pred;
11066 11073
11067 11074 ep = enab->dten_desc[i];
11068 11075
11069 11076 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
11070 11077 dtrace_predicate_release(pred, vstate);
11071 11078
11072 11079 for (act = ep->dted_action; act != NULL; act = next) {
11073 11080 next = act->dtad_next;
11074 11081 dtrace_actdesc_release(act, vstate);
11075 11082 }
11076 11083
11077 11084 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
11078 11085 }
11079 11086
11080 11087 kmem_free(enab->dten_desc,
11081 11088 	    enab->dten_maxdesc * sizeof (dtrace_ecbdesc_t *));
11082 11089
11083 11090 /*
11084 11091 * If this was a retained enabling, decrement the dts_nretained count
11085 11092 * and take it off of the dtrace_retained list.
11086 11093 */
11087 11094 if (enab->dten_prev != NULL || enab->dten_next != NULL ||
11088 11095 dtrace_retained == enab) {
11089 11096 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11090 11097 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
11091 11098 enab->dten_vstate->dtvs_state->dts_nretained--;
11092 11099 dtrace_retained_gen++;
11093 11100 }
11094 11101
11095 11102 if (enab->dten_prev == NULL) {
11096 11103 if (dtrace_retained == enab) {
11097 11104 dtrace_retained = enab->dten_next;
11098 11105
11099 11106 if (dtrace_retained != NULL)
11100 11107 dtrace_retained->dten_prev = NULL;
11101 11108 }
11102 11109 } else {
11103 11110 ASSERT(enab != dtrace_retained);
11104 11111 ASSERT(dtrace_retained != NULL);
11105 11112 enab->dten_prev->dten_next = enab->dten_next;
11106 11113 }
11107 11114
11108 11115 if (enab->dten_next != NULL) {
11109 11116 ASSERT(dtrace_retained != NULL);
11110 11117 enab->dten_next->dten_prev = enab->dten_prev;
11111 11118 }
11112 11119
11113 11120 kmem_free(enab, sizeof (dtrace_enabling_t));
11114 11121 }
11115 11122
11116 11123 static int
11117 11124 dtrace_enabling_retain(dtrace_enabling_t *enab)
11118 11125 {
11119 11126 dtrace_state_t *state;
11120 11127
11121 11128 ASSERT(MUTEX_HELD(&dtrace_lock));
11122 11129 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
11123 11130 ASSERT(enab->dten_vstate != NULL);
11124 11131
11125 11132 state = enab->dten_vstate->dtvs_state;
11126 11133 ASSERT(state != NULL);
11127 11134
11128 11135 /*
11129 11136 * We only allow each state to retain dtrace_retain_max enablings.
11130 11137 */
11131 11138 if (state->dts_nretained >= dtrace_retain_max)
11132 11139 return (ENOSPC);
11133 11140
11134 11141 state->dts_nretained++;
11135 11142 dtrace_retained_gen++;
11136 11143
11137 11144 if (dtrace_retained == NULL) {
11138 11145 dtrace_retained = enab;
11139 11146 return (0);
11140 11147 }
11141 11148
11142 11149 enab->dten_next = dtrace_retained;
11143 11150 dtrace_retained->dten_prev = enab;
11144 11151 dtrace_retained = enab;
11145 11152
11146 11153 return (0);
11147 11154 }
11148 11155
11149 11156 static int
11150 11157 dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
11151 11158 dtrace_probedesc_t *create)
11152 11159 {
11153 11160 dtrace_enabling_t *new, *enab;
11154 11161 int found = 0, err = ENOENT;
11155 11162
11156 11163 ASSERT(MUTEX_HELD(&dtrace_lock));
11157 11164 ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
11158 11165 ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
11159 11166 ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
11160 11167 ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);
11161 11168
11162 11169 new = dtrace_enabling_create(&state->dts_vstate);
11163 11170
11164 11171 /*
11165 11172 * Iterate over all retained enablings, looking for enablings that
11166 11173 * match the specified state.
11167 11174 */
11168 11175 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11169 11176 int i;
11170 11177
11171 11178 /*
11172 11179 * dtvs_state can only be NULL for helper enablings -- and
11173 11180 * helper enablings can't be retained.
11174 11181 */
11175 11182 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11176 11183
11177 11184 if (enab->dten_vstate->dtvs_state != state)
11178 11185 continue;
11179 11186
11180 11187 /*
11181 11188 * Now iterate over each probe description; we're looking for
11182 11189 * an exact match to the specified probe description.
11183 11190 */
11184 11191 for (i = 0; i < enab->dten_ndesc; i++) {
11185 11192 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11186 11193 dtrace_probedesc_t *pd = &ep->dted_probe;
11187 11194
11188 11195 if (strcmp(pd->dtpd_provider, match->dtpd_provider))
11189 11196 continue;
11190 11197
11191 11198 if (strcmp(pd->dtpd_mod, match->dtpd_mod))
11192 11199 continue;
11193 11200
11194 11201 if (strcmp(pd->dtpd_func, match->dtpd_func))
11195 11202 continue;
11196 11203
11197 11204 if (strcmp(pd->dtpd_name, match->dtpd_name))
11198 11205 continue;
11199 11206
11200 11207 /*
11201 11208 * We have a winning probe! Add it to our growing
11202 11209 * enabling.
11203 11210 */
11204 11211 found = 1;
11205 11212 dtrace_enabling_addlike(new, ep, create);
11206 11213 }
11207 11214 }
11208 11215
11209 11216 if (!found || (err = dtrace_enabling_retain(new)) != 0) {
11210 11217 dtrace_enabling_destroy(new);
11211 11218 return (err);
11212 11219 }
11213 11220
11214 11221 return (0);
11215 11222 }
11216 11223
11217 11224 static void
11218 11225 dtrace_enabling_retract(dtrace_state_t *state)
11219 11226 {
11220 11227 dtrace_enabling_t *enab, *next;
11221 11228
11222 11229 ASSERT(MUTEX_HELD(&dtrace_lock));
11223 11230
11224 11231 /*
11225 11232 * Iterate over all retained enablings, destroy the enablings retained
11226 11233 * for the specified state.
11227 11234 */
11228 11235 for (enab = dtrace_retained; enab != NULL; enab = next) {
11229 11236 next = enab->dten_next;
11230 11237
11231 11238 /*
11232 11239 * dtvs_state can only be NULL for helper enablings -- and
11233 11240 * helper enablings can't be retained.
11234 11241 */
11235 11242 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11236 11243
11237 11244 if (enab->dten_vstate->dtvs_state == state) {
11238 11245 ASSERT(state->dts_nretained > 0);
11239 11246 dtrace_enabling_destroy(enab);
11240 11247 }
11241 11248 }
11242 11249
11243 11250 ASSERT(state->dts_nretained == 0);
11244 11251 }
11245 11252
11246 11253 static int
11247 11254 dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
11248 11255 {
11249 11256 int i = 0;
11250 11257 int total_matched = 0, matched = 0;
11251 11258
11252 11259 ASSERT(MUTEX_HELD(&cpu_lock));
11253 11260 ASSERT(MUTEX_HELD(&dtrace_lock));
11254 11261
11255 11262 for (i = 0; i < enab->dten_ndesc; i++) {
11256 11263 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11257 11264
11258 11265 enab->dten_current = ep;
11259 11266 enab->dten_error = 0;
11260 11267
11261 11268 /*
11262 11269 * If a provider failed to enable a probe then get out and
11263 11270 * let the consumer know we failed.
11264 11271 */
11265 11272 if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
11266 11273 return (EBUSY);
11267 11274
11268 11275 total_matched += matched;
11269 11276
11270 11277 if (enab->dten_error != 0) {
11271 11278 /*
11272 11279 * If we get an error half-way through enabling the
11273 11280 * probes, we kick out -- perhaps with some number of
11274 11281 * them enabled. Leaving enabled probes enabled may
11275 11282 * be slightly confusing for user-level, but we expect
11276 11283 * that no one will attempt to actually drive on in
11277 11284 * the face of such errors. If this is an anonymous
11278 11285 * enabling (indicated with a NULL nmatched pointer),
11279 11286 * we cmn_err() a message. We aren't expecting to
11280 11287 			 * get such an error -- to the extent that such an
11281 11288 			 * error can exist at all, it would be the result of
11282 11289 			 * corrupted DOF in the driver properties.
11283 11290 */
11284 11291 if (nmatched == NULL) {
11285 11292 cmn_err(CE_WARN, "dtrace_enabling_match() "
11286 11293 "error on %p: %d", (void *)ep,
11287 11294 enab->dten_error);
11288 11295 }
11289 11296
11290 11297 return (enab->dten_error);
11291 11298 }
11292 11299 }
11293 11300
11294 11301 enab->dten_probegen = dtrace_probegen;
11295 11302 if (nmatched != NULL)
11296 11303 *nmatched = total_matched;
11297 11304
11298 11305 return (0);
11299 11306 }
11300 11307
11301 11308 static void
11302 11309 dtrace_enabling_matchall(void)
11303 11310 {
11304 11311 dtrace_enabling_t *enab;
11305 11312
11306 11313 mutex_enter(&cpu_lock);
11307 11314 mutex_enter(&dtrace_lock);
11308 11315
11309 11316 /*
11310 11317 * Iterate over all retained enablings to see if any probes match
11311 11318 * against them. We only perform this operation on enablings for which
11312 11319 * we have sufficient permissions by virtue of being in the global zone
11313 11320 * or in the same zone as the DTrace client. Because we can be called
11314 11321 * after dtrace_detach() has been called, we cannot assert that there
11315 11322 * are retained enablings. We can safely load from dtrace_retained,
11316 11323 * however: the taskq_destroy() at the end of dtrace_detach() will
11317 11324 * block pending our completion.
11318 11325 */
11319 11326 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11320 11327 dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred;
11321 11328 cred_t *cr = dcr->dcr_cred;
11322 11329 zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0;
11323 11330
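		/*
		 * For example: the enablings of a client in a non-global
		 * zone (lacking DTRACE_CRV_ALLZONE) are only matched when
		 * this is called from that same zone, while a global-zone
		 * client's enablings are always matched.
		 */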
11324 11331 if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL &&
11325 11332 (zone == GLOBAL_ZONEID || getzoneid() == zone)))
11326 11333 (void) dtrace_enabling_match(enab, NULL);
11327 11334 }
11328 11335
11329 11336 mutex_exit(&dtrace_lock);
11330 11337 mutex_exit(&cpu_lock);
11331 11338 }
11332 11339
11333 11340 /*
11334 11341 * If an enabling is to be enabled without having matched probes (that is, if
11335 11342 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
11336 11343 * enabling must be _primed_ by creating an ECB for every ECB description.
11337 11344 * This must be done to assure that we know the number of speculations, the
11338 11345 * number of aggregations, the minimum buffer size needed, etc. before we
11339 11346 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually
11340 11347  * enabling any probes, we create ECBs for every ECB description, but with a
11341 11348 * NULL probe -- which is exactly what this function does.
11342 11349 */
11343 11350 static void
11344 11351 dtrace_enabling_prime(dtrace_state_t *state)
11345 11352 {
11346 11353 dtrace_enabling_t *enab;
11347 11354 int i;
11348 11355
11349 11356 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11350 11357 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11351 11358
11352 11359 if (enab->dten_vstate->dtvs_state != state)
11353 11360 continue;
11354 11361
11355 11362 /*
11356 11363 * We don't want to prime an enabling more than once, lest
11357 11364 * we allow a malicious user to induce resource exhaustion.
11358 11365 * (The ECBs that result from priming an enabling aren't
11359 11366 * leaked -- but they also aren't deallocated until the
11360 11367 * consumer state is destroyed.)
11361 11368 */
11362 11369 if (enab->dten_primed)
11363 11370 continue;
11364 11371
11365 11372 for (i = 0; i < enab->dten_ndesc; i++) {
11366 11373 enab->dten_current = enab->dten_desc[i];
11367 11374 (void) dtrace_probe_enable(NULL, enab);
11368 11375 }
11369 11376
11370 11377 enab->dten_primed = 1;
11371 11378 }
11372 11379 }
11373 11380
11374 11381 /*
11375 11382 * Called to indicate that probes should be provided due to retained
11376 11383 * enablings. This is implemented in terms of dtrace_probe_provide(), but it
11377 11384 * must take an initial lap through the enabling calling the dtps_provide()
11378 11385 * entry point explicitly to allow for autocreated probes.
11379 11386 */
11380 11387 static void
11381 11388 dtrace_enabling_provide(dtrace_provider_t *prv)
11382 11389 {
11383 11390 int i, all = 0;
11384 11391 dtrace_probedesc_t desc;
11385 11392 dtrace_genid_t gen;
11386 11393
11387 11394 ASSERT(MUTEX_HELD(&dtrace_lock));
11388 11395 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
11389 11396
11390 11397 if (prv == NULL) {
11391 11398 all = 1;
11392 11399 prv = dtrace_provider;
11393 11400 }
11394 11401
11395 11402 do {
11396 11403 dtrace_enabling_t *enab;
11397 11404 void *parg = prv->dtpv_arg;
11398 11405
11399 11406 retry:
11400 11407 gen = dtrace_retained_gen;
11401 11408 for (enab = dtrace_retained; enab != NULL;
11402 11409 enab = enab->dten_next) {
11403 11410 for (i = 0; i < enab->dten_ndesc; i++) {
11404 11411 desc = enab->dten_desc[i]->dted_probe;
11405 11412 mutex_exit(&dtrace_lock);
11406 11413 prv->dtpv_pops.dtps_provide(parg, &desc);
11407 11414 mutex_enter(&dtrace_lock);
11408 11415 /*
11409 11416 * Process the retained enablings again if
11410 11417 * they have changed while we weren't holding
11411 11418 * dtrace_lock.
11412 11419 */
11413 11420 if (gen != dtrace_retained_gen)
11414 11421 goto retry;
11415 11422 }
11416 11423 }
11417 11424 } while (all && (prv = prv->dtpv_next) != NULL);
11418 11425
11419 11426 mutex_exit(&dtrace_lock);
11420 11427 dtrace_probe_provide(NULL, all ? NULL : prv);
11421 11428 mutex_enter(&dtrace_lock);
11422 11429 }
11423 11430
11424 11431 /*
11425 11432 * Called to reap ECBs that are attached to probes from defunct providers.
11426 11433 */
11427 11434 static void
11428 11435 dtrace_enabling_reap(void)
11429 11436 {
11430 11437 dtrace_provider_t *prov;
11431 11438 dtrace_probe_t *probe;
11432 11439 dtrace_ecb_t *ecb;
11433 11440 hrtime_t when;
11434 11441 int i;
11435 11442
11436 11443 mutex_enter(&cpu_lock);
11437 11444 mutex_enter(&dtrace_lock);
11438 11445
11439 11446 for (i = 0; i < dtrace_nprobes; i++) {
11440 11447 if ((probe = dtrace_probes[i]) == NULL)
11441 11448 continue;
11442 11449
11443 11450 if (probe->dtpr_ecb == NULL)
11444 11451 continue;
11445 11452
11446 11453 prov = probe->dtpr_provider;
11447 11454
11448 11455 if ((when = prov->dtpv_defunct) == 0)
11449 11456 continue;
11450 11457
11451 11458 /*
11452 11459 * We have ECBs on a defunct provider: we want to reap these
11453 11460 * ECBs to allow the provider to unregister. The destruction
11454 11461 * of these ECBs must be done carefully: if we destroy the ECB
11455 11462 * and the consumer later wishes to consume an EPID that
11456 11463 * corresponds to the destroyed ECB (and if the EPID metadata
11457 11464 * has not been previously consumed), the consumer will abort
11458 11465 * processing on the unknown EPID. To reduce (but not, sadly,
11459 11466 * eliminate) the possibility of this, we will only destroy an
11460 11467 * ECB for a defunct provider if, for the state that
11461 11468 * corresponds to the ECB:
11462 11469 *
11463 11470 * (a) There is no speculative tracing (which can effectively
11464 11471 * cache an EPID for an arbitrary amount of time).
11465 11472 *
11466 11473 * (b) The principal buffers have been switched twice since the
11467 11474 * provider became defunct.
11468 11475 *
11469 11476 * (c) The aggregation buffers are of zero size or have been
11470 11477 * switched twice since the provider became defunct.
11471 11478 *
11472 11479 * We use dts_speculates to determine (a) and call a function
11473 11480 * (dtrace_buffer_consumed()) to determine (b) and (c). Note
11474 11481 * that as soon as we've been unable to destroy one of the ECBs
11475 11482 * associated with the probe, we quit trying -- reaping is only
11476 11483 * fruitful in as much as we can destroy all ECBs associated
11477 11484 * with the defunct provider's probes.
11478 11485 */
11479 11486 while ((ecb = probe->dtpr_ecb) != NULL) {
11480 11487 dtrace_state_t *state = ecb->dte_state;
11481 11488 dtrace_buffer_t *buf = state->dts_buffer;
11482 11489 dtrace_buffer_t *aggbuf = state->dts_aggbuffer;
11483 11490
11484 11491 if (state->dts_speculates)
11485 11492 break;
11486 11493
11487 11494 if (!dtrace_buffer_consumed(buf, when))
11488 11495 break;
11489 11496
11490 11497 if (!dtrace_buffer_consumed(aggbuf, when))
11491 11498 break;
11492 11499
11493 11500 dtrace_ecb_disable(ecb);
11494 11501 ASSERT(probe->dtpr_ecb != ecb);
11495 11502 dtrace_ecb_destroy(ecb);
11496 11503 }
11497 11504 }
11498 11505
11499 11506 mutex_exit(&dtrace_lock);
11500 11507 mutex_exit(&cpu_lock);
11501 11508 }
11502 11509
11503 11510 /*
11504 11511 * DTrace DOF Functions
11505 11512 */
11506 11513 /*ARGSUSED*/
11507 11514 static void
11508 11515 dtrace_dof_error(dof_hdr_t *dof, const char *str)
11509 11516 {
11510 11517 if (dtrace_err_verbose)
11511 11518 cmn_err(CE_WARN, "failed to process DOF: %s", str);
11512 11519
11513 11520 #ifdef DTRACE_ERRDEBUG
11514 11521 dtrace_errdebug(str);
11515 11522 #endif
11516 11523 }
11517 11524
11518 11525 /*
11519 11526 * Create DOF out of a currently enabled state. Right now, we only create
11520 11527 * DOF containing the run-time options -- but this could be expanded to create
11521 11528 * complete DOF representing the enabled state.
11522 11529 */
11523 11530 static dof_hdr_t *
11524 11531 dtrace_dof_create(dtrace_state_t *state)
11525 11532 {
11526 11533 dof_hdr_t *dof;
11527 11534 dof_sec_t *sec;
11528 11535 dof_optdesc_t *opt;
11529 11536 int i, len = sizeof (dof_hdr_t) +
11530 11537 roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
11531 11538 sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
11532 11539
11533 11540 ASSERT(MUTEX_HELD(&dtrace_lock));
11534 11541
11535 11542 dof = kmem_zalloc(len, KM_SLEEP);
11536 11543 dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
11537 11544 dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
11538 11545 dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
11539 11546 dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
11540 11547
11541 11548 dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
11542 11549 dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
11543 11550 dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
11544 11551 dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
11545 11552 dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
11546 11553 dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
11547 11554
11548 11555 dof->dofh_flags = 0;
11549 11556 dof->dofh_hdrsize = sizeof (dof_hdr_t);
11550 11557 dof->dofh_secsize = sizeof (dof_sec_t);
11551 11558 dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */
11552 11559 dof->dofh_secoff = sizeof (dof_hdr_t);
11553 11560 dof->dofh_loadsz = len;
11554 11561 dof->dofh_filesz = len;
11555 11562 dof->dofh_pad = 0;
11556 11563
11557 11564 /*
11558 11565 * Fill in the option section header...
11559 11566 */
11560 11567 sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
11561 11568 sec->dofs_type = DOF_SECT_OPTDESC;
11562 11569 sec->dofs_align = sizeof (uint64_t);
11563 11570 sec->dofs_flags = DOF_SECF_LOAD;
11564 11571 sec->dofs_entsize = sizeof (dof_optdesc_t);
11565 11572
11566 11573 opt = (dof_optdesc_t *)((uintptr_t)sec +
11567 11574 roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
11568 11575
11569 11576 sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
11570 11577 sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
11571 11578
11572 11579 for (i = 0; i < DTRACEOPT_MAX; i++) {
11573 11580 opt[i].dofo_option = i;
11574 11581 opt[i].dofo_strtab = DOF_SECIDX_NONE;
11575 11582 opt[i].dofo_value = state->dts_options[i];
11576 11583 }
11577 11584
11578 11585 return (dof);
11579 11586 }
11580 11587
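/*
 * A minimal sketch (name assumed) of walking the DOF created above:
 * locate the lone DOF_SECT_OPTDESC section via the header and scan its
 * dof_optdesc_t entries for the requested option.
 */
static uint64_t
dtrace_dof_getopt_sketch(dof_hdr_t *dof, uint32_t option)
{
	dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof + dof->dofh_secoff);
	dof_optdesc_t *opt = (dof_optdesc_t *)((uintptr_t)dof +
	    sec->dofs_offset);
	size_t i, nopts = sec->dofs_size / sec->dofs_entsize;

	ASSERT(sec->dofs_type == DOF_SECT_OPTDESC);

	for (i = 0; i < nopts; i++) {
		if (opt[i].dofo_option == option)
			return (opt[i].dofo_value);
	}

	return (DTRACEOPT_UNSET);
}
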
11581 11588 static dof_hdr_t *
11582 11589 dtrace_dof_copyin(uintptr_t uarg, int *errp)
11583 11590 {
11584 11591 dof_hdr_t hdr, *dof;
11585 11592
11586 11593 ASSERT(!MUTEX_HELD(&dtrace_lock));
11587 11594
11588 11595 /*
11589 11596 * First, we're going to copyin() the sizeof (dof_hdr_t).
11590 11597 */
11591 11598 if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) {
11592 11599 dtrace_dof_error(NULL, "failed to copyin DOF header");
11593 11600 *errp = EFAULT;
11594 11601 return (NULL);
11595 11602 }
11596 11603
11597 11604 /*
11598 11605 * Now we'll allocate the entire DOF and copy it in -- provided
11599 11606 * that the length isn't outrageous.
11600 11607 */
11601 11608 if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
11602 11609 dtrace_dof_error(&hdr, "load size exceeds maximum");
11603 11610 *errp = E2BIG;
11604 11611 return (NULL);
11605 11612 }
11606 11613
11607 11614 if (hdr.dofh_loadsz < sizeof (hdr)) {
11608 11615 dtrace_dof_error(&hdr, "invalid load size");
11609 11616 *errp = EINVAL;
11610 11617 return (NULL);
11611 11618 }
11612 11619
11613 11620 dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);
11614 11621
11615 11622 if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
11616 11623 dof->dofh_loadsz != hdr.dofh_loadsz) {
11617 11624 kmem_free(dof, hdr.dofh_loadsz);
11618 11625 *errp = EFAULT;
11619 11626 return (NULL);
11620 11627 }
11621 11628
11622 11629 return (dof);
11623 11630 }
11624 11631
11625 11632 static dof_hdr_t *
11626 11633 dtrace_dof_property(const char *name)
11627 11634 {
11628 11635 uchar_t *buf;
11629 11636 uint64_t loadsz;
11630 11637 unsigned int len, i;
11631 11638 dof_hdr_t *dof;
11632 11639
11633 11640 /*
11634 11641 	 * Unfortunately, arrays of values in .conf files are always (and
11635 11642 * only) interpreted to be integer arrays. We must read our DOF
11636 11643 * as an integer array, and then squeeze it into a byte array.
11637 11644 */
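	/*
	 * For example, a (hypothetical) .conf fragment carrying DOF would
	 * read
	 *
	 *	dof-data-0=0x7f,0x44,0x4f,0x46,...;
	 *
	 * with one byte of DOF ("\177DOF" being the magic) per integer
	 * element.
	 */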
11638 11645 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
11639 11646 (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
11640 11647 return (NULL);
11641 11648
11642 11649 for (i = 0; i < len; i++)
11643 11650 buf[i] = (uchar_t)(((int *)buf)[i]);
11644 11651
11645 11652 if (len < sizeof (dof_hdr_t)) {
11646 11653 ddi_prop_free(buf);
11647 11654 dtrace_dof_error(NULL, "truncated header");
11648 11655 return (NULL);
11649 11656 }
11650 11657
11651 11658 if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
11652 11659 ddi_prop_free(buf);
11653 11660 dtrace_dof_error(NULL, "truncated DOF");
11654 11661 return (NULL);
11655 11662 }
11656 11663
11657 11664 if (loadsz >= dtrace_dof_maxsize) {
11658 11665 ddi_prop_free(buf);
11659 11666 dtrace_dof_error(NULL, "oversized DOF");
11660 11667 return (NULL);
11661 11668 }
11662 11669
11663 11670 dof = kmem_alloc(loadsz, KM_SLEEP);
11664 11671 bcopy(buf, dof, loadsz);
11665 11672 ddi_prop_free(buf);
11666 11673
11667 11674 return (dof);
11668 11675 }
11669 11676
11670 11677 static void
11671 11678 dtrace_dof_destroy(dof_hdr_t *dof)
11672 11679 {
11673 11680 kmem_free(dof, dof->dofh_loadsz);
11674 11681 }
11675 11682
11676 11683 /*
11677 11684 * Return the dof_sec_t pointer corresponding to a given section index. If the
11678 11685 * index is not valid, dtrace_dof_error() is called and NULL is returned. If
11679 11686 * a type other than DOF_SECT_NONE is specified, the header is checked against
11680 11687 * this type and NULL is returned if the types do not match.
11681 11688 */
11682 11689 static dof_sec_t *
11683 11690 dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
11684 11691 {
11685 11692 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
11686 11693 ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);
11687 11694
11688 11695 if (i >= dof->dofh_secnum) {
11689 11696 dtrace_dof_error(dof, "referenced section index is invalid");
11690 11697 return (NULL);
11691 11698 }
11692 11699
11693 11700 if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
11694 11701 dtrace_dof_error(dof, "referenced section is not loadable");
11695 11702 return (NULL);
11696 11703 }
11697 11704
11698 11705 if (type != DOF_SECT_NONE && type != sec->dofs_type) {
11699 11706 dtrace_dof_error(dof, "referenced section is the wrong type");
11700 11707 return (NULL);
11701 11708 }
11702 11709
11703 11710 return (sec);
11704 11711 }
11705 11712
11706 11713 static dtrace_probedesc_t *
11707 11714 dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
11708 11715 {
11709 11716 dof_probedesc_t *probe;
11710 11717 dof_sec_t *strtab;
11711 11718 uintptr_t daddr = (uintptr_t)dof;
11712 11719 uintptr_t str;
11713 11720 size_t size;
11714 11721
11715 11722 if (sec->dofs_type != DOF_SECT_PROBEDESC) {
11716 11723 dtrace_dof_error(dof, "invalid probe section");
11717 11724 return (NULL);
11718 11725 }
11719 11726
11720 11727 if (sec->dofs_align != sizeof (dof_secidx_t)) {
11721 11728 dtrace_dof_error(dof, "bad alignment in probe description");
11722 11729 return (NULL);
11723 11730 }
11724 11731
11725 11732 if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
11726 11733 dtrace_dof_error(dof, "truncated probe description");
11727 11734 return (NULL);
11728 11735 }
11729 11736
11730 11737 probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
11731 11738 strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);
11732 11739
11733 11740 if (strtab == NULL)
11734 11741 return (NULL);
11735 11742
11736 11743 str = daddr + strtab->dofs_offset;
11737 11744 size = strtab->dofs_size;
11738 11745
11739 11746 if (probe->dofp_provider >= strtab->dofs_size) {
11740 11747 dtrace_dof_error(dof, "corrupt probe provider");
11741 11748 return (NULL);
11742 11749 }
11743 11750
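/*
 * Copy each probe description string out of the string table, bounding
 * the copy by both the destination buffer and the bytes remaining in
 * the table.  The destination is zeroed by our caller (it lives in a
 * kmem_zalloc()'d ecbdesc), so a truncated copy is still NUL-terminated.
 */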
11744 11751 (void) strncpy(desc->dtpd_provider,
11745 11752 (char *)(str + probe->dofp_provider),
11746 11753 MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));
11747 11754
11748 11755 if (probe->dofp_mod >= strtab->dofs_size) {
11749 11756 dtrace_dof_error(dof, "corrupt probe module");
11750 11757 return (NULL);
11751 11758 }
11752 11759
11753 11760 (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
11754 11761 MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));
11755 11762
11756 11763 if (probe->dofp_func >= strtab->dofs_size) {
11757 11764 dtrace_dof_error(dof, "corrupt probe function");
11758 11765 return (NULL);
11759 11766 }
11760 11767
11761 11768 (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
11762 11769 MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));
11763 11770
11764 11771 if (probe->dofp_name >= strtab->dofs_size) {
11765 11772 dtrace_dof_error(dof, "corrupt probe name");
11766 11773 return (NULL);
11767 11774 }
11768 11775
11769 11776 (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
11770 11777 MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));
11771 11778
11772 11779 return (desc);
11773 11780 }
11774 11781
11775 11782 static dtrace_difo_t *
11776 11783 dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11777 11784 cred_t *cr)
11778 11785 {
11779 11786 dtrace_difo_t *dp;
11780 11787 size_t ttl = 0;
11781 11788 dof_difohdr_t *dofd;
11782 11789 uintptr_t daddr = (uintptr_t)dof;
11783 11790 size_t max = dtrace_difo_maxsize;
11784 11791 int i, l, n;
11785 11792
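/*
 * A table-driven description of the DIFO sub-sections we understand:
 * for each section type, the offsets within the dtrace_difo_t of the
 * corresponding buffer pointer and length, the expected entry size and
 * alignment, and the error to report if the section is seen more than
 * once.
 */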
11786 11793 static const struct {
11787 11794 int section;
11788 11795 int bufoffs;
11789 11796 int lenoffs;
11790 11797 int entsize;
11791 11798 int align;
11792 11799 const char *msg;
11793 11800 } difo[] = {
11794 11801 { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
11795 11802 offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
11796 11803 sizeof (dif_instr_t), "multiple DIF sections" },
11797 11804
11798 11805 { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
11799 11806 offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
11800 11807 sizeof (uint64_t), "multiple integer tables" },
11801 11808
11802 11809 { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
11803 11810 offsetof(dtrace_difo_t, dtdo_strlen), 0,
11804 11811 sizeof (char), "multiple string tables" },
11805 11812
11806 11813 { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
11807 11814 offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
11808 11815 sizeof (uint_t), "multiple variable tables" },
11809 11816
11810 11817 		{ DOF_SECT_NONE, 0, 0, 0, 0, NULL }
11811 11818 };
11812 11819
11813 11820 if (sec->dofs_type != DOF_SECT_DIFOHDR) {
11814 11821 dtrace_dof_error(dof, "invalid DIFO header section");
11815 11822 return (NULL);
11816 11823 }
11817 11824
11818 11825 if (sec->dofs_align != sizeof (dof_secidx_t)) {
11819 11826 dtrace_dof_error(dof, "bad alignment in DIFO header");
11820 11827 return (NULL);
11821 11828 }
11822 11829
11823 11830 if (sec->dofs_size < sizeof (dof_difohdr_t) ||
11824 11831 sec->dofs_size % sizeof (dof_secidx_t)) {
11825 11832 dtrace_dof_error(dof, "bad size in DIFO header");
11826 11833 return (NULL);
11827 11834 }
11828 11835
11829 11836 dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
11830 11837 n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;
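/*
 * Note that dof_difohdr_t declares dofd_links as a one-element array,
 * so sizeof (*dofd) already includes the first link -- hence the
 * trailing "+ 1" in the computation of 'n' above.
 */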
11831 11838
11832 11839 dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
11833 11840 dp->dtdo_rtype = dofd->dofd_rtype;
11834 11841
11835 11842 for (l = 0; l < n; l++) {
11836 11843 dof_sec_t *subsec;
11837 11844 void **bufp;
11838 11845 uint32_t *lenp;
11839 11846
11840 11847 if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
11841 11848 dofd->dofd_links[l])) == NULL)
11842 11849 goto err; /* invalid section link */
11843 11850
11844 11851 if (ttl + subsec->dofs_size > max) {
11845 11852 dtrace_dof_error(dof, "exceeds maximum size");
11846 11853 goto err;
11847 11854 }
11848 11855
11849 11856 ttl += subsec->dofs_size;
11850 11857
11851 11858 for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
11852 11859 if (subsec->dofs_type != difo[i].section)
11853 11860 continue;
11854 11861
11855 11862 if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
11856 11863 dtrace_dof_error(dof, "section not loaded");
11857 11864 goto err;
11858 11865 }
11859 11866
11860 11867 if (subsec->dofs_align != difo[i].align) {
11861 11868 dtrace_dof_error(dof, "bad alignment");
11862 11869 goto err;
11863 11870 }
11864 11871
11865 11872 bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
11866 11873 lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);
11867 11874
11868 11875 if (*bufp != NULL) {
11869 11876 dtrace_dof_error(dof, difo[i].msg);
11870 11877 goto err;
11871 11878 }
11872 11879
11873 11880 if (difo[i].entsize != subsec->dofs_entsize) {
11874 11881 dtrace_dof_error(dof, "entry size mismatch");
11875 11882 goto err;
11876 11883 }
11877 11884
11878 11885 if (subsec->dofs_entsize != 0 &&
11879 11886 (subsec->dofs_size % subsec->dofs_entsize) != 0) {
11880 11887 dtrace_dof_error(dof, "corrupt entry size");
11881 11888 goto err;
11882 11889 }
11883 11890
11884 11891 *lenp = subsec->dofs_size;
11885 11892 *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
11886 11893 bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
11887 11894 *bufp, subsec->dofs_size);
11888 11895
11889 11896 if (subsec->dofs_entsize != 0)
11890 11897 *lenp /= subsec->dofs_entsize;
11891 11898
11892 11899 break;
11893 11900 }
11894 11901
11895 11902 /*
11896 11903 * If we encounter a loadable DIFO sub-section that is not
11897 11904 * known to us, assume this is a broken program and fail.
11898 11905 */
11899 11906 if (difo[i].section == DOF_SECT_NONE &&
11900 11907 (subsec->dofs_flags & DOF_SECF_LOAD)) {
11901 11908 dtrace_dof_error(dof, "unrecognized DIFO subsection");
11902 11909 goto err;
11903 11910 }
11904 11911 }
11905 11912
11906 11913 if (dp->dtdo_buf == NULL) {
11907 11914 /*
11908 11915 * We can't have a DIF object without DIF text.
11909 11916 */
11910 11917 dtrace_dof_error(dof, "missing DIF text");
11911 11918 goto err;
11912 11919 }
11913 11920
11914 11921 /*
11915 11922 * Before we validate the DIF object, run through the variable table
11916 11923 	 * looking for string variables -- if any of them have a zero size, we'll
11917 11924 	 * set their size to the system-wide default string size.  Note that
11918 11925 * this should _not_ happen if the "strsize" option has been set --
11919 11926 * in this case, the compiler should have set the size to reflect the
11920 11927 * setting of the option.
11921 11928 */
11922 11929 for (i = 0; i < dp->dtdo_varlen; i++) {
11923 11930 dtrace_difv_t *v = &dp->dtdo_vartab[i];
11924 11931 dtrace_diftype_t *t = &v->dtdv_type;
11925 11932
11926 11933 if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
11927 11934 continue;
11928 11935
11929 11936 if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
11930 11937 t->dtdt_size = dtrace_strsize_default;
11931 11938 }
11932 11939
11933 11940 if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
11934 11941 goto err;
11935 11942
11936 11943 dtrace_difo_init(dp, vstate);
11937 11944 return (dp);
11938 11945
11939 11946 err:
11940 11947 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
11941 11948 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
11942 11949 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
11943 11950 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
11944 11951
11945 11952 kmem_free(dp, sizeof (dtrace_difo_t));
11946 11953 return (NULL);
11947 11954 }
11948 11955
11949 11956 static dtrace_predicate_t *
11950 11957 dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11951 11958 cred_t *cr)
11952 11959 {
11953 11960 dtrace_difo_t *dp;
11954 11961
11955 11962 if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
11956 11963 return (NULL);
11957 11964
11958 11965 return (dtrace_predicate_create(dp));
11959 11966 }
11960 11967
11961 11968 static dtrace_actdesc_t *
11962 11969 dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11963 11970 cred_t *cr)
11964 11971 {
11965 11972 dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
11966 11973 dof_actdesc_t *desc;
11967 11974 dof_sec_t *difosec;
11968 11975 size_t offs;
11969 11976 uintptr_t daddr = (uintptr_t)dof;
11970 11977 uint64_t arg;
11971 11978 dtrace_actkind_t kind;
11972 11979
11973 11980 if (sec->dofs_type != DOF_SECT_ACTDESC) {
11974 11981 dtrace_dof_error(dof, "invalid action section");
11975 11982 return (NULL);
11976 11983 }
11977 11984
11978 11985 if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
11979 11986 dtrace_dof_error(dof, "truncated action description");
11980 11987 return (NULL);
11981 11988 }
11982 11989
11983 11990 if (sec->dofs_align != sizeof (uint64_t)) {
11984 11991 dtrace_dof_error(dof, "bad alignment in action description");
11985 11992 return (NULL);
11986 11993 }
11987 11994
11988 11995 if (sec->dofs_size < sec->dofs_entsize) {
11989 11996 dtrace_dof_error(dof, "section entry size exceeds total size");
11990 11997 return (NULL);
11991 11998 }
11992 11999
11993 12000 if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
11994 12001 dtrace_dof_error(dof, "bad entry size in action description");
11995 12002 return (NULL);
11996 12003 }
11997 12004
11998 12005 if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
11999 12006 dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
12000 12007 return (NULL);
12001 12008 }
12002 12009
12003 12010 for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
12004 12011 desc = (dof_actdesc_t *)(daddr +
12005 12012 (uintptr_t)sec->dofs_offset + offs);
12006 12013 kind = (dtrace_actkind_t)desc->dofa_kind;
12007 12014
12008 12015 if ((DTRACEACT_ISPRINTFLIKE(kind) &&
12009 12016 (kind != DTRACEACT_PRINTA ||
12010 12017 desc->dofa_strtab != DOF_SECIDX_NONE)) ||
12011 12018 (kind == DTRACEACT_DIFEXPR &&
12012 12019 desc->dofa_strtab != DOF_SECIDX_NONE)) {
12013 12020 dof_sec_t *strtab;
12014 12021 char *str, *fmt;
12015 12022 uint64_t i;
12016 12023
12017 12024 /*
12018 12025 * The argument to these actions is an index into the
12019 12026 * DOF string table. For printf()-like actions, this
12020 12027 * is the format string. For print(), this is the
12021 12028 * CTF type of the expression result.
12022 12029 */
12023 12030 if ((strtab = dtrace_dof_sect(dof,
12024 12031 DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
12025 12032 goto err;
12026 12033
12027 12034 str = (char *)((uintptr_t)dof +
12028 12035 (uintptr_t)strtab->dofs_offset);
12029 12036
12030 12037 for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
12031 12038 if (str[i] == '\0')
12032 12039 break;
12033 12040 }
12034 12041
12035 12042 if (i >= strtab->dofs_size) {
12036 12043 dtrace_dof_error(dof, "bogus format string");
12037 12044 goto err;
12038 12045 }
12039 12046
12040 12047 if (i == desc->dofa_arg) {
12041 12048 dtrace_dof_error(dof, "empty format string");
12042 12049 goto err;
12043 12050 }
12044 12051
12045 12052 i -= desc->dofa_arg;
12046 12053 fmt = kmem_alloc(i + 1, KM_SLEEP);
12047 12054 bcopy(&str[desc->dofa_arg], fmt, i + 1);
12048 12055 arg = (uint64_t)(uintptr_t)fmt;
12049 12056 } else {
12050 12057 if (kind == DTRACEACT_PRINTA) {
12051 12058 ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
12052 12059 arg = 0;
12053 12060 } else {
12054 12061 arg = desc->dofa_arg;
12055 12062 }
12056 12063 }
12057 12064
12058 12065 act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
12059 12066 desc->dofa_uarg, arg);
12060 12067
12061 12068 if (last != NULL) {
12062 12069 last->dtad_next = act;
12063 12070 } else {
12064 12071 first = act;
12065 12072 }
12066 12073
12067 12074 last = act;
12068 12075
12069 12076 if (desc->dofa_difo == DOF_SECIDX_NONE)
12070 12077 continue;
12071 12078
12072 12079 if ((difosec = dtrace_dof_sect(dof,
12073 12080 DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
12074 12081 goto err;
12075 12082
12076 12083 act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);
12077 12084
12078 12085 if (act->dtad_difo == NULL)
12079 12086 goto err;
12080 12087 }
12081 12088
12082 12089 ASSERT(first != NULL);
12083 12090 return (first);
12084 12091
12085 12092 err:
12086 12093 for (act = first; act != NULL; act = next) {
12087 12094 next = act->dtad_next;
12088 12095 dtrace_actdesc_release(act, vstate);
12089 12096 }
12090 12097
12091 12098 return (NULL);
12092 12099 }
12093 12100
12094 12101 static dtrace_ecbdesc_t *
12095 12102 dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12096 12103 cred_t *cr)
12097 12104 {
12098 12105 dtrace_ecbdesc_t *ep;
12099 12106 dof_ecbdesc_t *ecb;
12100 12107 dtrace_probedesc_t *desc;
12101 12108 dtrace_predicate_t *pred = NULL;
12102 12109
12103 12110 if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
12104 12111 dtrace_dof_error(dof, "truncated ECB description");
12105 12112 return (NULL);
12106 12113 }
12107 12114
12108 12115 if (sec->dofs_align != sizeof (uint64_t)) {
12109 12116 dtrace_dof_error(dof, "bad alignment in ECB description");
12110 12117 return (NULL);
12111 12118 }
12112 12119
12113 12120 ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
12114 12121 sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);
12115 12122
12116 12123 if (sec == NULL)
12117 12124 return (NULL);
12118 12125
12119 12126 ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
12120 12127 ep->dted_uarg = ecb->dofe_uarg;
12121 12128 desc = &ep->dted_probe;
12122 12129
12123 12130 if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
12124 12131 goto err;
12125 12132
12126 12133 if (ecb->dofe_pred != DOF_SECIDX_NONE) {
12127 12134 if ((sec = dtrace_dof_sect(dof,
12128 12135 DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
12129 12136 goto err;
12130 12137
12131 12138 if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
12132 12139 goto err;
12133 12140
12134 12141 ep->dted_pred.dtpdd_predicate = pred;
12135 12142 }
12136 12143
12137 12144 if (ecb->dofe_actions != DOF_SECIDX_NONE) {
12138 12145 if ((sec = dtrace_dof_sect(dof,
12139 12146 DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
12140 12147 goto err;
12141 12148
12142 12149 ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);
12143 12150
12144 12151 if (ep->dted_action == NULL)
12145 12152 goto err;
12146 12153 }
12147 12154
12148 12155 return (ep);
12149 12156
12150 12157 err:
12151 12158 if (pred != NULL)
12152 12159 dtrace_predicate_release(pred, vstate);
12153 12160 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
12154 12161 return (NULL);
12155 12162 }
12156 12163
12157 12164 /*
12158 12165 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
12159 12166 * specified DOF. At present, this amounts to simply adding 'ubase' to the
12160 12167  * site of any user SETX relocations to account for the load object's base address.
12161 12168 * In the future, if we need other relocations, this function can be extended.
12162 12169 */
12163 12170 static int
12164 12171 dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase)
12165 12172 {
12166 12173 uintptr_t daddr = (uintptr_t)dof;
12167 12174 dof_relohdr_t *dofr =
12168 12175 (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
12169 12176 dof_sec_t *ss, *rs, *ts;
12170 12177 dof_relodesc_t *r;
12171 12178 uint_t i, n;
12172 12179
12173 12180 if (sec->dofs_size < sizeof (dof_relohdr_t) ||
12174 12181 sec->dofs_align != sizeof (dof_secidx_t)) {
12175 12182 dtrace_dof_error(dof, "invalid relocation header");
12176 12183 return (-1);
12177 12184 }
12178 12185
12179 12186 ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
12180 12187 rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
12181 12188 ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);
12182 12189
12183 12190 if (ss == NULL || rs == NULL || ts == NULL)
12184 12191 return (-1); /* dtrace_dof_error() has been called already */
12185 12192
12186 12193 if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
12187 12194 rs->dofs_align != sizeof (uint64_t)) {
12188 12195 dtrace_dof_error(dof, "invalid relocation section");
12189 12196 return (-1);
12190 12197 }
12191 12198
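/*
 * Entries may legitimately be larger than dof_relodesc_t (we only
 * checked dofs_entsize for a minimum above), so we step through the
 * table by the declared entry size rather than the structure size.
 */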
12192 12199 r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
12193 12200 n = rs->dofs_size / rs->dofs_entsize;
12194 12201
12195 12202 for (i = 0; i < n; i++) {
12196 12203 uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;
12197 12204
12198 12205 switch (r->dofr_type) {
12199 12206 case DOF_RELO_NONE:
12200 12207 break;
12201 12208 case DOF_RELO_SETX:
12202 12209 if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
12203 12210 sizeof (uint64_t) > ts->dofs_size) {
12204 12211 dtrace_dof_error(dof, "bad relocation offset");
12205 12212 return (-1);
12206 12213 }
12207 12214
12208 12215 if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
12209 12216 dtrace_dof_error(dof, "misaligned setx relo");
12210 12217 return (-1);
12211 12218 }
12212 12219
12213 12220 *(uint64_t *)taddr += ubase;
12214 12221 break;
12215 12222 default:
12216 12223 dtrace_dof_error(dof, "invalid relocation type");
12217 12224 return (-1);
12218 12225 }
12219 12226
12220 12227 r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
12221 12228 }
12222 12229
12223 12230 return (0);
12224 12231 }
12225 12232
12226 12233 /*
12227 12234 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
12228 12235 * header: it should be at the front of a memory region that is at least
12229 12236 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
12230 12237 * size. It need not be validated in any other way.
12231 12238 */
12232 12239 static int
12233 12240 dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
12234 12241 dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
12235 12242 {
12236 12243 uint64_t len = dof->dofh_loadsz, seclen;
12237 12244 uintptr_t daddr = (uintptr_t)dof;
12238 12245 dtrace_ecbdesc_t *ep;
12239 12246 dtrace_enabling_t *enab;
12240 12247 uint_t i;
12241 12248
12242 12249 ASSERT(MUTEX_HELD(&dtrace_lock));
12243 12250 ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));
12244 12251
12245 12252 /*
12246 12253 * Check the DOF header identification bytes. In addition to checking
12247 12254 * valid settings, we also verify that unused bits/bytes are zeroed so
12248 12255 * we can use them later without fear of regressing existing binaries.
12249 12256 */
12250 12257 if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
12251 12258 DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
12252 12259 dtrace_dof_error(dof, "DOF magic string mismatch");
12253 12260 return (-1);
12254 12261 }
12255 12262
12256 12263 if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
12257 12264 dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
12258 12265 dtrace_dof_error(dof, "DOF has invalid data model");
12259 12266 return (-1);
12260 12267 }
12261 12268
12262 12269 if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
12263 12270 dtrace_dof_error(dof, "DOF encoding mismatch");
12264 12271 return (-1);
12265 12272 }
12266 12273
12267 12274 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
12268 12275 dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) {
12269 12276 dtrace_dof_error(dof, "DOF version mismatch");
12270 12277 return (-1);
12271 12278 }
12272 12279
12273 12280 if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
12274 12281 dtrace_dof_error(dof, "DOF uses unsupported instruction set");
12275 12282 return (-1);
12276 12283 }
12277 12284
12278 12285 if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
12279 12286 dtrace_dof_error(dof, "DOF uses too many integer registers");
12280 12287 return (-1);
12281 12288 }
12282 12289
12283 12290 if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
12284 12291 dtrace_dof_error(dof, "DOF uses too many tuple registers");
12285 12292 return (-1);
12286 12293 }
12287 12294
12288 12295 for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
12289 12296 if (dof->dofh_ident[i] != 0) {
12290 12297 dtrace_dof_error(dof, "DOF has invalid ident byte set");
12291 12298 return (-1);
12292 12299 }
12293 12300 }
12294 12301
12295 12302 if (dof->dofh_flags & ~DOF_FL_VALID) {
12296 12303 dtrace_dof_error(dof, "DOF has invalid flag bits set");
12297 12304 return (-1);
12298 12305 }
12299 12306
12300 12307 if (dof->dofh_secsize == 0) {
12301 12308 dtrace_dof_error(dof, "zero section header size");
12302 12309 return (-1);
12303 12310 }
12304 12311
12305 12312 /*
12306 12313 * Check that the section headers don't exceed the amount of DOF
12307 12314 * data. Note that we cast the section size and number of sections
12308 12315 * to uint64_t's to prevent possible overflow in the multiplication.
12309 12316 */
12310 12317 seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;
12311 12318
12312 12319 if (dof->dofh_secoff > len || seclen > len ||
12313 12320 dof->dofh_secoff + seclen > len) {
12314 12321 dtrace_dof_error(dof, "truncated section headers");
12315 12322 return (-1);
12316 12323 }
12317 12324
12318 12325 if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
12319 12326 dtrace_dof_error(dof, "misaligned section headers");
12320 12327 return (-1);
12321 12328 }
12322 12329
12323 12330 if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
12324 12331 dtrace_dof_error(dof, "misaligned section size");
12325 12332 return (-1);
12326 12333 }
12327 12334
12328 12335 /*
12329 12336 * Take an initial pass through the section headers to be sure that
12330 12337 * the headers don't have stray offsets. If the 'noprobes' flag is
12331 12338 * set, do not permit sections relating to providers, probes, or args.
12332 12339 */
12333 12340 for (i = 0; i < dof->dofh_secnum; i++) {
12334 12341 dof_sec_t *sec = (dof_sec_t *)(daddr +
12335 12342 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12336 12343
12337 12344 if (noprobes) {
12338 12345 switch (sec->dofs_type) {
12339 12346 case DOF_SECT_PROVIDER:
12340 12347 case DOF_SECT_PROBES:
12341 12348 case DOF_SECT_PRARGS:
12342 12349 case DOF_SECT_PROFFS:
12343 12350 dtrace_dof_error(dof, "illegal sections "
12344 12351 "for enabling");
12345 12352 return (-1);
12346 12353 }
12347 12354 }
12348 12355
12349 12356 if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
12350 12357 !(sec->dofs_flags & DOF_SECF_LOAD)) {
12351 12358 dtrace_dof_error(dof, "loadable section with load "
12352 12359 "flag unset");
12353 12360 return (-1);
12354 12361 }
12355 12362
12356 12363 if (!(sec->dofs_flags & DOF_SECF_LOAD))
12357 12364 continue; /* just ignore non-loadable sections */
12358 12365
12359 12366 if (sec->dofs_align & (sec->dofs_align - 1)) {
12360 12367 dtrace_dof_error(dof, "bad section alignment");
12361 12368 return (-1);
12362 12369 }
12363 12370
12364 12371 if (sec->dofs_offset & (sec->dofs_align - 1)) {
12365 12372 dtrace_dof_error(dof, "misaligned section");
12366 12373 return (-1);
12367 12374 }
12368 12375
12369 12376 if (sec->dofs_offset > len || sec->dofs_size > len ||
12370 12377 sec->dofs_offset + sec->dofs_size > len) {
12371 12378 dtrace_dof_error(dof, "corrupt section header");
12372 12379 return (-1);
12373 12380 }
12374 12381
12375 12382 if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
12376 12383 sec->dofs_offset + sec->dofs_size - 1) != '\0') {
12377 12384 dtrace_dof_error(dof, "non-terminating string table");
12378 12385 return (-1);
12379 12386 }
12380 12387 }
12381 12388
12382 12389 /*
12383 12390 * Take a second pass through the sections and locate and perform any
12384 12391 * relocations that are present. We do this after the first pass to
12385 12392 * be sure that all sections have had their headers validated.
12386 12393 */
12387 12394 for (i = 0; i < dof->dofh_secnum; i++) {
12388 12395 dof_sec_t *sec = (dof_sec_t *)(daddr +
12389 12396 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12390 12397
12391 12398 if (!(sec->dofs_flags & DOF_SECF_LOAD))
12392 12399 continue; /* skip sections that are not loadable */
12393 12400
12394 12401 switch (sec->dofs_type) {
12395 12402 case DOF_SECT_URELHDR:
12396 12403 if (dtrace_dof_relocate(dof, sec, ubase) != 0)
12397 12404 return (-1);
12398 12405 break;
12399 12406 }
12400 12407 }
12401 12408
12402 12409 if ((enab = *enabp) == NULL)
12403 12410 enab = *enabp = dtrace_enabling_create(vstate);
12404 12411
12405 12412 for (i = 0; i < dof->dofh_secnum; i++) {
12406 12413 dof_sec_t *sec = (dof_sec_t *)(daddr +
12407 12414 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12408 12415
12409 12416 if (sec->dofs_type != DOF_SECT_ECBDESC)
12410 12417 continue;
12411 12418
12412 12419 if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
12413 12420 dtrace_enabling_destroy(enab);
12414 12421 *enabp = NULL;
12415 12422 return (-1);
12416 12423 }
12417 12424
12418 12425 dtrace_enabling_add(enab, ep);
12419 12426 }
12420 12427
12421 12428 return (0);
12422 12429 }
12423 12430
12424 12431 /*
12425 12432  * Process DOF for any options.  This routine assumes that the DOF has
12426 12433  * already been processed by dtrace_dof_slurp().
12427 12434 */
12428 12435 static int
12429 12436 dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
12430 12437 {
12431 12438 int i, rval;
12432 12439 uint32_t entsize;
12433 12440 size_t offs;
12434 12441 dof_optdesc_t *desc;
12435 12442
12436 12443 for (i = 0; i < dof->dofh_secnum; i++) {
12437 12444 dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
12438 12445 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12439 12446
12440 12447 if (sec->dofs_type != DOF_SECT_OPTDESC)
12441 12448 continue;
12442 12449
12443 12450 if (sec->dofs_align != sizeof (uint64_t)) {
12444 12451 dtrace_dof_error(dof, "bad alignment in "
12445 12452 "option description");
12446 12453 return (EINVAL);
12447 12454 }
12448 12455
12449 12456 if ((entsize = sec->dofs_entsize) == 0) {
12450 12457 dtrace_dof_error(dof, "zeroed option entry size");
12451 12458 return (EINVAL);
12452 12459 }
12453 12460
12454 12461 if (entsize < sizeof (dof_optdesc_t)) {
12455 12462 dtrace_dof_error(dof, "bad option entry size");
12456 12463 return (EINVAL);
12457 12464 }
12458 12465
12459 12466 for (offs = 0; offs < sec->dofs_size; offs += entsize) {
12460 12467 desc = (dof_optdesc_t *)((uintptr_t)dof +
12461 12468 (uintptr_t)sec->dofs_offset + offs);
12462 12469
12463 12470 if (desc->dofo_strtab != DOF_SECIDX_NONE) {
12464 12471 dtrace_dof_error(dof, "non-zero option string");
12465 12472 return (EINVAL);
12466 12473 }
12467 12474
12468 12475 if (desc->dofo_value == DTRACEOPT_UNSET) {
12469 12476 dtrace_dof_error(dof, "unset option");
12470 12477 return (EINVAL);
12471 12478 }
12472 12479
12473 12480 if ((rval = dtrace_state_option(state,
12474 12481 desc->dofo_option, desc->dofo_value)) != 0) {
12475 12482 dtrace_dof_error(dof, "rejected option");
12476 12483 return (rval);
12477 12484 }
12478 12485 }
12479 12486 }
12480 12487
12481 12488 return (0);
12482 12489 }
12483 12490
12484 12491 /*
12485 12492 * DTrace Consumer State Functions
12486 12493 */
12487 12494 int
12488 12495 dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
12489 12496 {
12490 12497 size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize;
12491 12498 void *base;
12492 12499 uintptr_t limit;
12493 12500 dtrace_dynvar_t *dvar, *next, *start;
12494 12501 int i;
12495 12502
12496 12503 ASSERT(MUTEX_HELD(&dtrace_lock));
12497 12504 ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);
12498 12505
12499 12506 bzero(dstate, sizeof (dtrace_dstate_t));
12500 12507
12501 12508 if ((dstate->dtds_chunksize = chunksize) == 0)
12502 12509 dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;
12503 12510
12504 12511 if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
12505 12512 size = min;
12506 12513
12507 12514 if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL)
12508 12515 return (ENOMEM);
12509 12516
12510 12517 dstate->dtds_size = size;
12511 12518 dstate->dtds_base = base;
12512 12519 dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
12513 12520 bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t));
12514 12521
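/*
 * The allocation is laid out with the hash table at the front and the
 * dynamic variable chunks behind it; size the hash such that each
 * bucket corresponds to roughly one chunk's worth of space.
 */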
12515 12522 hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));
12516 12523
12517 12524 if (hashsize != 1 && (hashsize & 1))
12518 12525 hashsize--;
12519 12526
12520 12527 dstate->dtds_hashsize = hashsize;
12521 12528 dstate->dtds_hash = dstate->dtds_base;
12522 12529
12523 12530 /*
12524 12531 * Set all of our hash buckets to point to the single sink, and (if
12525 12532 	 * it hasn't already been set) set the sink's hash value to be the
12526 12533 * sink sentinel value. The sink is needed for dynamic variable
12527 12534 * lookups to know that they have iterated over an entire, valid hash
12528 12535 * chain.
12529 12536 */
12530 12537 for (i = 0; i < hashsize; i++)
12531 12538 dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;
12532 12539
12533 12540 if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
12534 12541 dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;
12535 12542
12536 12543 /*
12537 12544 	 * Divide the remaining free space evenly among all of the NCPU
12538 12545 	 * possible CPUs.
12539 12546 */
12540 12547 start = (dtrace_dynvar_t *)
12541 12548 ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
12542 12549 limit = (uintptr_t)base + size;
12543 12550
12544 12551 maxper = (limit - (uintptr_t)start) / NCPU;
12545 12552 maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;
12546 12553
12547 12554 for (i = 0; i < NCPU; i++) {
12548 12555 dstate->dtds_percpu[i].dtdsc_free = dvar = start;
12549 12556
12550 12557 /*
12551 12558 * If we don't even have enough chunks to make it once through
12552 12559 * NCPUs, we're just going to allocate everything to the first
12553 12560 * CPU. And if we're on the last CPU, we're going to allocate
12554 12561 * whatever is left over. In either case, we set the limit to
12555 12562 * be the limit of the dynamic variable space.
12556 12563 */
12557 12564 if (maxper == 0 || i == NCPU - 1) {
12558 12565 limit = (uintptr_t)base + size;
12559 12566 start = NULL;
12560 12567 } else {
12561 12568 limit = (uintptr_t)start + maxper;
12562 12569 start = (dtrace_dynvar_t *)limit;
12563 12570 }
12564 12571
12565 12572 ASSERT(limit <= (uintptr_t)base + size);
12566 12573
12567 12574 for (;;) {
12568 12575 next = (dtrace_dynvar_t *)((uintptr_t)dvar +
12569 12576 dstate->dtds_chunksize);
12570 12577
12571 12578 if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
12572 12579 break;
12573 12580
12574 12581 dvar->dtdv_next = next;
12575 12582 dvar = next;
12576 12583 }
12577 12584
12578 12585 if (maxper == 0)
12579 12586 break;
12580 12587 }
12581 12588
12582 12589 return (0);
12583 12590 }
12584 12591
12585 12592 void
12586 12593 dtrace_dstate_fini(dtrace_dstate_t *dstate)
12587 12594 {
12588 12595 ASSERT(MUTEX_HELD(&cpu_lock));
12589 12596
12590 12597 if (dstate->dtds_base == NULL)
12591 12598 return;
12592 12599
12593 12600 kmem_free(dstate->dtds_base, dstate->dtds_size);
12594 12601 kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
12595 12602 }
12596 12603
12597 12604 static void
12598 12605 dtrace_vstate_fini(dtrace_vstate_t *vstate)
12599 12606 {
12600 12607 /*
12601 12608 * Logical XOR, where are you?
12602 12609 */
12603 12610 ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));
12604 12611
12605 12612 if (vstate->dtvs_nglobals > 0) {
12606 12613 kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
12607 12614 sizeof (dtrace_statvar_t *));
12608 12615 }
12609 12616
12610 12617 if (vstate->dtvs_ntlocals > 0) {
12611 12618 kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
12612 12619 sizeof (dtrace_difv_t));
12613 12620 }
12614 12621
12615 12622 ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));
12616 12623
12617 12624 if (vstate->dtvs_nlocals > 0) {
12618 12625 kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
12619 12626 sizeof (dtrace_statvar_t *));
12620 12627 }
12621 12628 }
12622 12629
12623 12630 static void
12624 12631 dtrace_state_clean(dtrace_state_t *state)
12625 12632 {
12626 12633 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
12627 12634 return;
12628 12635
12629 12636 dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
12630 12637 dtrace_speculation_clean(state);
12631 12638 }
12632 12639
12633 12640 static void
12634 12641 dtrace_state_deadman(dtrace_state_t *state)
12635 12642 {
12636 12643 hrtime_t now;
12637 12644
12638 12645 dtrace_sync();
12639 12646
12640 12647 now = dtrace_gethrtime();
12641 12648
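/*
 * Only refresh dts_alive if this is the anonymous state or if the
 * consumer has performed a status operation recently enough; an
 * inattentive consumer is deliberately allowed to time out via the
 * deadman.
 */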
12642 12649 if (state != dtrace_anon.dta_state &&
12643 12650 now - state->dts_laststatus >= dtrace_deadman_user)
12644 12651 return;
12645 12652
12646 12653 /*
12647 12654 * We must be sure that dts_alive never appears to be less than the
12648 12655 * value upon entry to dtrace_state_deadman(), and because we lack a
12649 12656 * dtrace_cas64(), we cannot store to it atomically. We thus instead
12650 12657 * store INT64_MAX to it, followed by a memory barrier, followed by
12651 12658 * the new value. This assures that dts_alive never appears to be
12652 12659 * less than its true value, regardless of the order in which the
12653 12660 * stores to the underlying storage are issued.
12654 12661 */
12655 12662 state->dts_alive = INT64_MAX;
12656 12663 dtrace_membar_producer();
12657 12664 state->dts_alive = now;
12658 12665 }
12659 12666
12660 12667 dtrace_state_t *
12661 12668 dtrace_state_create(dev_t *devp, cred_t *cr)
12662 12669 {
12663 12670 minor_t minor;
12664 12671 major_t major;
12665 12672 char c[30];
12666 12673 dtrace_state_t *state;
12667 12674 dtrace_optval_t *opt;
12668 12675 int bufsize = NCPU * sizeof (dtrace_buffer_t), i;
12669 12676
12670 12677 ASSERT(MUTEX_HELD(&dtrace_lock));
12671 12678 ASSERT(MUTEX_HELD(&cpu_lock));
12672 12679
12673 12680 minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
12674 12681 VM_BESTFIT | VM_SLEEP);
12675 12682
12676 12683 if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
12677 12684 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
12678 12685 return (NULL);
12679 12686 }
12680 12687
12681 12688 state = ddi_get_soft_state(dtrace_softstate, minor);
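/*
 * EPID 0 is the reserved DTRACE_EPIDNONE value, so enabled probe IDs
 * are allocated starting at one past it.
 */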
12682 12689 state->dts_epid = DTRACE_EPIDNONE + 1;
12683 12690
12684 12691 (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
12685 12692 state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
12686 12693 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
12687 12694
12688 12695 if (devp != NULL) {
12689 12696 major = getemajor(*devp);
12690 12697 } else {
12691 12698 major = ddi_driver_major(dtrace_devi);
12692 12699 }
12693 12700
12694 12701 state->dts_dev = makedevice(major, minor);
12695 12702
12696 12703 if (devp != NULL)
12697 12704 *devp = state->dts_dev;
12698 12705
12699 12706 /*
12700 12707 * We allocate NCPU buffers. On the one hand, this can be quite
12701 12708 * a bit of memory per instance (nearly 36K on a Starcat). On the
12702 12709 * other hand, it saves an additional memory reference in the probe
12703 12710 * path.
12704 12711 */
12705 12712 state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
12706 12713 state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
12707 12714 state->dts_cleaner = CYCLIC_NONE;
12708 12715 state->dts_deadman = CYCLIC_NONE;
12709 12716 state->dts_vstate.dtvs_state = state;
12710 12717
12711 12718 for (i = 0; i < DTRACEOPT_MAX; i++)
12712 12719 state->dts_options[i] = DTRACEOPT_UNSET;
12713 12720
12714 12721 /*
12715 12722 * Set the default options.
12716 12723 */
12717 12724 opt = state->dts_options;
12718 12725 opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
12719 12726 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
12720 12727 opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
12721 12728 opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
12722 12729 opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
12723 12730 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
12724 12731 opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
12725 12732 opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
12726 12733 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
12727 12734 opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
12728 12735 opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
12729 12736 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
12730 12737 opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
12731 12738 opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;
12732 12739
12733 12740 state->dts_activity = DTRACE_ACTIVITY_INACTIVE;
12734 12741
12735 12742 /*
12736 12743 * Depending on the user credentials, we set flag bits which alter probe
12737 12744 * visibility or the amount of destructiveness allowed. In the case of
12738 12745 * actual anonymous tracing, or the possession of all privileges, all of
12739 12746 * the normal checks are bypassed.
12740 12747 */
12741 12748 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
12742 12749 state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
12743 12750 state->dts_cred.dcr_action = DTRACE_CRA_ALL;
12744 12751 } else {
12745 12752 /*
12746 12753 * Set up the credentials for this instantiation. We take a
12747 12754 * hold on the credential to prevent it from disappearing on
12748 12755 * us; this in turn prevents the zone_t referenced by this
12749 12756 * credential from disappearing. This means that we can
12750 12757 * examine the credential and the zone from probe context.
12751 12758 */
12752 12759 crhold(cr);
12753 12760 state->dts_cred.dcr_cred = cr;
12754 12761
12755 12762 /*
12756 12763 * CRA_PROC means "we have *some* privilege for dtrace" and
12757 12764 * unlocks the use of variables like pid, zonename, etc.
12758 12765 */
12759 12766 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
12760 12767 PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
12761 12768 state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
12762 12769 }
12763 12770
12764 12771 /*
12765 12772 * dtrace_user allows use of syscall and profile providers.
12766 12773 * If the user also has proc_owner and/or proc_zone, we
12767 12774 * extend the scope to include additional visibility and
12768 12775 * destructive power.
12769 12776 */
12770 12777 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
12771 12778 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
12772 12779 state->dts_cred.dcr_visible |=
12773 12780 DTRACE_CRV_ALLPROC;
12774 12781
12775 12782 state->dts_cred.dcr_action |=
12776 12783 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
12777 12784 }
12778 12785
12779 12786 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
12780 12787 state->dts_cred.dcr_visible |=
12781 12788 DTRACE_CRV_ALLZONE;
12782 12789
12783 12790 state->dts_cred.dcr_action |=
12784 12791 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
12785 12792 }
12786 12793
12787 12794 /*
12788 12795 * If we have all privs in whatever zone this is,
12789 12796 * we can do destructive things to processes which
12790 12797 * have altered credentials.
12791 12798 */
12792 12799 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
12793 12800 cr->cr_zone->zone_privset)) {
12794 12801 state->dts_cred.dcr_action |=
12795 12802 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
12796 12803 }
12797 12804 }
12798 12805
12799 12806 /*
12800 12807 * Holding the dtrace_kernel privilege also implies that
12801 12808 * the user has the dtrace_user privilege from a visibility
12802 12809 * perspective. But without further privileges, some
12803 12810 * destructive actions are not available.
12804 12811 */
12805 12812 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
12806 12813 /*
12807 12814 * Make all probes in all zones visible. However,
12808 12815 * this doesn't mean that all actions become available
12809 12816 * to all zones.
12810 12817 */
12811 12818 state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
12812 12819 DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;
12813 12820
12814 12821 state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
12815 12822 DTRACE_CRA_PROC;
12816 12823 /*
12817 12824 * Holding proc_owner means that destructive actions
12818 12825 * for *this* zone are allowed.
12819 12826 */
12820 12827 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
12821 12828 state->dts_cred.dcr_action |=
12822 12829 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
12823 12830
12824 12831 /*
12825 12832 * Holding proc_zone means that destructive actions
12826 12833 			 * for this user/group ID are allowed in all zones.
12827 12834 */
12828 12835 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
12829 12836 state->dts_cred.dcr_action |=
12830 12837 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
12831 12838
12832 12839 /*
12833 12840 * If we have all privs in whatever zone this is,
12834 12841 * we can do destructive things to processes which
12835 12842 * have altered credentials.
12836 12843 */
12837 12844 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
12838 12845 cr->cr_zone->zone_privset)) {
12839 12846 state->dts_cred.dcr_action |=
12840 12847 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
12841 12848 }
12842 12849 }
12843 12850
12844 12851 /*
12845 12852 * Holding the dtrace_proc privilege gives control over fasttrap
12846 12853 * and pid providers. We need to grant wider destructive
12847 12854 * privileges in the event that the user has proc_owner and/or
12848 12855 * proc_zone.
12849 12856 */
12850 12857 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
12851 12858 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
12852 12859 state->dts_cred.dcr_action |=
12853 12860 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
12854 12861
12855 12862 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
12856 12863 state->dts_cred.dcr_action |=
12857 12864 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
12858 12865 }
12859 12866 }
12860 12867
12861 12868 return (state);
12862 12869 }
12863 12870
12864 12871 static int
12865 12872 dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
12866 12873 {
12867 12874 dtrace_optval_t *opt = state->dts_options, size;
12868 12875 processorid_t cpu;
12869 12876 int flags = 0, rval, factor, divisor = 1;
12870 12877
12871 12878 ASSERT(MUTEX_HELD(&dtrace_lock));
12872 12879 ASSERT(MUTEX_HELD(&cpu_lock));
12873 12880 ASSERT(which < DTRACEOPT_MAX);
12874 12881 ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
12875 12882 (state == dtrace_anon.dta_state &&
12876 12883 state->dts_activity == DTRACE_ACTIVITY_ACTIVE));
12877 12884
12878 12885 if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
12879 12886 return (0);
12880 12887
12881 12888 if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
12882 12889 cpu = opt[DTRACEOPT_CPU];
12883 12890
12884 12891 if (which == DTRACEOPT_SPECSIZE)
12885 12892 flags |= DTRACEBUF_NOSWITCH;
12886 12893
12887 12894 if (which == DTRACEOPT_BUFSIZE) {
12888 12895 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
12889 12896 flags |= DTRACEBUF_RING;
12890 12897
12891 12898 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
12892 12899 flags |= DTRACEBUF_FILL;
12893 12900
12894 12901 if (state != dtrace_anon.dta_state ||
12895 12902 state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
12896 12903 flags |= DTRACEBUF_INACTIVE;
12897 12904 }
12898 12905
12899 12906 for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) {
12900 12907 /*
12901 12908 * The size must be 8-byte aligned. If the size is not 8-byte
12902 12909 * aligned, drop it down by the difference.
12903 12910 */
12904 12911 if (size & (sizeof (uint64_t) - 1))
12905 12912 size -= size & (sizeof (uint64_t) - 1);
12906 12913
12907 12914 if (size < state->dts_reserve) {
12908 12915 /*
12909 12916 			 * Buffers must always be large enough to accommodate
12910 12917 * their prereserved space. We return E2BIG instead
12911 12918 * of ENOMEM in this case to allow for user-level
12912 12919 * software to differentiate the cases.
12913 12920 */
12914 12921 return (E2BIG);
12915 12922 }
12916 12923
12917 12924 rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor);
12918 12925
12919 12926 if (rval != ENOMEM) {
12920 12927 opt[which] = size;
12921 12928 return (rval);
12922 12929 }
12923 12930
12924 12931 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
12925 12932 return (rval);
12926 12933
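/*
 * The allocation failed with ENOMEM; 'factor' indicates roughly how
 * much smaller the request must be to have a chance of succeeding.
 * Round the divisor up to the next power of two that covers the
 * factor, and retry with the reduced size.
 */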
12927 12934 for (divisor = 2; divisor < factor; divisor <<= 1)
12928 12935 continue;
12929 12936 }
12930 12937
12931 12938 return (ENOMEM);
12932 12939 }
12933 12940
12934 12941 static int
12935 12942 dtrace_state_buffers(dtrace_state_t *state)
12936 12943 {
12937 12944 dtrace_speculation_t *spec = state->dts_speculations;
12938 12945 int rval, i;
12939 12946
12940 12947 if ((rval = dtrace_state_buffer(state, state->dts_buffer,
12941 12948 DTRACEOPT_BUFSIZE)) != 0)
12942 12949 return (rval);
12943 12950
12944 12951 if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
12945 12952 DTRACEOPT_AGGSIZE)) != 0)
12946 12953 return (rval);
12947 12954
12948 12955 for (i = 0; i < state->dts_nspeculations; i++) {
12949 12956 if ((rval = dtrace_state_buffer(state,
12950 12957 spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
12951 12958 return (rval);
12952 12959 }
12953 12960
12954 12961 return (0);
12955 12962 }
12956 12963
12957 12964 static void
12958 12965 dtrace_state_prereserve(dtrace_state_t *state)
12959 12966 {
12960 12967 dtrace_ecb_t *ecb;
12961 12968 dtrace_probe_t *probe;
12962 12969
12963 12970 state->dts_reserve = 0;
12964 12971
12965 12972 if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
12966 12973 return;
12967 12974
12968 12975 /*
12969 12976 * If our buffer policy is a "fill" buffer policy, we need to set the
12970 12977 * prereserved space to be the space required by the END probes.
12971 12978 */
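/*
 * (Probe IDs are one-based indices into dtrace_probes[], so the END
 * probe is at dtrace_probeid_end - 1.)
 */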
12972 12979 probe = dtrace_probes[dtrace_probeid_end - 1];
12973 12980 ASSERT(probe != NULL);
12974 12981
12975 12982 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
12976 12983 if (ecb->dte_state != state)
12977 12984 continue;
12978 12985
12979 12986 state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
12980 12987 }
12981 12988 }
12982 12989
12983 12990 static int
12984 12991 dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
12985 12992 {
12986 12993 dtrace_optval_t *opt = state->dts_options, sz, nspec;
12987 12994 dtrace_speculation_t *spec;
12988 12995 dtrace_buffer_t *buf;
12989 12996 cyc_handler_t hdlr;
12990 12997 cyc_time_t when;
12991 12998 int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t);
12992 12999 dtrace_icookie_t cookie;
12993 13000
12994 13001 mutex_enter(&cpu_lock);
12995 13002 mutex_enter(&dtrace_lock);
12996 13003
12997 13004 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
12998 13005 rval = EBUSY;
12999 13006 goto out;
13000 13007 }
13001 13008
13002 13009 /*
13003 13010 * Before we can perform any checks, we must prime all of the
13004 13011 * retained enablings that correspond to this state.
13005 13012 */
13006 13013 dtrace_enabling_prime(state);
13007 13014
13008 13015 if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
13009 13016 rval = EACCES;
13010 13017 goto out;
13011 13018 }
13012 13019
13013 13020 dtrace_state_prereserve(state);
13014 13021
13015 13022 /*
13016 13023 	 * What we want to do now is try to allocate our speculations.
13017 13024 * We do not automatically resize the number of speculations; if
13018 13025 * this fails, we will fail the operation.
13019 13026 */
13020 13027 nspec = opt[DTRACEOPT_NSPEC];
13021 13028 ASSERT(nspec != DTRACEOPT_UNSET);
13022 13029
13023 13030 if (nspec > INT_MAX) {
13024 13031 rval = ENOMEM;
13025 13032 goto out;
13026 13033 }
13027 13034
13028 13035 spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t),
13029 13036 KM_NOSLEEP | KM_NORMALPRI);
13030 13037
13031 13038 if (spec == NULL) {
13032 13039 rval = ENOMEM;
13033 13040 goto out;
13034 13041 }
13035 13042
13036 13043 state->dts_speculations = spec;
13037 13044 state->dts_nspeculations = (int)nspec;
13038 13045
13039 13046 for (i = 0; i < nspec; i++) {
13040 13047 if ((buf = kmem_zalloc(bufsize,
13041 13048 KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
13042 13049 rval = ENOMEM;
13043 13050 goto err;
13044 13051 }
13045 13052
13046 13053 spec[i].dtsp_buffer = buf;
13047 13054 }
13048 13055
13049 13056 if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
13050 13057 if (dtrace_anon.dta_state == NULL) {
13051 13058 rval = ENOENT;
13052 13059 goto out;
13053 13060 }
13054 13061
13055 13062 if (state->dts_necbs != 0) {
13056 13063 rval = EALREADY;
13057 13064 goto out;
13058 13065 }
13059 13066
13060 13067 state->dts_anon = dtrace_anon_grab();
13061 13068 ASSERT(state->dts_anon != NULL);
13062 13069 state = state->dts_anon;
13063 13070
13064 13071 /*
13065 13072 * We want "grabanon" to be set in the grabbed state, so we'll
13066 13073 * copy that option value from the grabbing state into the
13067 13074 * grabbed state.
13068 13075 */
13069 13076 state->dts_options[DTRACEOPT_GRABANON] =
13070 13077 opt[DTRACEOPT_GRABANON];
13071 13078
13072 13079 *cpu = dtrace_anon.dta_beganon;
13073 13080
13074 13081 /*
13075 13082 * If the anonymous state is active (as it almost certainly
13076 13083 * is if the anonymous enabling ultimately matched anything),
13077 13084 * we don't allow any further option processing -- but we
13078 13085 * don't return failure.
13079 13086 */
13080 13087 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13081 13088 goto out;
13082 13089 }
13083 13090
13084 13091 if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
13085 13092 opt[DTRACEOPT_AGGSIZE] != 0) {
13086 13093 if (state->dts_aggregations == NULL) {
13087 13094 /*
13088 13095 * We're not going to create an aggregation buffer
13089 13096 * because we don't have any ECBs that contain
13090 13097 * aggregations -- set this option to 0.
13091 13098 */
13092 13099 opt[DTRACEOPT_AGGSIZE] = 0;
13093 13100 } else {
13094 13101 /*
13095 13102 * If we have an aggregation buffer, we must also have
13096 13103 * a buffer to use as scratch.
13097 13104 */
13098 13105 if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
13099 13106 opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
13100 13107 opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
13101 13108 }
13102 13109 }
13103 13110 }
13104 13111
13105 13112 if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
13106 13113 opt[DTRACEOPT_SPECSIZE] != 0) {
13107 13114 if (!state->dts_speculates) {
13108 13115 /*
13109 13116 * We're not going to create speculation buffers
13110 13117 * because we don't have any ECBs that actually
13111 13118 * speculate -- set the speculation size to 0.
13112 13119 */
13113 13120 opt[DTRACEOPT_SPECSIZE] = 0;
13114 13121 }
13115 13122 }
13116 13123
13117 13124 /*
13118 13125 * The bare minimum size for any buffer that we're actually going to
13119 13126 * do anything to is sizeof (uint64_t).
13120 13127 */
13121 13128 sz = sizeof (uint64_t);
13122 13129
13123 13130 if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
13124 13131 (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
13125 13132 (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
13126 13133 /*
13127 13134 * A buffer size has been explicitly set to 0 (or to a size
13128 13135 * that will be adjusted to 0) and we need the space -- we
13129 13136 * need to return failure. We return ENOSPC to differentiate
13130 13137 * it from failing to allocate a buffer due to failure to meet
13131 13138 * the reserve (for which we return E2BIG).
13132 13139 */
13133 13140 rval = ENOSPC;
13134 13141 goto out;
13135 13142 }
13136 13143
13137 13144 if ((rval = dtrace_state_buffers(state)) != 0)
13138 13145 goto err;
13139 13146
13140 13147 if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
13141 13148 sz = dtrace_dstate_defsize;
13142 13149
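/*
 * Allocate the dynamic variable space at the requested size, halving
 * the size on failure until the allocation succeeds -- unless buffer
 * resizing is manual, in which case any failure is fatal.
 */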
13143 13150 do {
13144 13151 rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);
13145 13152
13146 13153 if (rval == 0)
13147 13154 break;
13148 13155
13149 13156 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
13150 13157 goto err;
13151 13158 } while (sz >>= 1);
13152 13159
13153 13160 opt[DTRACEOPT_DYNVARSIZE] = sz;
13154 13161
13155 13162 if (rval != 0)
13156 13163 goto err;
13157 13164
13158 13165 if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
13159 13166 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;
13160 13167
13161 13168 if (opt[DTRACEOPT_CLEANRATE] == 0)
13162 13169 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
13163 13170
13164 13171 if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
13165 13172 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;
13166 13173
13167 13174 if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
13168 13175 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
13169 13176
13170 13177 hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
13171 13178 hdlr.cyh_arg = state;
13172 13179 hdlr.cyh_level = CY_LOW_LEVEL;
13173 13180
13174 13181 when.cyt_when = 0;
13175 13182 when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
13176 13183
13177 13184 state->dts_cleaner = cyclic_add(&hdlr, &when);
13178 13185
13179 13186 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
13180 13187 hdlr.cyh_arg = state;
13181 13188 hdlr.cyh_level = CY_LOW_LEVEL;
13182 13189
13183 13190 when.cyt_when = 0;
13184 13191 when.cyt_interval = dtrace_deadman_interval;
13185 13192
13186 13193 state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
13187 13194 state->dts_deadman = cyclic_add(&hdlr, &when);
13188 13195
13189 13196 state->dts_activity = DTRACE_ACTIVITY_WARMUP;
13190 13197
13191 13198 if (state->dts_getf != 0 &&
13192 13199 !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
13193 13200 /*
13194 13201 * We don't have kernel privs but we have at least one call
13195 13202 * to getf(); we need to bump our zone's count, and (if
13196 13203 * this is the first enabling to have an unprivileged call
13197 13204 * to getf()) we need to hook into closef().
13198 13205 */
13199 13206 state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
13200 13207
13201 13208 if (dtrace_getf++ == 0) {
13202 13209 ASSERT(dtrace_closef == NULL);
13203 13210 dtrace_closef = dtrace_getf_barrier;
13204 13211 }
13205 13212 }
13206 13213
13207 13214 /*
13208 13215 * Now it's time to actually fire the BEGIN probe. We need to disable
13209 13216 * interrupts here both to record the CPU on which we fired the BEGIN
13210 13217 * probe (the data from this CPU will be processed first at user
13211 13218 * level) and to manually activate the buffer for this CPU.
13212 13219 */
13213 13220 cookie = dtrace_interrupt_disable();
13214 13221 *cpu = CPU->cpu_id;
13215 13222 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
13216 13223 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
13217 13224
13218 13225 dtrace_probe(dtrace_probeid_begin,
13219 13226 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13220 13227 dtrace_interrupt_enable(cookie);
13221 13228 /*
13222 13229 * We may have had an exit action from a BEGIN probe; only change our
13223 13230 * state to ACTIVE if we're still in WARMUP.
13224 13231 */
13225 13232 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
13226 13233 state->dts_activity == DTRACE_ACTIVITY_DRAINING);
13227 13234
13228 13235 if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
13229 13236 state->dts_activity = DTRACE_ACTIVITY_ACTIVE;
13230 13237
13231 13238 /*
13232 13239 	 * Regardless of whether we're now in ACTIVE or DRAINING, we
13233 13240 * want each CPU to transition its principal buffer out of the
13234 13241 * INACTIVE state. Doing this assures that no CPU will suddenly begin
13235 13242 * processing an ECB halfway down a probe's ECB chain; all CPUs will
13236 13243 * atomically transition from processing none of a state's ECBs to
13237 13244 * processing all of them.
13238 13245 */
13239 13246 dtrace_xcall(DTRACE_CPUALL,
13240 13247 (dtrace_xcall_t)dtrace_buffer_activate, state);
13241 13248 goto out;
13242 13249
13243 13250 err:
13244 13251 dtrace_buffer_free(state->dts_buffer);
13245 13252 dtrace_buffer_free(state->dts_aggbuffer);
13246 13253
13247 13254 if ((nspec = state->dts_nspeculations) == 0) {
13248 13255 ASSERT(state->dts_speculations == NULL);
13249 13256 goto out;
13250 13257 }
13251 13258
13252 13259 spec = state->dts_speculations;
13253 13260 ASSERT(spec != NULL);
13254 13261
13255 13262 for (i = 0; i < state->dts_nspeculations; i++) {
13256 13263 if ((buf = spec[i].dtsp_buffer) == NULL)
13257 13264 break;
13258 13265
13259 13266 dtrace_buffer_free(buf);
13260 13267 kmem_free(buf, bufsize);
13261 13268 }
13262 13269
13263 13270 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
13264 13271 state->dts_nspeculations = 0;
13265 13272 state->dts_speculations = NULL;
13266 13273
13267 13274 out:
13268 13275 mutex_exit(&dtrace_lock);
13269 13276 mutex_exit(&cpu_lock);
13270 13277
13271 13278 return (rval);
13272 13279 }
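
/*
 * The getf()/closef() accounting above is a counted-hook pattern: every
 * unprivileged enabling that calls getf() bumps a per-zone count, and the
 * first such enabling system-wide installs the closef() barrier. A minimal
 * sketch of that shape follows; the names (hook_hold, hook_rele, nusers)
 * are hypothetical, and the caller is assumed to hold the lock protecting
 * both counters (dtrace_lock in the code above).
 */
typedef void (*hook_fn_t)(void);

static int nusers;		/* system-wide count, like dtrace_getf */
static hook_fn_t hook;		/* like dtrace_closef */

static void
barrier(void)			/* stands in for dtrace_getf_barrier() */
{
}

static void
hook_hold(int *zone_count)
{
	(*zone_count)++;	/* like zone_dtrace_getf++ */
	if (nusers++ == 0)
		hook = barrier;	/* first user installs the hook */
}

static void
hook_rele(int *zone_count)
{
	(*zone_count)--;
	if (--nusers == 0)
		hook = NULL;	/* last user clears it */
}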
13273 13280
13274 13281 static int
13275 13282 dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
13276 13283 {
13277 13284 dtrace_icookie_t cookie;
13278 13285
13279 13286 ASSERT(MUTEX_HELD(&dtrace_lock));
13280 13287
13281 13288 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
13282 13289 state->dts_activity != DTRACE_ACTIVITY_DRAINING)
13283 13290 return (EINVAL);
13284 13291
13285 13292 /*
13286 13293 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
13287 13294 * to be sure that every CPU has seen it. See below for the details
13288 13295 * on why this is done.
13289 13296 */
13290 13297 state->dts_activity = DTRACE_ACTIVITY_DRAINING;
13291 13298 dtrace_sync();
13292 13299
13293 13300 /*
13294 13301 * By this point, it is impossible for any CPU to be still processing
13295 13302 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to
13296 13303 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
13297 13304 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe()
13298 13305 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
13299 13306 * iff we're in the END probe.
13300 13307 */
13301 13308 state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
13302 13309 dtrace_sync();
13303 13310 ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
13304 13311
13305 13312 /*
13306 13313 * Finally, we can release the reserve and call the END probe. We
13307 13314 * disable interrupts across calling the END probe to allow us to
13308 13315 * return the CPU on which we actually called the END probe. This
13309 13316 * allows user-land to be sure that this CPU's principal buffer is
13310 13317 * processed last.
13311 13318 */
13312 13319 state->dts_reserve = 0;
13313 13320
13314 13321 cookie = dtrace_interrupt_disable();
13315 13322 *cpu = CPU->cpu_id;
13316 13323 dtrace_probe(dtrace_probeid_end,
13317 13324 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13318 13325 dtrace_interrupt_enable(cookie);
13319 13326
13320 13327 state->dts_activity = DTRACE_ACTIVITY_STOPPED;
13321 13328 dtrace_sync();
13322 13329
13323 13330 if (state->dts_getf != 0 &&
13324 13331 !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
13325 13332 /*
13326 13333 * We don't have kernel privs but we have at least one call
13327 13334 * to getf(); we need to lower our zone's count, and (if
13328 13335 * this is the last enabling to have an unprivileged call
13329 13336 * to getf()) we need to clear the closef() hook.
13330 13337 */
13331 13338 ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
13332 13339 ASSERT(dtrace_closef == dtrace_getf_barrier);
13333 13340 ASSERT(dtrace_getf > 0);
13334 13341
13335 13342 state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
13336 13343
13337 13344 if (--dtrace_getf == 0)
13338 13345 dtrace_closef = NULL;
13339 13346 }
13340 13347
13341 13348 return (0);
13342 13349 }
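
/*
 * dtrace_state_stop() above is built from one repeated idiom: publish a
 * new activity value, then use dtrace_sync() as a barrier so that no CPU
 * can still be running probe context against the old value. A stripped-down
 * sketch of the idiom, with wait_for_quiescence() as a hypothetical stub
 * standing in for dtrace_sync():
 */
enum { ACTIVE, DRAINING, COOLDOWN };

static volatile int activity = ACTIVE;

static void
wait_for_quiescence(void)	/* the real dtrace_sync() cross-calls CPUs */
{
}

static void
drain_then_cool(void)
{
	activity = DRAINING;
	wait_for_quiescence();	/* no CPU can still observe ACTIVE */

	activity = COOLDOWN;
	wait_for_quiescence();	/* COOLDOWN now unambiguously means END */
}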
13343 13350
13344 13351 static int
13345 13352 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
13346 13353 dtrace_optval_t val)
13347 13354 {
13348 13355 ASSERT(MUTEX_HELD(&dtrace_lock));
13349 13356
13350 13357 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13351 13358 return (EBUSY);
13352 13359
13353 13360 if (option >= DTRACEOPT_MAX)
13354 13361 return (EINVAL);
13355 13362
13356 13363 if (option != DTRACEOPT_CPU && val < 0)
13357 13364 return (EINVAL);
13358 13365
13359 13366 switch (option) {
13360 13367 case DTRACEOPT_DESTRUCTIVE:
13361 13368 if (dtrace_destructive_disallow)
13362 13369 return (EACCES);
13363 13370
13364 13371 state->dts_cred.dcr_destructive = 1;
13365 13372 break;
13366 13373
13367 13374 case DTRACEOPT_BUFSIZE:
13368 13375 case DTRACEOPT_DYNVARSIZE:
13369 13376 case DTRACEOPT_AGGSIZE:
13370 13377 case DTRACEOPT_SPECSIZE:
13371 13378 case DTRACEOPT_STRSIZE:
13372 13379 if (val < 0)
13373 13380 return (EINVAL);
13374 13381
13375 13382 if (val >= LONG_MAX) {
13376 13383 /*
13377 13384 * If this is an otherwise negative value, set it to
13378 13385 * the highest multiple of 128m less than LONG_MAX.
13379 13386 * Technically, we're adjusting the size without
13380 13387 * regard to the buffer resizing policy, but in fact,
13381 13388 * this has no effect -- if we set the buffer size to
13382 13389 * ~LONG_MAX and the buffer policy is ultimately set to
13383 13390 * be "manual", the buffer allocation is guaranteed to
13384 13391 * fail, if only because the allocation requires two
13385 13392 	 * buffers. (We set the size to the highest
13386 13393 * multiple of 128m because it ensures that the size
13387 13394 * will remain a multiple of a megabyte when
13388 13395 * repeatedly halved -- all the way down to 15m.)
13389 13396 */
13390 13397 val = LONG_MAX - (1 << 27) + 1;
13391 13398 }
13392 13399 }
13393 13400
13394 13401 state->dts_options[option] = val;
13395 13402
13396 13403 return (0);
13397 13404 }
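
/*
 * The clamp in the size cases above is easy to sanity-check in isolation.
 * On LP64, LONG_MAX - (1 << 27) + 1 is 2^63 - 2^27, the highest multiple
 * of 128m representable in a long; on ILP32 the same expression is
 * 2^31 - 2^27 = 1920m, which halves cleanly all the way down to 15m. An
 * illustrative userland check, not part of the driver:
 */
#include <limits.h>
#include <assert.h>

int
main(void)
{
	long val = LONG_MAX - (1L << 27) + 1;
	int i;

	assert(val % (1L << 27) == 0);		/* a multiple of 128m */

	for (i = 0; i < 7; i++) {		/* 128m halves to 1m */
		val /= 2;
		assert(val % (1L << 20) == 0);	/* still megabyte-aligned */
	}

	return (0);
}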
13398 13405
13399 13406 static void
13400 13407 dtrace_state_destroy(dtrace_state_t *state)
13401 13408 {
13402 13409 dtrace_ecb_t *ecb;
13403 13410 dtrace_vstate_t *vstate = &state->dts_vstate;
13404 13411 minor_t minor = getminor(state->dts_dev);
13405 13412 int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
13406 13413 dtrace_speculation_t *spec = state->dts_speculations;
13407 13414 int nspec = state->dts_nspeculations;
13408 13415 uint32_t match;
13409 13416
13410 13417 ASSERT(MUTEX_HELD(&dtrace_lock));
13411 13418 ASSERT(MUTEX_HELD(&cpu_lock));
13412 13419
13413 13420 /*
13414 13421 * First, retract any retained enablings for this state.
13415 13422 */
13416 13423 dtrace_enabling_retract(state);
13417 13424 ASSERT(state->dts_nretained == 0);
13418 13425
13419 13426 if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
13420 13427 state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
13421 13428 /*
13422 13429 * We have managed to come into dtrace_state_destroy() on a
13423 13430 * hot enabling -- almost certainly because of a disorderly
13424 13431 * shutdown of a consumer. (That is, a consumer that is
13425 13432 * exiting without having called dtrace_stop().) In this case,
13426 13433 * we're going to set our activity to be KILLED, and then
13427 13434 * issue a sync to be sure that everyone is out of probe
13428 13435 * context before we start blowing away ECBs.
13429 13436 */
13430 13437 state->dts_activity = DTRACE_ACTIVITY_KILLED;
13431 13438 dtrace_sync();
13432 13439 }
13433 13440
13434 13441 /*
13435 13442 * Release the credential hold we took in dtrace_state_create().
13436 13443 */
13437 13444 if (state->dts_cred.dcr_cred != NULL)
13438 13445 crfree(state->dts_cred.dcr_cred);
13439 13446
13440 13447 /*
13441 13448 * Now we can safely disable and destroy any enabled probes. Because
13442 13449 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
13443 13450 * (especially if they're all enabled), we take two passes through the
13444 13451 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
13445 13452 * in the second we disable whatever is left over.
13446 13453 */
13447 13454 for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
13448 13455 for (i = 0; i < state->dts_necbs; i++) {
13449 13456 if ((ecb = state->dts_ecbs[i]) == NULL)
13450 13457 continue;
13451 13458
13452 13459 if (match && ecb->dte_probe != NULL) {
13453 13460 dtrace_probe_t *probe = ecb->dte_probe;
13454 13461 dtrace_provider_t *prov = probe->dtpr_provider;
13455 13462
13456 13463 if (!(prov->dtpv_priv.dtpp_flags & match))
13457 13464 continue;
13458 13465 }
13459 13466
13460 13467 dtrace_ecb_disable(ecb);
13461 13468 dtrace_ecb_destroy(ecb);
13462 13469 }
13463 13470
13464 13471 if (!match)
13465 13472 break;
13466 13473 }
13467 13474
13468 13475 /*
13469 13476 * Before we free the buffers, perform one more sync to assure that
13470 13477 * every CPU is out of probe context.
13471 13478 */
13472 13479 dtrace_sync();
13473 13480
13474 13481 dtrace_buffer_free(state->dts_buffer);
13475 13482 dtrace_buffer_free(state->dts_aggbuffer);
13476 13483
13477 13484 for (i = 0; i < nspec; i++)
13478 13485 dtrace_buffer_free(spec[i].dtsp_buffer);
13479 13486
13480 13487 if (state->dts_cleaner != CYCLIC_NONE)
13481 13488 cyclic_remove(state->dts_cleaner);
13482 13489
13483 13490 if (state->dts_deadman != CYCLIC_NONE)
13484 13491 cyclic_remove(state->dts_deadman);
13485 13492
13486 13493 dtrace_dstate_fini(&vstate->dtvs_dynvars);
13487 13494 dtrace_vstate_fini(vstate);
13488 13495 kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));
13489 13496
13490 13497 if (state->dts_aggregations != NULL) {
13491 13498 #ifdef DEBUG
13492 13499 for (i = 0; i < state->dts_naggregations; i++)
13493 13500 ASSERT(state->dts_aggregations[i] == NULL);
13494 13501 #endif
13495 13502 ASSERT(state->dts_naggregations > 0);
13496 13503 kmem_free(state->dts_aggregations,
13497 13504 state->dts_naggregations * sizeof (dtrace_aggregation_t *));
13498 13505 }
13499 13506
13500 13507 kmem_free(state->dts_buffer, bufsize);
13501 13508 kmem_free(state->dts_aggbuffer, bufsize);
13502 13509
13503 13510 for (i = 0; i < nspec; i++)
13504 13511 kmem_free(spec[i].dtsp_buffer, bufsize);
13505 13512
13506 13513 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
13507 13514
13508 13515 dtrace_format_destroy(state);
13509 13516
13510 13517 vmem_destroy(state->dts_aggid_arena);
13511 13518 ddi_soft_state_free(dtrace_softstate, minor);
13512 13519 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
13513 13520 }
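
/*
 * The ECB teardown above is a two-pass sweep: the first pass destroys only
 * entries in the expensive class (DTRACE_PRIV_KERNEL providers), and the
 * second destroys whatever remains. The shape, reduced to its essentials
 * with hypothetical names (item_t, destroy, teardown):
 */
#include <stdint.h>

typedef struct item {
	uint32_t flags;
} item_t;

static void
destroy(item_t *ip)
{
	(void) ip;
}

static void
teardown(item_t **items, int n, uint32_t expensive)
{
	uint32_t match;
	int i;

	for (match = expensive; ; match = 0) {
		for (i = 0; i < n; i++) {
			if (items[i] == NULL)
				continue;

			if (match && !(items[i]->flags & match))
				continue;	/* not in this pass */

			destroy(items[i]);
			items[i] = NULL;
		}

		if (!match)
			break;			/* second pass is done */
	}
}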
13514 13521
13515 13522 /*
13516 13523 * DTrace Anonymous Enabling Functions
13517 13524 */
13518 13525 static dtrace_state_t *
13519 13526 dtrace_anon_grab(void)
13520 13527 {
13521 13528 dtrace_state_t *state;
13522 13529
13523 13530 ASSERT(MUTEX_HELD(&dtrace_lock));
13524 13531
13525 13532 if ((state = dtrace_anon.dta_state) == NULL) {
13526 13533 ASSERT(dtrace_anon.dta_enabling == NULL);
13527 13534 return (NULL);
13528 13535 }
13529 13536
13530 13537 ASSERT(dtrace_anon.dta_enabling != NULL);
13531 13538 ASSERT(dtrace_retained != NULL);
13532 13539
13533 13540 dtrace_enabling_destroy(dtrace_anon.dta_enabling);
13534 13541 dtrace_anon.dta_enabling = NULL;
13535 13542 dtrace_anon.dta_state = NULL;
13536 13543
13537 13544 return (state);
13538 13545 }
13539 13546
13540 13547 static void
13541 13548 dtrace_anon_property(void)
13542 13549 {
13543 13550 int i, rv;
13544 13551 dtrace_state_t *state;
13545 13552 dof_hdr_t *dof;
13546 13553 char c[32]; /* enough for "dof-data-" + digits */
13547 13554
13548 13555 ASSERT(MUTEX_HELD(&dtrace_lock));
13549 13556 ASSERT(MUTEX_HELD(&cpu_lock));
13550 13557
13551 13558 for (i = 0; ; i++) {
13552 13559 (void) snprintf(c, sizeof (c), "dof-data-%d", i);
13553 13560
13554 13561 dtrace_err_verbose = 1;
13555 13562
13556 13563 if ((dof = dtrace_dof_property(c)) == NULL) {
13557 13564 dtrace_err_verbose = 0;
13558 13565 break;
13559 13566 }
13560 13567
13561 13568 /*
13562 13569 * We want to create anonymous state, so we need to transition
13563 13570 * the kernel debugger to indicate that DTrace is active. If
13564 13571 * this fails (e.g. because the debugger has modified text in
13565 13572 * some way), we won't continue with the processing.
13566 13573 */
13567 13574 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
13568 13575 cmn_err(CE_NOTE, "kernel debugger active; anonymous "
13569 13576 "enabling ignored.");
13570 13577 dtrace_dof_destroy(dof);
13571 13578 break;
13572 13579 }
13573 13580
13574 13581 /*
13575 13582 * If we haven't allocated an anonymous state, we'll do so now.
13576 13583 */
13577 13584 if ((state = dtrace_anon.dta_state) == NULL) {
13578 13585 state = dtrace_state_create(NULL, NULL);
13579 13586 dtrace_anon.dta_state = state;
13580 13587
13581 13588 if (state == NULL) {
13582 13589 /*
13583 13590 * This basically shouldn't happen: the only
13584 13591 * failure mode from dtrace_state_create() is a
13585 13592 * failure of ddi_soft_state_zalloc() that
13586 13593 * itself should never happen. Still, the
13587 13594 * interface allows for a failure mode, and
13588 13595 * we want to fail as gracefully as possible:
13589 13596 * we'll emit an error message and cease
13590 13597 * processing anonymous state in this case.
13591 13598 */
13592 13599 cmn_err(CE_WARN, "failed to create "
13593 13600 "anonymous state");
13594 13601 dtrace_dof_destroy(dof);
13595 13602 break;
13596 13603 }
13597 13604 }
13598 13605
13599 13606 rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
13600 13607 &dtrace_anon.dta_enabling, 0, B_TRUE);
13601 13608
13602 13609 if (rv == 0)
13603 13610 rv = dtrace_dof_options(dof, state);
13604 13611
13605 13612 dtrace_err_verbose = 0;
13606 13613 dtrace_dof_destroy(dof);
13607 13614
13608 13615 if (rv != 0) {
13609 13616 /*
13610 13617 * This is malformed DOF; chuck any anonymous state
13611 13618 * that we created.
13612 13619 */
13613 13620 ASSERT(dtrace_anon.dta_enabling == NULL);
13614 13621 dtrace_state_destroy(state);
13615 13622 dtrace_anon.dta_state = NULL;
13616 13623 break;
13617 13624 }
13618 13625
13619 13626 ASSERT(dtrace_anon.dta_enabling != NULL);
13620 13627 }
13621 13628
13622 13629 if (dtrace_anon.dta_enabling != NULL) {
13623 13630 int rval;
13624 13631
13625 13632 /*
13626 13633 * dtrace_enabling_retain() can only fail because we are
13627 13634 * trying to retain more enablings than are allowed -- but
13628 13635 * we only have one anonymous enabling, and we are guaranteed
13629 13636 * to be allowed at least one retained enabling; we assert
13630 13637 * that dtrace_enabling_retain() returns success.
13631 13638 */
13632 13639 rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
13633 13640 ASSERT(rval == 0);
13634 13641
13635 13642 dtrace_enabling_dump(dtrace_anon.dta_enabling);
13636 13643 }
13637 13644 }
13638 13645
13639 13646 /*
13640 13647 * DTrace Helper Functions
13641 13648 */
13642 13649 static void
13643 13650 dtrace_helper_trace(dtrace_helper_action_t *helper,
13644 13651 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
13645 13652 {
13646 13653 uint32_t size, next, nnext, i;
13647 13654 dtrace_helptrace_t *ent;
13648 13655 uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
13649 13656
13650 13657 if (!dtrace_helptrace_enabled)
13651 13658 return;
13652 13659
13653 13660 ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);
13654 13661
13655 13662 /*
13656 13663 * What would a tracing framework be without its own tracing
13657 13664 * framework? (Well, a hell of a lot simpler, for starters...)
13658 13665 */
13659 13666 size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
13660 13667 sizeof (uint64_t) - sizeof (uint64_t);
13661 13668
13662 13669 /*
13663 13670 * Iterate until we can allocate a slot in the trace buffer.
13664 13671 */
13665 13672 do {
13666 13673 next = dtrace_helptrace_next;
13667 13674
13668 13675 if (next + size < dtrace_helptrace_bufsize) {
13669 13676 nnext = next + size;
13670 13677 } else {
13671 13678 nnext = size;
13672 13679 }
13673 13680 } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);
13674 13681
13675 13682 /*
13676 13683 * We have our slot; fill it in.
13677 13684 */
13678 13685 if (nnext == size)
13679 13686 next = 0;
13680 13687
13681 13688 ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
13682 13689 ent->dtht_helper = helper;
13683 13690 ent->dtht_where = where;
13684 13691 ent->dtht_nlocals = vstate->dtvs_nlocals;
13685 13692
13686 13693 ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
13687 13694 mstate->dtms_fltoffs : -1;
13688 13695 ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
13689 13696 ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
13690 13697
13691 13698 for (i = 0; i < vstate->dtvs_nlocals; i++) {
13692 13699 dtrace_statvar_t *svar;
13693 13700
13694 13701 if ((svar = vstate->dtvs_locals[i]) == NULL)
13695 13702 continue;
13696 13703
13697 13704 ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
13698 13705 ent->dtht_locals[i] =
13699 13706 ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
13700 13707 }
13701 13708 }
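
/*
 * The slot allocation in dtrace_helper_trace() above reserves space in a
 * shared ring with a compare-and-swap rather than a lock: advance a cursor,
 * wrapping to the start when the record won't fit before the end. A
 * self-contained sketch using the GCC/clang builtin in place of
 * dtrace_cas32(); ring_next, ring_size, and reserve_slot are hypothetical:
 */
#include <stdint.h>

static volatile uint32_t ring_next;
static uint32_t ring_size = 4096;

static uint32_t
reserve_slot(uint32_t size)
{
	uint32_t next, nnext;

	do {
		next = ring_next;
		nnext = (next + size < ring_size) ? next + size : size;
	} while (__sync_val_compare_and_swap(&ring_next,
	    next, nnext) != next);

	/* A wrap (nnext == size) means our record starts at offset 0. */
	return (nnext == size ? 0 : next);
}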
13702 13709
13703 13710 static uint64_t
13704 13711 dtrace_helper(int which, dtrace_mstate_t *mstate,
13705 13712 dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
13706 13713 {
13707 13714 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
13708 13715 uint64_t sarg0 = mstate->dtms_arg[0];
13709 13716 uint64_t sarg1 = mstate->dtms_arg[1];
13710 13717 uint64_t rval;
13711 13718 dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
13712 13719 dtrace_helper_action_t *helper;
13713 13720 dtrace_vstate_t *vstate;
13714 13721 dtrace_difo_t *pred;
13715 13722 int i, trace = dtrace_helptrace_enabled;
13716 13723
13717 13724 ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);
13718 13725
13719 13726 if (helpers == NULL)
13720 13727 return (0);
13721 13728
13722 13729 if ((helper = helpers->dthps_actions[which]) == NULL)
13723 13730 return (0);
13724 13731
13725 13732 vstate = &helpers->dthps_vstate;
13726 13733 mstate->dtms_arg[0] = arg0;
13727 13734 mstate->dtms_arg[1] = arg1;
13728 13735
13729 13736 /*
13730 13737 * Now iterate over each helper. If its predicate evaluates to 'true',
13731 13738 * we'll call the corresponding actions. Note that the below calls
13732 13739 * to dtrace_dif_emulate() may set faults in machine state. This is
13733 13740 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow
13734 13741 * the stored DIF offset with its own (which is the desired behavior).
13735 13742 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
13736 13743 * from machine state; this is okay, too.
13737 13744 */
13738 13745 for (; helper != NULL; helper = helper->dtha_next) {
13739 13746 if ((pred = helper->dtha_predicate) != NULL) {
13740 13747 if (trace)
13741 13748 dtrace_helper_trace(helper, mstate, vstate, 0);
13742 13749
13743 13750 if (!dtrace_dif_emulate(pred, mstate, vstate, state))
13744 13751 goto next;
13745 13752
13746 13753 if (*flags & CPU_DTRACE_FAULT)
13747 13754 goto err;
13748 13755 }
13749 13756
13750 13757 for (i = 0; i < helper->dtha_nactions; i++) {
13751 13758 if (trace)
13752 13759 dtrace_helper_trace(helper,
13753 13760 mstate, vstate, i + 1);
13754 13761
13755 13762 rval = dtrace_dif_emulate(helper->dtha_actions[i],
13756 13763 mstate, vstate, state);
13757 13764
13758 13765 if (*flags & CPU_DTRACE_FAULT)
13759 13766 goto err;
13760 13767 }
13761 13768
13762 13769 next:
13763 13770 if (trace)
13764 13771 dtrace_helper_trace(helper, mstate, vstate,
13765 13772 DTRACE_HELPTRACE_NEXT);
13766 13773 }
13767 13774
13768 13775 if (trace)
13769 13776 dtrace_helper_trace(helper, mstate, vstate,
13770 13777 DTRACE_HELPTRACE_DONE);
13771 13778
13772 13779 /*
13773 13780 * Restore the arg0 that we saved upon entry.
13774 13781 */
13775 13782 mstate->dtms_arg[0] = sarg0;
13776 13783 mstate->dtms_arg[1] = sarg1;
13777 13784
13778 13785 return (rval);
13779 13786
13780 13787 err:
13781 13788 if (trace)
13782 13789 dtrace_helper_trace(helper, mstate, vstate,
13783 13790 DTRACE_HELPTRACE_ERR);
13784 13791
13785 13792 /*
13786 13793 * Restore the arg0 that we saved upon entry.
13787 13794 */
13788 13795 mstate->dtms_arg[0] = sarg0;
13789 13796 mstate->dtms_arg[1] = sarg1;
13790 13797
13791 13798 	return (0);
13792 13799 }
13793 13800
13794 13801 static void
13795 13802 dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
13796 13803 dtrace_vstate_t *vstate)
13797 13804 {
13798 13805 int i;
13799 13806
13800 13807 if (helper->dtha_predicate != NULL)
13801 13808 dtrace_difo_release(helper->dtha_predicate, vstate);
13802 13809
13803 13810 for (i = 0; i < helper->dtha_nactions; i++) {
13804 13811 ASSERT(helper->dtha_actions[i] != NULL);
13805 13812 dtrace_difo_release(helper->dtha_actions[i], vstate);
13806 13813 }
13807 13814
13808 13815 kmem_free(helper->dtha_actions,
13809 13816 helper->dtha_nactions * sizeof (dtrace_difo_t *));
13810 13817 kmem_free(helper, sizeof (dtrace_helper_action_t));
13811 13818 }
13812 13819
13813 13820 static int
13814 13821 dtrace_helper_destroygen(int gen)
13815 13822 {
13816 13823 proc_t *p = curproc;
13817 13824 dtrace_helpers_t *help = p->p_dtrace_helpers;
13818 13825 dtrace_vstate_t *vstate;
13819 13826 int i;
13820 13827
13821 13828 ASSERT(MUTEX_HELD(&dtrace_lock));
13822 13829
13823 13830 if (help == NULL || gen > help->dthps_generation)
13824 13831 return (EINVAL);
13825 13832
13826 13833 vstate = &help->dthps_vstate;
13827 13834
13828 13835 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
13829 13836 dtrace_helper_action_t *last = NULL, *h, *next;
13830 13837
13831 13838 for (h = help->dthps_actions[i]; h != NULL; h = next) {
13832 13839 next = h->dtha_next;
13833 13840
13834 13841 if (h->dtha_generation == gen) {
13835 13842 if (last != NULL) {
13836 13843 last->dtha_next = next;
13837 13844 } else {
13838 13845 help->dthps_actions[i] = next;
13839 13846 }
13840 13847
13841 13848 dtrace_helper_action_destroy(h, vstate);
13842 13849 } else {
13843 13850 last = h;
13844 13851 }
13845 13852 }
13846 13853 }
13847 13854
13848 13855 /*
13849 13856 	 * Iterate until we've cleared out all helper providers with the
13850 13857 * given generation number.
13851 13858 */
13852 13859 for (;;) {
13853 13860 dtrace_helper_provider_t *prov;
13854 13861
13855 13862 /*
13856 13863 * Look for a helper provider with the right generation. We
13857 13864 * have to start back at the beginning of the list each time
13858 13865 * because we drop dtrace_lock. It's unlikely that we'll make
13859 13866 * more than two passes.
13860 13867 */
13861 13868 for (i = 0; i < help->dthps_nprovs; i++) {
13862 13869 prov = help->dthps_provs[i];
13863 13870
13864 13871 if (prov->dthp_generation == gen)
13865 13872 break;
13866 13873 }
13867 13874
13868 13875 /*
13869 13876 * If there were no matches, we're done.
13870 13877 */
13871 13878 if (i == help->dthps_nprovs)
13872 13879 break;
13873 13880
13874 13881 /*
13875 13882 * Move the last helper provider into this slot.
13876 13883 */
13877 13884 help->dthps_nprovs--;
13878 13885 help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
13879 13886 help->dthps_provs[help->dthps_nprovs] = NULL;
13880 13887
13881 13888 mutex_exit(&dtrace_lock);
13882 13889
13883 13890 /*
13884 13891 * If we have a meta provider, remove this helper provider.
13885 13892 */
13886 13893 mutex_enter(&dtrace_meta_lock);
13887 13894 if (dtrace_meta_pid != NULL) {
13888 13895 ASSERT(dtrace_deferred_pid == NULL);
13889 13896 dtrace_helper_provider_remove(&prov->dthp_prov,
13890 13897 p->p_pid);
13891 13898 }
13892 13899 mutex_exit(&dtrace_meta_lock);
13893 13900
13894 13901 dtrace_helper_provider_destroy(prov);
13895 13902
13896 13903 mutex_enter(&dtrace_lock);
13897 13904 }
13898 13905
13899 13906 return (0);
13900 13907 }
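
/*
 * The provider removal above uses the standard unordered-array trick:
 * because dthps_provs has no ordering requirement, the vacated slot is
 * filled with the last element instead of shifting the tail down. In
 * isolation (remove_unordered is a hypothetical name):
 */
static void
remove_unordered(void **array, int *countp, int i)
{
	(*countp)--;
	array[i] = array[*countp];	/* move the last element into slot i */
	array[*countp] = NULL;
}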
13901 13908
13902 13909 static int
13903 13910 dtrace_helper_validate(dtrace_helper_action_t *helper)
13904 13911 {
13905 13912 int err = 0, i;
13906 13913 dtrace_difo_t *dp;
13907 13914
13908 13915 if ((dp = helper->dtha_predicate) != NULL)
13909 13916 err += dtrace_difo_validate_helper(dp);
13910 13917
13911 13918 for (i = 0; i < helper->dtha_nactions; i++)
13912 13919 err += dtrace_difo_validate_helper(helper->dtha_actions[i]);
13913 13920
13914 13921 return (err == 0);
13915 13922 }
13916 13923
13917 13924 static int
13918 13925 dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
13919 13926 {
13920 13927 dtrace_helpers_t *help;
13921 13928 dtrace_helper_action_t *helper, *last;
13922 13929 dtrace_actdesc_t *act;
13923 13930 dtrace_vstate_t *vstate;
13924 13931 dtrace_predicate_t *pred;
13925 13932 int count = 0, nactions = 0, i;
13926 13933
13927 13934 if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
13928 13935 return (EINVAL);
13929 13936
13930 13937 help = curproc->p_dtrace_helpers;
13931 13938 last = help->dthps_actions[which];
13932 13939 vstate = &help->dthps_vstate;
13933 13940
13934 13941 for (count = 0; last != NULL; last = last->dtha_next) {
13935 13942 count++;
13936 13943 if (last->dtha_next == NULL)
13937 13944 break;
13938 13945 }
13939 13946
13940 13947 /*
13941 13948 * If we already have dtrace_helper_actions_max helper actions for this
13942 13949 * helper action type, we'll refuse to add a new one.
13943 13950 */
13944 13951 if (count >= dtrace_helper_actions_max)
13945 13952 return (ENOSPC);
13946 13953
13947 13954 helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
13948 13955 helper->dtha_generation = help->dthps_generation;
13949 13956
13950 13957 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
13951 13958 ASSERT(pred->dtp_difo != NULL);
13952 13959 dtrace_difo_hold(pred->dtp_difo);
13953 13960 helper->dtha_predicate = pred->dtp_difo;
13954 13961 }
13955 13962
13956 13963 for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
13957 13964 if (act->dtad_kind != DTRACEACT_DIFEXPR)
13958 13965 goto err;
13959 13966
13960 13967 if (act->dtad_difo == NULL)
13961 13968 goto err;
13962 13969
13963 13970 nactions++;
13964 13971 }
13965 13972
13966 13973 helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
13967 13974 (helper->dtha_nactions = nactions), KM_SLEEP);
13968 13975
13969 13976 for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
13970 13977 dtrace_difo_hold(act->dtad_difo);
13971 13978 helper->dtha_actions[i++] = act->dtad_difo;
13972 13979 }
13973 13980
13974 13981 if (!dtrace_helper_validate(helper))
13975 13982 goto err;
13976 13983
13977 13984 if (last == NULL) {
13978 13985 help->dthps_actions[which] = helper;
13979 13986 } else {
13980 13987 last->dtha_next = helper;
13981 13988 }
13982 13989
13983 13990 if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
13984 13991 dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
13985 13992 dtrace_helptrace_next = 0;
13986 13993 }
13987 13994
13988 13995 return (0);
13989 13996 err:
13990 13997 dtrace_helper_action_destroy(helper, vstate);
13991 13998 return (EINVAL);
13992 13999 }
13993 14000
13994 14001 static void
13995 14002 dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
13996 14003 dof_helper_t *dofhp)
13997 14004 {
13998 14005 ASSERT(MUTEX_NOT_HELD(&dtrace_lock));
13999 14006
14000 14007 mutex_enter(&dtrace_meta_lock);
14001 14008 mutex_enter(&dtrace_lock);
14002 14009
14003 14010 if (!dtrace_attached() || dtrace_meta_pid == NULL) {
14004 14011 /*
14005 14012 * If the dtrace module is loaded but not attached, or if
14006 14013 		 * there isn't a meta provider registered to deal with
14007 14014 * these provider descriptions, we need to postpone creating
14008 14015 * the actual providers until later.
14009 14016 */
14010 14017
14011 14018 if (help->dthps_next == NULL && help->dthps_prev == NULL &&
14012 14019 dtrace_deferred_pid != help) {
14013 14020 help->dthps_deferred = 1;
14014 14021 help->dthps_pid = p->p_pid;
14015 14022 help->dthps_next = dtrace_deferred_pid;
14016 14023 help->dthps_prev = NULL;
14017 14024 if (dtrace_deferred_pid != NULL)
14018 14025 dtrace_deferred_pid->dthps_prev = help;
14019 14026 dtrace_deferred_pid = help;
14020 14027 }
14021 14028
14022 14029 mutex_exit(&dtrace_lock);
14023 14030
14024 14031 } else if (dofhp != NULL) {
14025 14032 /*
14026 14033 * If the dtrace module is loaded and we have a particular
14027 14034 * helper provider description, pass that off to the
14028 14035 * meta provider.
14029 14036 */
14030 14037
14031 14038 mutex_exit(&dtrace_lock);
14032 14039
14033 14040 dtrace_helper_provide(dofhp, p->p_pid);
14034 14041
14035 14042 } else {
14036 14043 /*
14037 14044 * Otherwise, just pass all the helper provider descriptions
14038 14045 * off to the meta provider.
14039 14046 */
14040 14047
14041 14048 int i;
14042 14049 mutex_exit(&dtrace_lock);
14043 14050
14044 14051 for (i = 0; i < help->dthps_nprovs; i++) {
14045 14052 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
14046 14053 p->p_pid);
14047 14054 }
14048 14055 }
14049 14056
14050 14057 mutex_exit(&dtrace_meta_lock);
14051 14058 }
14052 14059
14053 14060 static int
14054 14061 dtrace_helper_provider_add(dof_helper_t *dofhp, int gen)
14055 14062 {
14056 14063 dtrace_helpers_t *help;
14057 14064 dtrace_helper_provider_t *hprov, **tmp_provs;
14058 14065 uint_t tmp_maxprovs, i;
14059 14066
14060 14067 ASSERT(MUTEX_HELD(&dtrace_lock));
14061 14068
14062 14069 help = curproc->p_dtrace_helpers;
14063 14070 ASSERT(help != NULL);
14064 14071
14065 14072 /*
14066 14073 * If we already have dtrace_helper_providers_max helper providers,
14067 14074 	 * we refuse to add a new one.
14068 14075 */
14069 14076 if (help->dthps_nprovs >= dtrace_helper_providers_max)
14070 14077 return (ENOSPC);
14071 14078
14072 14079 /*
14073 14080 * Check to make sure this isn't a duplicate.
14074 14081 */
14075 14082 for (i = 0; i < help->dthps_nprovs; i++) {
14076 14083 if (dofhp->dofhp_addr ==
14077 14084 help->dthps_provs[i]->dthp_prov.dofhp_addr)
14078 14085 return (EALREADY);
14079 14086 }
14080 14087
14081 14088 hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
14082 14089 hprov->dthp_prov = *dofhp;
14083 14090 hprov->dthp_ref = 1;
14084 14091 hprov->dthp_generation = gen;
14085 14092
14086 14093 /*
14087 14094 * Allocate a bigger table for helper providers if it's already full.
14088 14095 */
14089 14096 if (help->dthps_maxprovs == help->dthps_nprovs) {
14090 14097 tmp_maxprovs = help->dthps_maxprovs;
14091 14098 tmp_provs = help->dthps_provs;
14092 14099
14093 14100 if (help->dthps_maxprovs == 0)
14094 14101 help->dthps_maxprovs = 2;
14095 14102 else
14096 14103 help->dthps_maxprovs *= 2;
14097 14104 if (help->dthps_maxprovs > dtrace_helper_providers_max)
14098 14105 help->dthps_maxprovs = dtrace_helper_providers_max;
14099 14106
14100 14107 ASSERT(tmp_maxprovs < help->dthps_maxprovs);
14101 14108
14102 14109 help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
14103 14110 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
14104 14111
14105 14112 if (tmp_provs != NULL) {
14106 14113 bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
14107 14114 sizeof (dtrace_helper_provider_t *));
14108 14115 kmem_free(tmp_provs, tmp_maxprovs *
14109 14116 sizeof (dtrace_helper_provider_t *));
14110 14117 }
14111 14118 }
14112 14119
14113 14120 help->dthps_provs[help->dthps_nprovs] = hprov;
14114 14121 help->dthps_nprovs++;
14115 14122
14116 14123 return (0);
14117 14124 }
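
/*
 * The table growth above starts at two entries, doubles when full, and is
 * capped at dtrace_helper_providers_max. A userland sketch of the same
 * policy (grow and its parameters are hypothetical):
 */
#include <stdlib.h>
#include <string.h>

static int
grow(void ***tabp, unsigned *maxp, unsigned cap)
{
	unsigned omax = *maxp;
	unsigned nmax = (omax == 0) ? 2 : omax * 2;
	void **ntab;

	if (nmax > cap)
		nmax = cap;

	if (nmax <= omax)
		return (-1);		/* already at the cap */

	if ((ntab = calloc(nmax, sizeof (void *))) == NULL)
		return (-1);

	if (*tabp != NULL) {
		memcpy(ntab, *tabp, omax * sizeof (void *));
		free(*tabp);
	}

	*tabp = ntab;
	*maxp = nmax;
	return (0);
}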
14118 14125
14119 14126 static void
14120 14127 dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
14121 14128 {
14122 14129 mutex_enter(&dtrace_lock);
14123 14130
14124 14131 if (--hprov->dthp_ref == 0) {
14125 14132 dof_hdr_t *dof;
14126 14133 mutex_exit(&dtrace_lock);
14127 14134 dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
14128 14135 dtrace_dof_destroy(dof);
14129 14136 kmem_free(hprov, sizeof (dtrace_helper_provider_t));
14130 14137 } else {
14131 14138 mutex_exit(&dtrace_lock);
14132 14139 }
14133 14140 }
14134 14141
14135 14142 static int
14136 14143 dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
14137 14144 {
14138 14145 uintptr_t daddr = (uintptr_t)dof;
14139 14146 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
14140 14147 dof_provider_t *provider;
14141 14148 dof_probe_t *probe;
14142 14149 uint8_t *arg;
14143 14150 char *strtab, *typestr;
14144 14151 dof_stridx_t typeidx;
14145 14152 size_t typesz;
14146 14153 uint_t nprobes, j, k;
14147 14154
14148 14155 ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);
14149 14156
14150 14157 if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
14151 14158 dtrace_dof_error(dof, "misaligned section offset");
14152 14159 return (-1);
14153 14160 }
14154 14161
14155 14162 /*
14156 14163 * The section needs to be large enough to contain the DOF provider
14157 14164 * structure appropriate for the given version.
14158 14165 */
14159 14166 if (sec->dofs_size <
14160 14167 ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
14161 14168 offsetof(dof_provider_t, dofpv_prenoffs) :
14162 14169 sizeof (dof_provider_t))) {
14163 14170 dtrace_dof_error(dof, "provider section too small");
14164 14171 return (-1);
14165 14172 }
14166 14173
14167 14174 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
14168 14175 str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
14169 14176 prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
14170 14177 arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
14171 14178 off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);
14172 14179
14173 14180 if (str_sec == NULL || prb_sec == NULL ||
14174 14181 arg_sec == NULL || off_sec == NULL)
14175 14182 return (-1);
14176 14183
14177 14184 enoff_sec = NULL;
14178 14185
14179 14186 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
14180 14187 provider->dofpv_prenoffs != DOF_SECT_NONE &&
14181 14188 (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
14182 14189 provider->dofpv_prenoffs)) == NULL)
14183 14190 return (-1);
14184 14191
14185 14192 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
14186 14193
14187 14194 if (provider->dofpv_name >= str_sec->dofs_size ||
14188 14195 strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
14189 14196 dtrace_dof_error(dof, "invalid provider name");
14190 14197 return (-1);
14191 14198 }
14192 14199
14193 14200 if (prb_sec->dofs_entsize == 0 ||
14194 14201 prb_sec->dofs_entsize > prb_sec->dofs_size) {
14195 14202 dtrace_dof_error(dof, "invalid entry size");
14196 14203 return (-1);
14197 14204 }
14198 14205
14199 14206 if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
14200 14207 dtrace_dof_error(dof, "misaligned entry size");
14201 14208 return (-1);
14202 14209 }
14203 14210
14204 14211 if (off_sec->dofs_entsize != sizeof (uint32_t)) {
14205 14212 dtrace_dof_error(dof, "invalid entry size");
14206 14213 return (-1);
14207 14214 }
14208 14215
14209 14216 if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
14210 14217 dtrace_dof_error(dof, "misaligned section offset");
14211 14218 return (-1);
14212 14219 }
14213 14220
14214 14221 if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
14215 14222 dtrace_dof_error(dof, "invalid entry size");
14216 14223 return (-1);
14217 14224 }
14218 14225
14219 14226 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
14220 14227
14221 14228 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
14222 14229
14223 14230 /*
14224 14231 * Take a pass through the probes to check for errors.
14225 14232 */
14226 14233 for (j = 0; j < nprobes; j++) {
14227 14234 probe = (dof_probe_t *)(uintptr_t)(daddr +
14228 14235 prb_sec->dofs_offset + j * prb_sec->dofs_entsize);
14229 14236
14230 14237 if (probe->dofpr_func >= str_sec->dofs_size) {
14231 14238 dtrace_dof_error(dof, "invalid function name");
14232 14239 return (-1);
14233 14240 }
14234 14241
14235 14242 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
14236 14243 dtrace_dof_error(dof, "function name too long");
14237 14244 return (-1);
14238 14245 }
14239 14246
14240 14247 if (probe->dofpr_name >= str_sec->dofs_size ||
14241 14248 strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
14242 14249 dtrace_dof_error(dof, "invalid probe name");
14243 14250 return (-1);
14244 14251 }
14245 14252
14246 14253 /*
14247 14254 * The offset count must not wrap the index, and the offsets
14248 14255 * must also not overflow the section's data.
14249 14256 */
14250 14257 if (probe->dofpr_offidx + probe->dofpr_noffs <
14251 14258 probe->dofpr_offidx ||
14252 14259 (probe->dofpr_offidx + probe->dofpr_noffs) *
14253 14260 off_sec->dofs_entsize > off_sec->dofs_size) {
14254 14261 dtrace_dof_error(dof, "invalid probe offset");
14255 14262 return (-1);
14256 14263 }
14257 14264
14258 14265 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
14259 14266 /*
14260 14267 * If there's no is-enabled offset section, make sure
14261 14268 * there aren't any is-enabled offsets. Otherwise
14262 14269 * perform the same checks as for probe offsets
14263 14270 * (immediately above).
14264 14271 */
14265 14272 if (enoff_sec == NULL) {
14266 14273 if (probe->dofpr_enoffidx != 0 ||
14267 14274 probe->dofpr_nenoffs != 0) {
14268 14275 dtrace_dof_error(dof, "is-enabled "
14269 14276 "offsets with null section");
14270 14277 return (-1);
14271 14278 }
14272 14279 } else if (probe->dofpr_enoffidx +
14273 14280 probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
14274 14281 (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
14275 14282 enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
14276 14283 dtrace_dof_error(dof, "invalid is-enabled "
14277 14284 "offset");
14278 14285 return (-1);
14279 14286 }
14280 14287
14281 14288 if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
14282 14289 dtrace_dof_error(dof, "zero probe and "
14283 14290 "is-enabled offsets");
14284 14291 return (-1);
14285 14292 }
14286 14293 } else if (probe->dofpr_noffs == 0) {
14287 14294 dtrace_dof_error(dof, "zero probe offsets");
14288 14295 return (-1);
14289 14296 }
14290 14297
14291 14298 if (probe->dofpr_argidx + probe->dofpr_xargc <
14292 14299 probe->dofpr_argidx ||
14293 14300 (probe->dofpr_argidx + probe->dofpr_xargc) *
14294 14301 arg_sec->dofs_entsize > arg_sec->dofs_size) {
14295 14302 dtrace_dof_error(dof, "invalid args");
14296 14303 return (-1);
14297 14304 }
14298 14305
14299 14306 typeidx = probe->dofpr_nargv;
14300 14307 typestr = strtab + probe->dofpr_nargv;
14301 14308 for (k = 0; k < probe->dofpr_nargc; k++) {
14302 14309 if (typeidx >= str_sec->dofs_size) {
14303 14310 dtrace_dof_error(dof, "bad "
14304 14311 "native argument type");
14305 14312 return (-1);
14306 14313 }
14307 14314
14308 14315 typesz = strlen(typestr) + 1;
14309 14316 if (typesz > DTRACE_ARGTYPELEN) {
14310 14317 dtrace_dof_error(dof, "native "
14311 14318 "argument type too long");
14312 14319 return (-1);
14313 14320 }
14314 14321 typeidx += typesz;
14315 14322 typestr += typesz;
14316 14323 }
14317 14324
14318 14325 typeidx = probe->dofpr_xargv;
14319 14326 typestr = strtab + probe->dofpr_xargv;
14320 14327 for (k = 0; k < probe->dofpr_xargc; k++) {
14321 14328 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
14322 14329 dtrace_dof_error(dof, "bad "
14323 14330 "native argument index");
14324 14331 return (-1);
14325 14332 }
14326 14333
14327 14334 if (typeidx >= str_sec->dofs_size) {
14328 14335 dtrace_dof_error(dof, "bad "
14329 14336 "translated argument type");
14330 14337 return (-1);
14331 14338 }
14332 14339
14333 14340 typesz = strlen(typestr) + 1;
14334 14341 if (typesz > DTRACE_ARGTYPELEN) {
14335 14342 dtrace_dof_error(dof, "translated argument "
14336 14343 "type too long");
14337 14344 return (-1);
14338 14345 }
14339 14346
14340 14347 typeidx += typesz;
14341 14348 typestr += typesz;
14342 14349 }
14343 14350 }
14344 14351
14345 14352 return (0);
14346 14353 }
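
/*
 * Nearly every bounds test in the validation above has the same shape:
 * "idx + n < idx" catches unsigned wraparound in the index arithmetic, and
 * the scaled end offset is then compared against the section size. The test
 * in isolation (range_ok is hypothetical; the cast keeps the multiply from
 * overflowing in this sketch):
 */
#include <stdint.h>

static int
range_ok(uint32_t idx, uint32_t n, uint32_t entsize, uint64_t secsize)
{
	if (idx + n < idx)
		return (0);	/* index arithmetic wrapped */

	if ((uint64_t)(idx + n) * entsize > secsize)
		return (0);	/* entries run past the section's data */

	return (1);
}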
14347 14354
14348 14355 static int
14349 14356 dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp)
14350 14357 {
14351 14358 dtrace_helpers_t *help;
14352 14359 dtrace_vstate_t *vstate;
14353 14360 dtrace_enabling_t *enab = NULL;
14354 14361 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
14355 14362 uintptr_t daddr = (uintptr_t)dof;
14356 14363
14357 14364 ASSERT(MUTEX_HELD(&dtrace_lock));
14358 14365
14359 14366 if ((help = curproc->p_dtrace_helpers) == NULL)
14360 14367 help = dtrace_helpers_create(curproc);
14361 14368
14362 14369 vstate = &help->dthps_vstate;
14363 14370
14364 14371 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
14365 14372 dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
14366 14373 dtrace_dof_destroy(dof);
14367 14374 return (rv);
14368 14375 }
14369 14376
14370 14377 /*
14371 14378 * Look for helper providers and validate their descriptions.
14372 14379 */
14373 14380 if (dhp != NULL) {
14374 14381 for (i = 0; i < dof->dofh_secnum; i++) {
14375 14382 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
14376 14383 dof->dofh_secoff + i * dof->dofh_secsize);
14377 14384
14378 14385 if (sec->dofs_type != DOF_SECT_PROVIDER)
14379 14386 continue;
14380 14387
14381 14388 if (dtrace_helper_provider_validate(dof, sec) != 0) {
14382 14389 dtrace_enabling_destroy(enab);
14383 14390 dtrace_dof_destroy(dof);
14384 14391 return (-1);
14385 14392 }
14386 14393
14387 14394 nprovs++;
14388 14395 }
14389 14396 }
14390 14397
14391 14398 /*
14392 14399 * Now we need to walk through the ECB descriptions in the enabling.
14393 14400 */
14394 14401 for (i = 0; i < enab->dten_ndesc; i++) {
14395 14402 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
14396 14403 dtrace_probedesc_t *desc = &ep->dted_probe;
14397 14404
14398 14405 if (strcmp(desc->dtpd_provider, "dtrace") != 0)
14399 14406 continue;
14400 14407
14401 14408 if (strcmp(desc->dtpd_mod, "helper") != 0)
14402 14409 continue;
14403 14410
14404 14411 if (strcmp(desc->dtpd_func, "ustack") != 0)
14405 14412 continue;
14406 14413
14407 14414 if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK,
14408 14415 ep)) != 0) {
14409 14416 /*
14410 14417 * Adding this helper action failed -- we are now going
14411 14418 * to rip out the entire generation and return failure.
14412 14419 */
14413 14420 (void) dtrace_helper_destroygen(help->dthps_generation);
14414 14421 dtrace_enabling_destroy(enab);
14415 14422 dtrace_dof_destroy(dof);
14416 14423 return (-1);
14417 14424 }
14418 14425
14419 14426 nhelpers++;
14420 14427 }
14421 14428
14422 14429 if (nhelpers < enab->dten_ndesc)
14423 14430 dtrace_dof_error(dof, "unmatched helpers");
14424 14431
14425 14432 gen = help->dthps_generation++;
14426 14433 dtrace_enabling_destroy(enab);
14427 14434
14428 14435 if (dhp != NULL && nprovs > 0) {
14429 14436 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
14430 14437 if (dtrace_helper_provider_add(dhp, gen) == 0) {
14431 14438 mutex_exit(&dtrace_lock);
14432 14439 dtrace_helper_provider_register(curproc, help, dhp);
14433 14440 mutex_enter(&dtrace_lock);
14434 14441
14435 14442 destroy = 0;
14436 14443 }
14437 14444 }
14438 14445
14439 14446 if (destroy)
14440 14447 dtrace_dof_destroy(dof);
14441 14448
14442 14449 return (gen);
14443 14450 }
14444 14451
14445 14452 static dtrace_helpers_t *
14446 14453 dtrace_helpers_create(proc_t *p)
14447 14454 {
14448 14455 dtrace_helpers_t *help;
14449 14456
14450 14457 ASSERT(MUTEX_HELD(&dtrace_lock));
14451 14458 ASSERT(p->p_dtrace_helpers == NULL);
14452 14459
14453 14460 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
14454 14461 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
14455 14462 DTRACE_NHELPER_ACTIONS, KM_SLEEP);
14456 14463
14457 14464 p->p_dtrace_helpers = help;
14458 14465 dtrace_helpers++;
14459 14466
14460 14467 return (help);
14461 14468 }
14462 14469
14463 14470 static void
14464 14471 dtrace_helpers_destroy(void)
14465 14472 {
14466 14473 dtrace_helpers_t *help;
14467 14474 dtrace_vstate_t *vstate;
14468 14475 proc_t *p = curproc;
14469 14476 int i;
14470 14477
14471 14478 mutex_enter(&dtrace_lock);
14472 14479
14473 14480 ASSERT(p->p_dtrace_helpers != NULL);
14474 14481 ASSERT(dtrace_helpers > 0);
14475 14482
14476 14483 help = p->p_dtrace_helpers;
14477 14484 vstate = &help->dthps_vstate;
14478 14485
14479 14486 /*
14480 14487 * We're now going to lose the help from this process.
14481 14488 */
14482 14489 p->p_dtrace_helpers = NULL;
14483 14490 dtrace_sync();
14484 14491
14485 14492 /*
14486 14493 	 * Destroy the helper actions.
14487 14494 */
14488 14495 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
14489 14496 dtrace_helper_action_t *h, *next;
14490 14497
14491 14498 for (h = help->dthps_actions[i]; h != NULL; h = next) {
14492 14499 next = h->dtha_next;
14493 14500 dtrace_helper_action_destroy(h, vstate);
14494 14501 h = next;
14495 14502 }
14496 14503 }
14497 14504
14498 14505 mutex_exit(&dtrace_lock);
14499 14506
14500 14507 /*
14501 14508 * Destroy the helper providers.
14502 14509 */
14503 14510 if (help->dthps_maxprovs > 0) {
14504 14511 mutex_enter(&dtrace_meta_lock);
14505 14512 if (dtrace_meta_pid != NULL) {
14506 14513 ASSERT(dtrace_deferred_pid == NULL);
14507 14514
14508 14515 for (i = 0; i < help->dthps_nprovs; i++) {
14509 14516 dtrace_helper_provider_remove(
14510 14517 &help->dthps_provs[i]->dthp_prov, p->p_pid);
14511 14518 }
14512 14519 } else {
14513 14520 mutex_enter(&dtrace_lock);
14514 14521 ASSERT(help->dthps_deferred == 0 ||
14515 14522 help->dthps_next != NULL ||
14516 14523 help->dthps_prev != NULL ||
14517 14524 help == dtrace_deferred_pid);
14518 14525
14519 14526 /*
14520 14527 * Remove the helper from the deferred list.
14521 14528 */
14522 14529 if (help->dthps_next != NULL)
14523 14530 help->dthps_next->dthps_prev = help->dthps_prev;
14524 14531 if (help->dthps_prev != NULL)
14525 14532 help->dthps_prev->dthps_next = help->dthps_next;
14526 14533 if (dtrace_deferred_pid == help) {
14527 14534 dtrace_deferred_pid = help->dthps_next;
14528 14535 ASSERT(help->dthps_prev == NULL);
14529 14536 }
14530 14537
14531 14538 mutex_exit(&dtrace_lock);
14532 14539 }
14533 14540
14534 14541 mutex_exit(&dtrace_meta_lock);
14535 14542
14536 14543 for (i = 0; i < help->dthps_nprovs; i++) {
14537 14544 dtrace_helper_provider_destroy(help->dthps_provs[i]);
14538 14545 }
14539 14546
14540 14547 kmem_free(help->dthps_provs, help->dthps_maxprovs *
14541 14548 sizeof (dtrace_helper_provider_t *));
14542 14549 }
14543 14550
14544 14551 mutex_enter(&dtrace_lock);
14545 14552
14546 14553 dtrace_vstate_fini(&help->dthps_vstate);
14547 14554 kmem_free(help->dthps_actions,
14548 14555 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
14549 14556 kmem_free(help, sizeof (dtrace_helpers_t));
14550 14557
14551 14558 --dtrace_helpers;
14552 14559 mutex_exit(&dtrace_lock);
14553 14560 }
14554 14561
14555 14562 static void
14556 14563 dtrace_helpers_duplicate(proc_t *from, proc_t *to)
14557 14564 {
14558 14565 dtrace_helpers_t *help, *newhelp;
14559 14566 dtrace_helper_action_t *helper, *new, *last;
14560 14567 dtrace_difo_t *dp;
14561 14568 dtrace_vstate_t *vstate;
14562 14569 int i, j, sz, hasprovs = 0;
14563 14570
14564 14571 mutex_enter(&dtrace_lock);
14565 14572 ASSERT(from->p_dtrace_helpers != NULL);
14566 14573 ASSERT(dtrace_helpers > 0);
14567 14574
14568 14575 help = from->p_dtrace_helpers;
14569 14576 newhelp = dtrace_helpers_create(to);
14570 14577 ASSERT(to->p_dtrace_helpers != NULL);
14571 14578
14572 14579 newhelp->dthps_generation = help->dthps_generation;
14573 14580 vstate = &newhelp->dthps_vstate;
14574 14581
14575 14582 /*
14576 14583 * Duplicate the helper actions.
14577 14584 */
14578 14585 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
14579 14586 if ((helper = help->dthps_actions[i]) == NULL)
14580 14587 continue;
14581 14588
14582 14589 for (last = NULL; helper != NULL; helper = helper->dtha_next) {
14583 14590 new = kmem_zalloc(sizeof (dtrace_helper_action_t),
14584 14591 KM_SLEEP);
14585 14592 new->dtha_generation = helper->dtha_generation;
14586 14593
14587 14594 if ((dp = helper->dtha_predicate) != NULL) {
14588 14595 dp = dtrace_difo_duplicate(dp, vstate);
14589 14596 new->dtha_predicate = dp;
14590 14597 }
14591 14598
14592 14599 new->dtha_nactions = helper->dtha_nactions;
14593 14600 sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
14594 14601 new->dtha_actions = kmem_alloc(sz, KM_SLEEP);
14595 14602
14596 14603 for (j = 0; j < new->dtha_nactions; j++) {
14597 14604 dtrace_difo_t *dp = helper->dtha_actions[j];
14598 14605
14599 14606 ASSERT(dp != NULL);
14600 14607 dp = dtrace_difo_duplicate(dp, vstate);
14601 14608 new->dtha_actions[j] = dp;
14602 14609 }
14603 14610
14604 14611 if (last != NULL) {
14605 14612 last->dtha_next = new;
14606 14613 } else {
14607 14614 newhelp->dthps_actions[i] = new;
14608 14615 }
14609 14616
14610 14617 last = new;
14611 14618 }
14612 14619 }
14613 14620
14614 14621 /*
14615 14622 * Duplicate the helper providers and register them with the
14616 14623 * DTrace framework.
14617 14624 */
14618 14625 if (help->dthps_nprovs > 0) {
14619 14626 newhelp->dthps_nprovs = help->dthps_nprovs;
14620 14627 newhelp->dthps_maxprovs = help->dthps_nprovs;
14621 14628 newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
14622 14629 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
14623 14630 for (i = 0; i < newhelp->dthps_nprovs; i++) {
14624 14631 newhelp->dthps_provs[i] = help->dthps_provs[i];
14625 14632 newhelp->dthps_provs[i]->dthp_ref++;
14626 14633 }
14627 14634
14628 14635 hasprovs = 1;
14629 14636 }
14630 14637
14631 14638 mutex_exit(&dtrace_lock);
14632 14639
14633 14640 if (hasprovs)
14634 14641 dtrace_helper_provider_register(to, newhelp, NULL);
14635 14642 }
14636 14643
14637 14644 /*
14638 14645 * DTrace Hook Functions
14639 14646 */
14640 14647 static void
14641 14648 dtrace_module_loaded(struct modctl *ctl)
14642 14649 {
14643 14650 dtrace_provider_t *prv;
14644 14651
14645 14652 mutex_enter(&dtrace_provider_lock);
14646 14653 mutex_enter(&mod_lock);
14647 14654
14648 14655 ASSERT(ctl->mod_busy);
14649 14656
14650 14657 /*
14651 14658 * We're going to call each providers per-module provide operation
14652 14659 * specifying only this module.
14653 14660 */
14654 14661 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
14655 14662 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
14656 14663
14657 14664 mutex_exit(&mod_lock);
14658 14665 mutex_exit(&dtrace_provider_lock);
14659 14666
14660 14667 /*
14661 14668 * If we have any retained enablings, we need to match against them.
14662 14669 * Enabling probes requires that cpu_lock be held, and we cannot hold
14663 14670 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
14664 14671 * module. (In particular, this happens when loading scheduling
14665 14672 * classes.) So if we have any retained enablings, we need to dispatch
14666 14673 * our task queue to do the match for us.
14667 14674 */
14668 14675 mutex_enter(&dtrace_lock);
14669 14676
14670 14677 if (dtrace_retained == NULL) {
14671 14678 mutex_exit(&dtrace_lock);
14672 14679 return;
14673 14680 }
14674 14681
14675 14682 (void) taskq_dispatch(dtrace_taskq,
14676 14683 (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);
14677 14684
14678 14685 mutex_exit(&dtrace_lock);
14679 14686
14680 14687 /*
14681 14688 * And now, for a little heuristic sleaze: in general, we want to
14682 14689 * match modules as soon as they load. However, we cannot guarantee
14683 14690 * this, because it would lead us to the lock ordering violation
14684 14691 * outlined above. The common case, of course, is that cpu_lock is
14685 14692 * _not_ held -- so we delay here for a clock tick, hoping that that's
14686 14693 * long enough for the task queue to do its work. If it's not, it's
14687 14694 * not a serious problem -- it just means that the module that we
14688 14695 * just loaded may not be immediately instrumentable.
14689 14696 */
14690 14697 delay(1);
14691 14698 }
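
/*
 * The dispatch above defers the match because dtrace_enabling_matchall()
 * needs cpu_lock, which may already be held when a module loads. The
 * general pattern -- push the lock-hungry work to a task queue and let it
 * run from a context with no inherited locks -- looks like the sketch
 * below; match_cb and defer_match are hypothetical, while taskq_dispatch()
 * is the real illumos interface:
 */
#include <sys/taskq.h>

static void
match_cb(void *arg)
{
	/* Runs later from a taskq thread; free to take cpu_lock here. */
}

static void
defer_match(taskq_t *tq)
{
	(void) taskq_dispatch(tq, match_cb, NULL, TQ_SLEEP);
}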
14692 14699
14693 14700 static void
14694 14701 dtrace_module_unloaded(struct modctl *ctl)
14695 14702 {
14696 14703 dtrace_probe_t template, *probe, *first, *next;
14697 14704 dtrace_provider_t *prov;
14698 14705
14699 14706 template.dtpr_mod = ctl->mod_modname;
14700 14707
14701 14708 mutex_enter(&dtrace_provider_lock);
14702 14709 mutex_enter(&mod_lock);
14703 14710 mutex_enter(&dtrace_lock);
14704 14711
14705 14712 if (dtrace_bymod == NULL) {
14706 14713 /*
14707 14714 * The DTrace module is loaded (obviously) but not attached;
14708 14715 * we don't have any work to do.
14709 14716 */
14710 14717 mutex_exit(&dtrace_provider_lock);
14711 14718 mutex_exit(&mod_lock);
14712 14719 mutex_exit(&dtrace_lock);
14713 14720 return;
14714 14721 }
14715 14722
14716 14723 for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
14717 14724 probe != NULL; probe = probe->dtpr_nextmod) {
14718 14725 if (probe->dtpr_ecb != NULL) {
14719 14726 mutex_exit(&dtrace_provider_lock);
14720 14727 mutex_exit(&mod_lock);
14721 14728 mutex_exit(&dtrace_lock);
14722 14729
14723 14730 /*
14724 14731 * This shouldn't _actually_ be possible -- we're
14725 14732 * unloading a module that has an enabled probe in it.
14726 14733 * (It's normally up to the provider to make sure that
14727 14734 * this can't happen.) However, because dtps_enable()
14728 14735 * doesn't have a failure mode, there can be an
14729 14736 * enable/unload race. Upshot: we don't want to
14730 14737 * assert, but we're not going to disable the
14731 14738 * probe, either.
14732 14739 */
14733 14740 if (dtrace_err_verbose) {
14734 14741 cmn_err(CE_WARN, "unloaded module '%s' had "
14735 14742 "enabled probes", ctl->mod_modname);
14736 14743 }
14737 14744
14738 14745 return;
14739 14746 }
14740 14747 }
14741 14748
14742 14749 probe = first;
14743 14750
14744 14751 for (first = NULL; probe != NULL; probe = next) {
14745 14752 ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);
14746 14753
14747 14754 dtrace_probes[probe->dtpr_id - 1] = NULL;
14748 14755
14749 14756 next = probe->dtpr_nextmod;
14750 14757 dtrace_hash_remove(dtrace_bymod, probe);
14751 14758 dtrace_hash_remove(dtrace_byfunc, probe);
14752 14759 dtrace_hash_remove(dtrace_byname, probe);
14753 14760
14754 14761 if (first == NULL) {
14755 14762 first = probe;
14756 14763 probe->dtpr_nextmod = NULL;
14757 14764 } else {
14758 14765 probe->dtpr_nextmod = first;
14759 14766 first = probe;
14760 14767 }
14761 14768 }
14762 14769
14763 14770 /*
14764 14771 * We've removed all of the module's probes from the hash chains and
14765 14772 * from the probe array. Now issue a dtrace_sync() to be sure that
14766 14773 * everyone has cleared out from any probe array processing.
14767 14774 */
14768 14775 dtrace_sync();
14769 14776
14770 14777 for (probe = first; probe != NULL; probe = first) {
14771 14778 first = probe->dtpr_nextmod;
14772 14779 prov = probe->dtpr_provider;
14773 14780 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
14774 14781 probe->dtpr_arg);
14775 14782 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
14776 14783 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
14777 14784 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
14778 14785 vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
14779 14786 kmem_free(probe, sizeof (dtrace_probe_t));
14780 14787 }
14781 14788
14782 14789 mutex_exit(&dtrace_lock);
14783 14790 mutex_exit(&mod_lock);
14784 14791 mutex_exit(&dtrace_provider_lock);
14785 14792 }
14786 14793
14787 14794 void
14788 14795 dtrace_suspend(void)
14789 14796 {
14790 14797 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
14791 14798 }
14792 14799
14793 14800 void
14794 14801 dtrace_resume(void)
14795 14802 {
14796 14803 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
14797 14804 }
14798 14805
14799 14806 static int
14800 14807 dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
14801 14808 {
14802 14809 ASSERT(MUTEX_HELD(&cpu_lock));
14803 14810 mutex_enter(&dtrace_lock);
14804 14811
14805 14812 switch (what) {
14806 14813 case CPU_CONFIG: {
14807 14814 dtrace_state_t *state;
14808 14815 dtrace_optval_t *opt, rs, c;
14809 14816
14810 14817 /*
14811 14818 * For now, we only allocate a new buffer for anonymous state.
14812 14819 */
14813 14820 if ((state = dtrace_anon.dta_state) == NULL)
14814 14821 break;
14815 14822
14816 14823 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
14817 14824 break;
14818 14825
14819 14826 opt = state->dts_options;
14820 14827 c = opt[DTRACEOPT_CPU];
14821 14828
14822 14829 if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
14823 14830 break;
14824 14831
14825 14832 /*
14826 14833 * Regardless of what the actual policy is, we're going to
14827 14834 * temporarily set our resize policy to be manual. We're
14828 14835 * also going to temporarily set our CPU option to denote
14829 14836 * the newly configured CPU.
14830 14837 */
14831 14838 rs = opt[DTRACEOPT_BUFRESIZE];
14832 14839 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
14833 14840 opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;
14834 14841
14835 14842 (void) dtrace_state_buffers(state);
14836 14843
14837 14844 opt[DTRACEOPT_BUFRESIZE] = rs;
14838 14845 opt[DTRACEOPT_CPU] = c;
14839 14846
14840 14847 break;
14841 14848 }
14842 14849
14843 14850 case CPU_UNCONFIG:
14844 14851 /*
14845 14852 * We don't free the buffer in the CPU_UNCONFIG case. (The
14846 14853 * buffer will be freed when the consumer exits.)
14847 14854 */
14848 14855 break;
14849 14856
14850 14857 default:
14851 14858 break;
14852 14859 }
14853 14860
14854 14861 mutex_exit(&dtrace_lock);
14855 14862 return (0);
14856 14863 }
14857 14864
14858 14865 static void
14859 14866 dtrace_cpu_setup_initial(processorid_t cpu)
14860 14867 {
14861 14868 (void) dtrace_cpu_setup(CPU_CONFIG, cpu);
14862 14869 }
14863 14870
14864 14871 static void
14865 14872 dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
14866 14873 {
14867 14874 if (dtrace_toxranges >= dtrace_toxranges_max) {
14868 14875 int osize, nsize;
14869 14876 dtrace_toxrange_t *range;
14870 14877
14871 14878 osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
14872 14879
14873 14880 if (osize == 0) {
14874 14881 ASSERT(dtrace_toxrange == NULL);
14875 14882 ASSERT(dtrace_toxranges_max == 0);
14876 14883 dtrace_toxranges_max = 1;
14877 14884 } else {
14878 14885 dtrace_toxranges_max <<= 1;
14879 14886 }
14880 14887
14881 14888 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
14882 14889 range = kmem_zalloc(nsize, KM_SLEEP);
14883 14890
14884 14891 if (dtrace_toxrange != NULL) {
14885 14892 ASSERT(osize != 0);
14886 14893 bcopy(dtrace_toxrange, range, osize);
14887 14894 kmem_free(dtrace_toxrange, osize);
14888 14895 }
14889 14896
14890 14897 dtrace_toxrange = range;
14891 14898 }
14892 14899
14893 14900 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
14894 14901 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);
14895 14902
14896 14903 dtrace_toxrange[dtrace_toxranges].dtt_base = base;
14897 14904 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
14898 14905 dtrace_toxranges++;
14899 14906 }
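
/*
 * EXAMPLE (editorial sketch; not part of this change): probe context
 * must refuse loads that overlap any range registered via
 * dtrace_toxrange_add(). The helper below is hypothetical -- the real
 * check lives in the probe-context load path -- but the overlap test
 * it performs is of this shape:
 */
static int
dtrace_toxrange_overlaps(uintptr_t addr, size_t sz)
{
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		if (addr >= dtrace_toxrange[i].dtt_limit)
			continue;	/* toxic range entirely below us */

		if (addr + sz <= dtrace_toxrange[i].dtt_base)
			continue;	/* toxic range entirely above us */

		return (1);		/* [addr, addr + sz) is toxic */
	}

	return (0);
}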
14900 14907
14901 14908 static void
14902 14909 dtrace_getf_barrier()
14903 14910 {
14904 14911 /*
14905 14912 * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
14906 14913 * that contain calls to getf(), this routine will be called on every
14907 14914 * closef() before either the underlying vnode is released or the
14908 14915 * file_t itself is freed. By the time we are here, it is essential
14909 14916 * that the file_t can no longer be accessed from a call to getf()
14910 14917 * in probe context -- that assures that a dtrace_sync() can be used
14911 14918 * to clear out any enablings referring to the old structures.
14912 14919 */
14913 14920 if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
14914 14921 kcred->cr_zone->zone_dtrace_getf != 0)
14915 14922 dtrace_sync();
14916 14923 }
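
/*
 * EXAMPLE (editorial sketch; not part of this change): the caller's
 * side of the contract described above. The VFS layer invokes the
 * barrier through the dtrace_closef hook before the file_t is freed;
 * the function below is schematic, not the actual closef():
 */
static void
closef_sketch(file_t *fp)
{
	/*
	 * By now fp has been removed from the fd table, so no new
	 * getf() can return it -- but probe context may still hold a
	 * reference obtained before the removal.
	 */
	if (dtrace_closef != NULL)
		(*dtrace_closef)();	/* dtrace_getf_barrier() */

	/* ... only now is it safe to release the vnode and free fp ... */
}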
14917 14924
14918 14925 /*
14919 14926 * DTrace Driver Cookbook Functions
14920 14927 */
14921 14928 /*ARGSUSED*/
14922 14929 static int
14923 14930 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
14924 14931 {
14925 14932 dtrace_provider_id_t id;
14926 14933 dtrace_state_t *state = NULL;
14927 14934 dtrace_enabling_t *enab;
14928 14935
14929 14936 mutex_enter(&cpu_lock);
14930 14937 mutex_enter(&dtrace_provider_lock);
14931 14938 mutex_enter(&dtrace_lock);
14932 14939
14933 14940 if (ddi_soft_state_init(&dtrace_softstate,
14934 14941 sizeof (dtrace_state_t), 0) != 0) {
14935 14942 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
14936 14943 mutex_exit(&cpu_lock);
14937 14944 mutex_exit(&dtrace_provider_lock);
14938 14945 mutex_exit(&dtrace_lock);
14939 14946 return (DDI_FAILURE);
14940 14947 }
14941 14948
14942 14949 if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
14943 14950 DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
14944 14951 ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
14945 14952 DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
14946 14953 cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
14947 14954 ddi_remove_minor_node(devi, NULL);
14948 14955 ddi_soft_state_fini(&dtrace_softstate);
14949 14956 mutex_exit(&cpu_lock);
14950 14957 mutex_exit(&dtrace_provider_lock);
14951 14958 mutex_exit(&dtrace_lock);
14952 14959 return (DDI_FAILURE);
14953 14960 }
14954 14961
14955 14962 ddi_report_dev(devi);
14956 14963 dtrace_devi = devi;
14957 14964
14958 14965 dtrace_modload = dtrace_module_loaded;
14959 14966 dtrace_modunload = dtrace_module_unloaded;
14960 14967 dtrace_cpu_init = dtrace_cpu_setup_initial;
14961 14968 dtrace_helpers_cleanup = dtrace_helpers_destroy;
14962 14969 dtrace_helpers_fork = dtrace_helpers_duplicate;
14963 14970 dtrace_cpustart_init = dtrace_suspend;
14964 14971 dtrace_cpustart_fini = dtrace_resume;
14965 14972 dtrace_debugger_init = dtrace_suspend;
14966 14973 dtrace_debugger_fini = dtrace_resume;
14967 14974
14968 14975 register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
14969 14976
14970 14977 ASSERT(MUTEX_HELD(&cpu_lock));
14971 14978
14972 14979 dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
14973 14980 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
14974 14981 dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
14975 14982 UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
14976 14983 VM_SLEEP | VMC_IDENTIFIER);
14977 14984 dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
14978 14985 1, INT_MAX, 0);
14979 14986
14980 14987 dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
14981 14988 sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
14982 14989 NULL, NULL, NULL, NULL, NULL, 0);
14983 14990
14984 14991 ASSERT(MUTEX_HELD(&cpu_lock));
14985 14992 dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
14986 14993 offsetof(dtrace_probe_t, dtpr_nextmod),
14987 14994 offsetof(dtrace_probe_t, dtpr_prevmod));
14988 14995
14989 14996 dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
14990 14997 offsetof(dtrace_probe_t, dtpr_nextfunc),
14991 14998 offsetof(dtrace_probe_t, dtpr_prevfunc));
14992 14999
14993 15000 dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
14994 15001 offsetof(dtrace_probe_t, dtpr_nextname),
14995 15002 offsetof(dtrace_probe_t, dtpr_prevname));
14996 15003
14997 15004 if (dtrace_retain_max < 1) {
14998 15005 cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
14999 15006 "setting to 1", dtrace_retain_max);
15000 15007 dtrace_retain_max = 1;
15001 15008 }
15002 15009
15003 15010 /*
15004 15011 * Now discover our toxic ranges.
15005 15012 */
15006 15013 dtrace_toxic_ranges(dtrace_toxrange_add);
15007 15014
15008 15015 /*
15009 15016 * Before we register ourselves as a provider to our own framework,
15010 15017 * we would like to assert that dtrace_provider is NULL -- but that's
15011 15018 * not true if we were loaded as a dependency of a DTrace provider.
15012 15019 * Once we've registered, we can assert that dtrace_provider is our
15013 15020 * pseudo provider.
15014 15021 */
15015 15022 (void) dtrace_register("dtrace", &dtrace_provider_attr,
15016 15023 DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);
15017 15024
15018 15025 ASSERT(dtrace_provider != NULL);
15019 15026 ASSERT((dtrace_provider_id_t)dtrace_provider == id);
15020 15027
15021 15028 dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
15022 15029 dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
15023 15030 dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
15024 15031 dtrace_provider, NULL, NULL, "END", 0, NULL);
15025 15032 dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
15026 15033 dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
15027 15034
15028 15035 dtrace_anon_property();
15029 15036 mutex_exit(&cpu_lock);
15030 15037
15031 15038 /*
15032 15039 * If DTrace helper tracing is enabled, we need to allocate the
15033 15040 * trace buffer and initialize the values.
15034 15041 */
15035 15042 if (dtrace_helptrace_enabled) {
15036 15043 ASSERT(dtrace_helptrace_buffer == NULL);
15037 15044 dtrace_helptrace_buffer =
15038 15045 kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
15039 15046 dtrace_helptrace_next = 0;
15040 15047 }
15041 15048
15042 15049 /*
15043 15050 * If there are already providers, we must ask them to provide their
15044 15051 * probes, and then match any anonymous enabling against them. Note
15045 15052 * that there should be no other retained enablings at this time:
15046 15053 	 * the only retained enabling should be the anonymous
15047 15054 * enabling.
15048 15055 */
15049 15056 if (dtrace_anon.dta_enabling != NULL) {
15050 15057 ASSERT(dtrace_retained == dtrace_anon.dta_enabling);
15051 15058
15052 15059 dtrace_enabling_provide(NULL);
15053 15060 state = dtrace_anon.dta_state;
15054 15061
15055 15062 /*
15056 15063 * We couldn't hold cpu_lock across the above call to
15057 15064 * dtrace_enabling_provide(), but we must hold it to actually
15058 15065 * enable the probes. We have to drop all of our locks, pick
15059 15066 * up cpu_lock, and regain our locks before matching the
15060 15067 * retained anonymous enabling.
15061 15068 */
15062 15069 mutex_exit(&dtrace_lock);
15063 15070 mutex_exit(&dtrace_provider_lock);
15064 15071
15065 15072 mutex_enter(&cpu_lock);
15066 15073 mutex_enter(&dtrace_provider_lock);
15067 15074 mutex_enter(&dtrace_lock);
15068 15075
15069 15076 if ((enab = dtrace_anon.dta_enabling) != NULL)
15070 15077 (void) dtrace_enabling_match(enab, NULL);
15071 15078
15072 15079 mutex_exit(&cpu_lock);
15073 15080 }
15074 15081
15075 15082 mutex_exit(&dtrace_lock);
15076 15083 mutex_exit(&dtrace_provider_lock);
15077 15084
15078 15085 if (state != NULL) {
15079 15086 /*
15080 15087 * If we created any anonymous state, set it going now.
15081 15088 */
15082 15089 (void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
15083 15090 }
15084 15091
15085 15092 return (DDI_SUCCESS);
15086 15093 }
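
/*
 * EXAMPLE (editorial sketch; not part of this change): the lock
 * ordering that dtrace_attach() and the rest of this file observe.
 * cpu_lock is taken before dtrace_provider_lock, which is taken
 * before dtrace_lock; this ordering is why the anonymous-enabling
 * path above must drop everything and start over to pick up cpu_lock:
 */
static void
dtrace_lock_order_sketch(void)
{
	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/* ... work requiring full framework exclusion ... */

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);
	mutex_exit(&cpu_lock);
}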
15087 15094
15088 15095 /*ARGSUSED*/
15089 15096 static int
15090 15097 dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
15091 15098 {
15092 15099 dtrace_state_t *state;
15093 15100 uint32_t priv;
15094 15101 uid_t uid;
15095 15102 zoneid_t zoneid;
15096 15103
15097 15104 if (getminor(*devp) == DTRACEMNRN_HELPER)
15098 15105 return (0);
15099 15106
15100 15107 /*
15101 15108 * If this wasn't an open with the "helper" minor, then it must be
15102 15109 * the "dtrace" minor.
15103 15110 */
15104 15111 if (getminor(*devp) != DTRACEMNRN_DTRACE)
15105 15112 return (ENXIO);
15106 15113
15107 15114 /*
15108 15115 * If no DTRACE_PRIV_* bits are set in the credential, then the
15109 15116 * caller lacks sufficient permission to do anything with DTrace.
15110 15117 */
15111 15118 dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
15112 15119 if (priv == DTRACE_PRIV_NONE)
15113 15120 return (EACCES);
15114 15121
15115 15122 /*
15116 15123 * Ask all providers to provide all their probes.
15117 15124 */
15118 15125 mutex_enter(&dtrace_provider_lock);
15119 15126 dtrace_probe_provide(NULL, NULL);
15120 15127 mutex_exit(&dtrace_provider_lock);
15121 15128
15122 15129 mutex_enter(&cpu_lock);
15123 15130 mutex_enter(&dtrace_lock);
15124 15131 dtrace_opens++;
15125 15132 dtrace_membar_producer();
15126 15133
15127 15134 /*
15128 15135 * If the kernel debugger is active (that is, if the kernel debugger
15129 15136 * modified text in some way), we won't allow the open.
15130 15137 */
15131 15138 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
15132 15139 dtrace_opens--;
15133 15140 mutex_exit(&cpu_lock);
15134 15141 mutex_exit(&dtrace_lock);
15135 15142 return (EBUSY);
15136 15143 }
15137 15144
15138 15145 state = dtrace_state_create(devp, cred_p);
15139 15146 mutex_exit(&cpu_lock);
15140 15147
15141 15148 if (state == NULL) {
15142 15149 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
15143 15150 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
15144 15151 mutex_exit(&dtrace_lock);
15145 15152 return (EAGAIN);
15146 15153 }
15147 15154
15148 15155 mutex_exit(&dtrace_lock);
15149 15156
15150 15157 return (0);
15151 15158 }
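
/*
 * EXAMPLE (editorial, illustrative userland sketch; not part of this
 * change): each open of the "dtrace" minor creates a private
 * dtrace_state_t via dtrace_state_create(). Consumers normally reach
 * the device through libdtrace rather than opening it directly:
 *
 *	int fd = open("/dev/dtrace/dtrace", O_RDWR);
 *
 * The open fails with EACCES if the credential carries no
 * DTRACE_PRIV_* bits, and with EBUSY if the kernel debugger has
 * modified kernel text.
 */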
15152 15159
15153 15160 /*ARGSUSED*/
15154 15161 static int
15155 15162 dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
15156 15163 {
15157 15164 minor_t minor = getminor(dev);
15158 15165 dtrace_state_t *state;
15159 15166
15160 15167 if (minor == DTRACEMNRN_HELPER)
15161 15168 return (0);
15162 15169
15163 15170 state = ddi_get_soft_state(dtrace_softstate, minor);
15164 15171
15165 15172 mutex_enter(&cpu_lock);
15166 15173 mutex_enter(&dtrace_lock);
15167 15174
15168 15175 if (state->dts_anon) {
15169 15176 /*
15170 15177 * There is anonymous state. Destroy that first.
15171 15178 */
15172 15179 ASSERT(dtrace_anon.dta_state == NULL);
15173 15180 dtrace_state_destroy(state->dts_anon);
15174 15181 }
15175 15182
15176 15183 dtrace_state_destroy(state);
15177 15184 ASSERT(dtrace_opens > 0);
15178 15185
15179 15186 /*
15180 15187 * Only relinquish control of the kernel debugger interface when there
15181 15188 * are no consumers and no anonymous enablings.
15182 15189 */
15183 15190 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
15184 15191 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
15185 15192
15186 15193 mutex_exit(&dtrace_lock);
15187 15194 mutex_exit(&cpu_lock);
15188 15195
15189 15196 return (0);
15190 15197 }
15191 15198
15192 15199 /*ARGSUSED*/
15193 15200 static int
15194 15201 dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv)
15195 15202 {
15196 15203 int rval;
15197 15204 dof_helper_t help, *dhp = NULL;
15198 15205
15199 15206 switch (cmd) {
15200 15207 case DTRACEHIOC_ADDDOF:
15201 15208 if (copyin((void *)arg, &help, sizeof (help)) != 0) {
15202 15209 dtrace_dof_error(NULL, "failed to copyin DOF helper");
15203 15210 return (EFAULT);
15204 15211 }
15205 15212
15206 15213 dhp = &help;
15207 15214 arg = (intptr_t)help.dofhp_dof;
15208 15215 /*FALLTHROUGH*/
15209 15216
15210 15217 case DTRACEHIOC_ADD: {
15211 15218 dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval);
15212 15219
15213 15220 if (dof == NULL)
15214 15221 return (rval);
15215 15222
15216 15223 mutex_enter(&dtrace_lock);
15217 15224
15218 15225 /*
15219 15226 * dtrace_helper_slurp() takes responsibility for the dof --
15220 15227 * it may free it now or it may save it and free it later.
15221 15228 */
15222 15229 if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) {
15223 15230 *rv = rval;
15224 15231 rval = 0;
15225 15232 } else {
15226 15233 rval = EINVAL;
15227 15234 }
15228 15235
15229 15236 mutex_exit(&dtrace_lock);
15230 15237 return (rval);
15231 15238 }
15232 15239
15233 15240 case DTRACEHIOC_REMOVE: {
15234 15241 mutex_enter(&dtrace_lock);
15235 15242 rval = dtrace_helper_destroygen(arg);
15236 15243 mutex_exit(&dtrace_lock);
15237 15244
15238 15245 return (rval);
15239 15246 }
15240 15247
15241 15248 default:
15242 15249 break;
15243 15250 }
15244 15251
15245 15252 return (ENOTTY);
15246 15253 }
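
/*
 * EXAMPLE (editorial, illustrative userland sketch; not part of this
 * change): a helper is registered on the "helper" minor by passing a
 * dof_helper_t whose dofhp_dof points at the DOF image. All names
 * below are hypothetical:
 *
 *	dof_helper_t dh;
 *
 *	(void) strlcpy(dh.dofhp_mod, "a.out", sizeof (dh.dofhp_mod));
 *	dh.dofhp_addr = (uint64_t)(uintptr_t)mapped_base;
 *	dh.dofhp_dof = (uint64_t)(uintptr_t)dof;
 *	gen = ioctl(helper_fd, DTRACEHIOC_ADDDOF, &dh);
 *
 * The generation number returned through *rv may later be handed to
 * DTRACEHIOC_REMOVE to unregister the helper.
 */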
15247 15254
15248 15255 /*ARGSUSED*/
15249 15256 static int
15250 15257 dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
15251 15258 {
15252 15259 minor_t minor = getminor(dev);
15253 15260 dtrace_state_t *state;
15254 15261 int rval;
15255 15262
15256 15263 if (minor == DTRACEMNRN_HELPER)
15257 15264 return (dtrace_ioctl_helper(cmd, arg, rv));
15258 15265
15259 15266 state = ddi_get_soft_state(dtrace_softstate, minor);
15260 15267
15261 15268 if (state->dts_anon) {
15262 15269 ASSERT(dtrace_anon.dta_state == NULL);
15263 15270 state = state->dts_anon;
15264 15271 }
15265 15272
15266 15273 switch (cmd) {
15267 15274 case DTRACEIOC_PROVIDER: {
15268 15275 dtrace_providerdesc_t pvd;
15269 15276 dtrace_provider_t *pvp;
15270 15277
15271 15278 if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0)
15272 15279 return (EFAULT);
15273 15280
15274 15281 pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
15275 15282 mutex_enter(&dtrace_provider_lock);
15276 15283
15277 15284 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
15278 15285 if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0)
15279 15286 break;
15280 15287 }
15281 15288
15282 15289 mutex_exit(&dtrace_provider_lock);
15283 15290
15284 15291 if (pvp == NULL)
15285 15292 return (ESRCH);
15286 15293
15287 15294 bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
15288 15295 bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
15289 15296 if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0)
15290 15297 return (EFAULT);
15291 15298
15292 15299 return (0);
15293 15300 }
15294 15301
15295 15302 case DTRACEIOC_EPROBE: {
15296 15303 dtrace_eprobedesc_t epdesc;
15297 15304 dtrace_ecb_t *ecb;
15298 15305 dtrace_action_t *act;
15299 15306 void *buf;
15300 15307 size_t size;
15301 15308 uintptr_t dest;
15302 15309 int nrecs;
15303 15310
15304 15311 if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0)
15305 15312 return (EFAULT);
15306 15313
15307 15314 mutex_enter(&dtrace_lock);
15308 15315
15309 15316 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
15310 15317 mutex_exit(&dtrace_lock);
15311 15318 return (EINVAL);
15312 15319 }
15313 15320
15314 15321 if (ecb->dte_probe == NULL) {
15315 15322 mutex_exit(&dtrace_lock);
15316 15323 return (EINVAL);
15317 15324 }
15318 15325
15319 15326 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
15320 15327 epdesc.dtepd_uarg = ecb->dte_uarg;
15321 15328 epdesc.dtepd_size = ecb->dte_size;
15322 15329
15323 15330 nrecs = epdesc.dtepd_nrecs;
15324 15331 epdesc.dtepd_nrecs = 0;
15325 15332 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
15326 15333 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
15327 15334 continue;
15328 15335
15329 15336 epdesc.dtepd_nrecs++;
15330 15337 }
15331 15338
15332 15339 /*
15333 15340 * Now that we have the size, we need to allocate a temporary
15334 15341 * buffer in which to store the complete description. We need
15335 15342 		 * the temporary buffer to be able to drop dtrace_lock
15336 15343 * across the copyout(), below.
15337 15344 */
15338 15345 size = sizeof (dtrace_eprobedesc_t) +
15339 15346 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));
15340 15347
15341 15348 buf = kmem_alloc(size, KM_SLEEP);
15342 15349 dest = (uintptr_t)buf;
15343 15350
15344 15351 bcopy(&epdesc, (void *)dest, sizeof (epdesc));
15345 15352 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);
15346 15353
15347 15354 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
15348 15355 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
15349 15356 continue;
15350 15357
15351 15358 if (nrecs-- == 0)
15352 15359 break;
15353 15360
15354 15361 bcopy(&act->dta_rec, (void *)dest,
15355 15362 sizeof (dtrace_recdesc_t));
15356 15363 dest += sizeof (dtrace_recdesc_t);
15357 15364 }
15358 15365
15359 15366 mutex_exit(&dtrace_lock);
15360 15367
15361 15368 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
15362 15369 kmem_free(buf, size);
15363 15370 return (EFAULT);
15364 15371 }
15365 15372
15366 15373 kmem_free(buf, size);
15367 15374 return (0);
15368 15375 }
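
	/*
	 * EXAMPLE (editorial, illustrative userland sketch; not part of
	 * this change): the caller advertises its record capacity in
	 * dtepd_nrecs; the kernel rewrites that field to the true count
	 * and copies out at most the advertised number of
	 * dtrace_recdesc_t records. Names are hypothetical:
	 *
	 *	size_t sz = sizeof (dtrace_eprobedesc_t) +
	 *	    (nrecs - 1) * sizeof (dtrace_recdesc_t);
	 *	dtrace_eprobedesc_t *epd = malloc(sz);
	 *
	 *	epd->dtepd_epid = epid;
	 *	epd->dtepd_nrecs = nrecs;
	 *
	 *	if (ioctl(fd, DTRACEIOC_EPROBE, epd) == -1)
	 *		...
	 */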
15369 15376
15370 15377 case DTRACEIOC_AGGDESC: {
15371 15378 dtrace_aggdesc_t aggdesc;
15372 15379 dtrace_action_t *act;
15373 15380 dtrace_aggregation_t *agg;
15374 15381 int nrecs;
15375 15382 uint32_t offs;
15376 15383 dtrace_recdesc_t *lrec;
15377 15384 void *buf;
15378 15385 size_t size;
15379 15386 uintptr_t dest;
15380 15387
15381 15388 if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0)
15382 15389 return (EFAULT);
15383 15390
15384 15391 mutex_enter(&dtrace_lock);
15385 15392
15386 15393 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
15387 15394 mutex_exit(&dtrace_lock);
15388 15395 return (EINVAL);
15389 15396 }
15390 15397
15391 15398 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;
15392 15399
15393 15400 nrecs = aggdesc.dtagd_nrecs;
15394 15401 aggdesc.dtagd_nrecs = 0;
15395 15402
15396 15403 offs = agg->dtag_base;
15397 15404 lrec = &agg->dtag_action.dta_rec;
15398 15405 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;
15399 15406
15400 15407 for (act = agg->dtag_first; ; act = act->dta_next) {
15401 15408 ASSERT(act->dta_intuple ||
15402 15409 DTRACEACT_ISAGG(act->dta_kind));
15403 15410
15404 15411 /*
15405 15412 * If this action has a record size of zero, it
15406 15413 * denotes an argument to the aggregating action.
15407 15414 * Because the presence of this record doesn't (or
15408 15415 * shouldn't) affect the way the data is interpreted,
15409 15416 * we don't copy it out to save user-level the
15410 15417 * confusion of dealing with a zero-length record.
15411 15418 */
15412 15419 if (act->dta_rec.dtrd_size == 0) {
15413 15420 ASSERT(agg->dtag_hasarg);
15414 15421 continue;
15415 15422 }
15416 15423
15417 15424 aggdesc.dtagd_nrecs++;
15418 15425
15419 15426 if (act == &agg->dtag_action)
15420 15427 break;
15421 15428 }
15422 15429
15423 15430 /*
15424 15431 * Now that we have the size, we need to allocate a temporary
15425 15432 * buffer in which to store the complete description. We need
15426 15433 		 * the temporary buffer to be able to drop dtrace_lock
15427 15434 * across the copyout(), below.
15428 15435 */
15429 15436 size = sizeof (dtrace_aggdesc_t) +
15430 15437 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));
15431 15438
15432 15439 buf = kmem_alloc(size, KM_SLEEP);
15433 15440 dest = (uintptr_t)buf;
15434 15441
15435 15442 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
15436 15443 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);
15437 15444
15438 15445 for (act = agg->dtag_first; ; act = act->dta_next) {
15439 15446 dtrace_recdesc_t rec = act->dta_rec;
15440 15447
15441 15448 /*
15442 15449 * See the comment in the above loop for why we pass
15443 15450 * over zero-length records.
15444 15451 */
15445 15452 if (rec.dtrd_size == 0) {
15446 15453 ASSERT(agg->dtag_hasarg);
15447 15454 continue;
15448 15455 }
15449 15456
15450 15457 if (nrecs-- == 0)
15451 15458 break;
15452 15459
15453 15460 rec.dtrd_offset -= offs;
15454 15461 bcopy(&rec, (void *)dest, sizeof (rec));
15455 15462 dest += sizeof (dtrace_recdesc_t);
15456 15463
15457 15464 if (act == &agg->dtag_action)
15458 15465 break;
15459 15466 }
15460 15467
15461 15468 mutex_exit(&dtrace_lock);
15462 15469
15463 15470 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
15464 15471 kmem_free(buf, size);
15465 15472 return (EFAULT);
15466 15473 }
15467 15474
15468 15475 kmem_free(buf, size);
15469 15476 return (0);
15470 15477 }
15471 15478
15472 15479 case DTRACEIOC_ENABLE: {
15473 15480 dof_hdr_t *dof;
15474 15481 dtrace_enabling_t *enab = NULL;
15475 15482 dtrace_vstate_t *vstate;
15476 15483 int err = 0;
15477 15484
15478 15485 *rv = 0;
15479 15486
15480 15487 /*
15481 15488 * If a NULL argument has been passed, we take this as our
15482 15489 * cue to reevaluate our enablings.
15483 15490 */
15484 15491 if (arg == NULL) {
15485 15492 dtrace_enabling_matchall();
15486 15493
15487 15494 return (0);
15488 15495 }
15489 15496
15490 15497 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
15491 15498 return (rval);
15492 15499
15493 15500 mutex_enter(&cpu_lock);
15494 15501 mutex_enter(&dtrace_lock);
15495 15502 vstate = &state->dts_vstate;
15496 15503
15497 15504 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
15498 15505 mutex_exit(&dtrace_lock);
15499 15506 mutex_exit(&cpu_lock);
15500 15507 dtrace_dof_destroy(dof);
15501 15508 return (EBUSY);
15502 15509 }
15503 15510
15504 15511 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
15505 15512 mutex_exit(&dtrace_lock);
15506 15513 mutex_exit(&cpu_lock);
15507 15514 dtrace_dof_destroy(dof);
15508 15515 return (EINVAL);
15509 15516 }
15510 15517
15511 15518 if ((rval = dtrace_dof_options(dof, state)) != 0) {
15512 15519 dtrace_enabling_destroy(enab);
15513 15520 mutex_exit(&dtrace_lock);
15514 15521 mutex_exit(&cpu_lock);
15515 15522 dtrace_dof_destroy(dof);
15516 15523 return (rval);
15517 15524 }
15518 15525
15519 15526 if ((err = dtrace_enabling_match(enab, rv)) == 0) {
15520 15527 err = dtrace_enabling_retain(enab);
15521 15528 } else {
15522 15529 dtrace_enabling_destroy(enab);
15523 15530 }
15524 15531
15525 15532 mutex_exit(&cpu_lock);
15526 15533 mutex_exit(&dtrace_lock);
15527 15534 dtrace_dof_destroy(dof);
15528 15535
15529 15536 return (err);
15530 15537 }
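
	/*
	 * EXAMPLE (editorial, illustrative userland sketch; not part of
	 * this change): the two uses of DTRACEIOC_ENABLE seen above.
	 * Passing a pointer to a DOF image enables the ECBs that it
	 * describes, with the number of matched probes returned through
	 * *rv; passing NULL is the cue to reevaluate all retained
	 * enablings:
	 *
	 *	nmatched = ioctl(fd, DTRACEIOC_ENABLE, dof);
	 *	(void) ioctl(fd, DTRACEIOC_ENABLE, NULL);
	 */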
15531 15538
15532 15539 case DTRACEIOC_REPLICATE: {
15533 15540 dtrace_repldesc_t desc;
15534 15541 dtrace_probedesc_t *match = &desc.dtrpd_match;
15535 15542 dtrace_probedesc_t *create = &desc.dtrpd_create;
15536 15543 int err;
15537 15544
15538 15545 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15539 15546 return (EFAULT);
15540 15547
15541 15548 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
15542 15549 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
15543 15550 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
15544 15551 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
15545 15552
15546 15553 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
15547 15554 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
15548 15555 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
15549 15556 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
15550 15557
15551 15558 mutex_enter(&dtrace_lock);
15552 15559 err = dtrace_enabling_replicate(state, match, create);
15553 15560 mutex_exit(&dtrace_lock);
15554 15561
15555 15562 return (err);
15556 15563 }
15557 15564
15558 15565 case DTRACEIOC_PROBEMATCH:
15559 15566 case DTRACEIOC_PROBES: {
15560 15567 dtrace_probe_t *probe = NULL;
15561 15568 dtrace_probedesc_t desc;
15562 15569 dtrace_probekey_t pkey;
15563 15570 dtrace_id_t i;
15564 15571 int m = 0;
15565 15572 uint32_t priv;
15566 15573 uid_t uid;
15567 15574 zoneid_t zoneid;
15568 15575
15569 15576 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15570 15577 return (EFAULT);
15571 15578
15572 15579 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
15573 15580 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
15574 15581 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
15575 15582 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';
15576 15583
15577 15584 /*
15578 15585 * Before we attempt to match this probe, we want to give
15579 15586 * all providers the opportunity to provide it.
15580 15587 */
15581 15588 if (desc.dtpd_id == DTRACE_IDNONE) {
15582 15589 mutex_enter(&dtrace_provider_lock);
15583 15590 dtrace_probe_provide(&desc, NULL);
15584 15591 mutex_exit(&dtrace_provider_lock);
15585 15592 desc.dtpd_id++;
15586 15593 }
15587 15594
15588 15595 if (cmd == DTRACEIOC_PROBEMATCH) {
15589 15596 dtrace_probekey(&desc, &pkey);
15590 15597 pkey.dtpk_id = DTRACE_IDNONE;
15591 15598 }
15592 15599
15593 15600 dtrace_cred2priv(cr, &priv, &uid, &zoneid);
15594 15601
15595 15602 mutex_enter(&dtrace_lock);
15596 15603
15597 15604 if (cmd == DTRACEIOC_PROBEMATCH) {
15598 15605 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
15599 15606 if ((probe = dtrace_probes[i - 1]) != NULL &&
15600 15607 (m = dtrace_match_probe(probe, &pkey,
15601 15608 priv, uid, zoneid)) != 0)
15602 15609 break;
15603 15610 }
15604 15611
15605 15612 if (m < 0) {
15606 15613 mutex_exit(&dtrace_lock);
15607 15614 return (EINVAL);
15608 15615 }
15609 15616
15610 15617 } else {
15611 15618 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
15612 15619 if ((probe = dtrace_probes[i - 1]) != NULL &&
15613 15620 dtrace_match_priv(probe, priv, uid, zoneid))
15614 15621 break;
15615 15622 }
15616 15623 }
15617 15624
15618 15625 if (probe == NULL) {
15619 15626 mutex_exit(&dtrace_lock);
15620 15627 return (ESRCH);
15621 15628 }
15622 15629
15623 15630 dtrace_probe_description(probe, &desc);
15624 15631 mutex_exit(&dtrace_lock);
15625 15632
15626 15633 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15627 15634 return (EFAULT);
15628 15635
15629 15636 return (0);
15630 15637 }
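
	/*
	 * EXAMPLE (editorial, illustrative userland sketch; not part of
	 * this change): probes are enumerated by reissuing
	 * DTRACEIOC_PROBES with dtpd_id set one past the previously
	 * returned probe, until the kernel answers ESRCH:
	 *
	 *	dtrace_probedesc_t pd;
	 *
	 *	bzero(&pd, sizeof (pd));
	 *
	 *	while (ioctl(fd, DTRACEIOC_PROBES, &pd) != -1) {
	 *		...
	 *		pd.dtpd_id++;
	 *	}
	 */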
15631 15638
15632 15639 case DTRACEIOC_PROBEARG: {
15633 15640 dtrace_argdesc_t desc;
15634 15641 dtrace_probe_t *probe;
15635 15642 dtrace_provider_t *prov;
15636 15643
15637 15644 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15638 15645 return (EFAULT);
15639 15646
15640 15647 if (desc.dtargd_id == DTRACE_IDNONE)
15641 15648 return (EINVAL);
15642 15649
15643 15650 if (desc.dtargd_ndx == DTRACE_ARGNONE)
15644 15651 return (EINVAL);
15645 15652
15646 15653 mutex_enter(&dtrace_provider_lock);
15647 15654 mutex_enter(&mod_lock);
15648 15655 mutex_enter(&dtrace_lock);
15649 15656
15650 15657 if (desc.dtargd_id > dtrace_nprobes) {
15651 15658 mutex_exit(&dtrace_lock);
15652 15659 mutex_exit(&mod_lock);
15653 15660 mutex_exit(&dtrace_provider_lock);
15654 15661 return (EINVAL);
15655 15662 }
15656 15663
15657 15664 if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
15658 15665 mutex_exit(&dtrace_lock);
15659 15666 mutex_exit(&mod_lock);
15660 15667 mutex_exit(&dtrace_provider_lock);
15661 15668 return (EINVAL);
15662 15669 }
15663 15670
15664 15671 mutex_exit(&dtrace_lock);
15665 15672
15666 15673 prov = probe->dtpr_provider;
15667 15674
15668 15675 if (prov->dtpv_pops.dtps_getargdesc == NULL) {
15669 15676 /*
15670 15677 * There isn't any typed information for this probe.
15671 15678 * Set the argument number to DTRACE_ARGNONE.
15672 15679 */
15673 15680 desc.dtargd_ndx = DTRACE_ARGNONE;
15674 15681 } else {
15675 15682 desc.dtargd_native[0] = '\0';
15676 15683 desc.dtargd_xlate[0] = '\0';
15677 15684 desc.dtargd_mapping = desc.dtargd_ndx;
15678 15685
15679 15686 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
15680 15687 probe->dtpr_id, probe->dtpr_arg, &desc);
15681 15688 }
15682 15689
15683 15690 mutex_exit(&mod_lock);
15684 15691 mutex_exit(&dtrace_provider_lock);
15685 15692
15686 15693 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15687 15694 return (EFAULT);
15688 15695
15689 15696 return (0);
15690 15697 }
15691 15698
15692 15699 case DTRACEIOC_GO: {
15693 15700 processorid_t cpuid;
15694 15701 rval = dtrace_state_go(state, &cpuid);
15695 15702
15696 15703 if (rval != 0)
15697 15704 return (rval);
15698 15705
15699 15706 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
15700 15707 return (EFAULT);
15701 15708
15702 15709 return (0);
15703 15710 }
15704 15711
15705 15712 case DTRACEIOC_STOP: {
15706 15713 processorid_t cpuid;
15707 15714
15708 15715 mutex_enter(&dtrace_lock);
15709 15716 rval = dtrace_state_stop(state, &cpuid);
15710 15717 mutex_exit(&dtrace_lock);
15711 15718
15712 15719 if (rval != 0)
15713 15720 return (rval);
15714 15721
15715 15722 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
15716 15723 return (EFAULT);
15717 15724
15718 15725 return (0);
15719 15726 }
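
	/*
	 * EXAMPLE (editorial, illustrative userland sketch; not part of
	 * this change): tracing on a consumer's state is started and
	 * stopped with the GO/STOP pair; each copies out the CPU on
	 * which the BEGIN or END probe, respectively, was processed:
	 *
	 *	processorid_t cpu;
	 *
	 *	if (ioctl(fd, DTRACEIOC_GO, &cpu) == -1)
	 *		...
	 *	if (ioctl(fd, DTRACEIOC_STOP, &cpu) == -1)
	 *		...
	 */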
15720 15727
15721 15728 case DTRACEIOC_DOFGET: {
15722 15729 dof_hdr_t hdr, *dof;
15723 15730 uint64_t len;
15724 15731
15725 15732 if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0)
15726 15733 return (EFAULT);
15727 15734
15728 15735 mutex_enter(&dtrace_lock);
15729 15736 dof = dtrace_dof_create(state);
15730 15737 mutex_exit(&dtrace_lock);
15731 15738
15732 15739 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
15733 15740 rval = copyout(dof, (void *)arg, len);
15734 15741 dtrace_dof_destroy(dof);
15735 15742
15736 15743 return (rval == 0 ? 0 : EFAULT);
15737 15744 }
15738 15745
15739 15746 case DTRACEIOC_AGGSNAP:
15740 15747 case DTRACEIOC_BUFSNAP: {
15741 15748 dtrace_bufdesc_t desc;
15742 15749 caddr_t cached;
15743 15750 dtrace_buffer_t *buf;
15744 15751
15745 15752 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15746 15753 return (EFAULT);
15747 15754
15748 15755 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
15749 15756 return (EINVAL);
15750 15757
15751 15758 mutex_enter(&dtrace_lock);
15752 15759
15753 15760 if (cmd == DTRACEIOC_BUFSNAP) {
15754 15761 buf = &state->dts_buffer[desc.dtbd_cpu];
15755 15762 } else {
15756 15763 buf = &state->dts_aggbuffer[desc.dtbd_cpu];
15757 15764 }
15758 15765
15759 15766 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
15760 15767 size_t sz = buf->dtb_offset;
15761 15768
15762 15769 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
15763 15770 mutex_exit(&dtrace_lock);
15764 15771 return (EBUSY);
15765 15772 }
15766 15773
15767 15774 /*
15768 15775 * If this buffer has already been consumed, we're
15769 15776 * going to indicate that there's nothing left here
15770 15777 * to consume.
15771 15778 */
15772 15779 if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
15773 15780 mutex_exit(&dtrace_lock);
15774 15781
15775 15782 desc.dtbd_size = 0;
15776 15783 desc.dtbd_drops = 0;
15777 15784 desc.dtbd_errors = 0;
15778 15785 desc.dtbd_oldest = 0;
15779 15786 sz = sizeof (desc);
15780 15787
15781 15788 if (copyout(&desc, (void *)arg, sz) != 0)
15782 15789 return (EFAULT);
15783 15790
15784 15791 return (0);
15785 15792 }
15786 15793
15787 15794 /*
15788 15795 * If this is a ring buffer that has wrapped, we want
15789 15796 * to copy the whole thing out.
15790 15797 */
15791 15798 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
15792 15799 dtrace_buffer_polish(buf);
15793 15800 sz = buf->dtb_size;
15794 15801 }
15795 15802
15796 15803 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
15797 15804 mutex_exit(&dtrace_lock);
15798 15805 return (EFAULT);
15799 15806 }
15800 15807
15801 15808 desc.dtbd_size = sz;
15802 15809 desc.dtbd_drops = buf->dtb_drops;
15803 15810 desc.dtbd_errors = buf->dtb_errors;
15804 15811 desc.dtbd_oldest = buf->dtb_xamot_offset;
15805 15812
15806 15813 mutex_exit(&dtrace_lock);
15807 15814
15808 15815 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15809 15816 return (EFAULT);
15810 15817
15811 15818 buf->dtb_flags |= DTRACEBUF_CONSUMED;
15812 15819
15813 15820 return (0);
15814 15821 }
15815 15822
15816 15823 if (buf->dtb_tomax == NULL) {
15817 15824 ASSERT(buf->dtb_xamot == NULL);
15818 15825 mutex_exit(&dtrace_lock);
15819 15826 return (ENOENT);
15820 15827 }
15821 15828
15822 15829 cached = buf->dtb_tomax;
15823 15830 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
15824 15831
15825 15832 dtrace_xcall(desc.dtbd_cpu,
15826 15833 (dtrace_xcall_t)dtrace_buffer_switch, buf);
15827 15834
15828 15835 state->dts_errors += buf->dtb_xamot_errors;
15829 15836
15830 15837 /*
15831 15838 * If the buffers did not actually switch, then the cross call
15832 15839 * did not take place -- presumably because the given CPU is
15833 15840 * not in the ready set. If this is the case, we'll return
15834 15841 * ENOENT.
15835 15842 */
15836 15843 if (buf->dtb_tomax == cached) {
15837 15844 ASSERT(buf->dtb_xamot != cached);
15838 15845 mutex_exit(&dtrace_lock);
15839 15846 return (ENOENT);
15840 15847 }
15841 15848
15842 15849 ASSERT(cached == buf->dtb_xamot);
15843 15850
15844 15851 /*
15845 15852 * We have our snapshot; now copy it out.
15846 15853 */
15847 15854 if (copyout(buf->dtb_xamot, desc.dtbd_data,
15848 15855 buf->dtb_xamot_offset) != 0) {
15849 15856 mutex_exit(&dtrace_lock);
15850 15857 return (EFAULT);
15851 15858 }
15852 15859
15853 15860 desc.dtbd_size = buf->dtb_xamot_offset;
15854 15861 desc.dtbd_drops = buf->dtb_xamot_drops;
15855 15862 desc.dtbd_errors = buf->dtb_xamot_errors;
15856 15863 desc.dtbd_oldest = 0;
15857 15864
15858 15865 mutex_exit(&dtrace_lock);
15859 15866
15860 15867 /*
15861 15868 * Finally, copy out the buffer description.
15862 15869 */
15863 15870 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15864 15871 return (EFAULT);
15865 15872
15866 15873 return (0);
15867 15874 }
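
	/*
	 * EXAMPLE (editorial, illustrative userland sketch; not part of
	 * this change): a consumer snapshots principal buffers one CPU
	 * at a time, providing a destination sized to the bufsize
	 * option; ENOENT means there is no buffer for that CPU (or the
	 * switch did not occur), and the CPU is skipped. Names are
	 * hypothetical:
	 *
	 *	dtrace_bufdesc_t db;
	 *
	 *	for (cpu = 0; cpu < ncpu; cpu++) {
	 *		db.dtbd_cpu = cpu;
	 *		db.dtbd_data = mybuf;
	 *
	 *		if (ioctl(fd, DTRACEIOC_BUFSNAP, &db) == -1) {
	 *			if (errno == ENOENT)
	 *				continue;
	 *			...
	 *		}
	 *
	 *		consume(mybuf, db.dtbd_size);
	 *	}
	 */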
15868 15875
15869 15876 case DTRACEIOC_CONF: {
15870 15877 dtrace_conf_t conf;
15871 15878
15872 15879 bzero(&conf, sizeof (conf));
15873 15880 conf.dtc_difversion = DIF_VERSION;
15874 15881 conf.dtc_difintregs = DIF_DIR_NREGS;
15875 15882 conf.dtc_diftupregs = DIF_DTR_NREGS;
15876 15883 conf.dtc_ctfmodel = CTF_MODEL_NATIVE;
15877 15884
15878 15885 if (copyout(&conf, (void *)arg, sizeof (conf)) != 0)
15879 15886 return (EFAULT);
15880 15887
15881 15888 return (0);
15882 15889 }
15883 15890
15884 15891 case DTRACEIOC_STATUS: {
15885 15892 dtrace_status_t stat;
15886 15893 dtrace_dstate_t *dstate;
15887 15894 int i, j;
15888 15895 uint64_t nerrs;
15889 15896
15890 15897 /*
15891 15898 * See the comment in dtrace_state_deadman() for the reason
15892 15899 * for setting dts_laststatus to INT64_MAX before setting
15893 15900 * it to the correct value.
15894 15901 */
15895 15902 state->dts_laststatus = INT64_MAX;
15896 15903 dtrace_membar_producer();
15897 15904 state->dts_laststatus = dtrace_gethrtime();
15898 15905
15899 15906 bzero(&stat, sizeof (stat));
15900 15907
15901 15908 mutex_enter(&dtrace_lock);
15902 15909
15903 15910 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
15904 15911 mutex_exit(&dtrace_lock);
15905 15912 return (ENOENT);
15906 15913 }
15907 15914
15908 15915 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
15909 15916 stat.dtst_exiting = 1;
15910 15917
15911 15918 nerrs = state->dts_errors;
15912 15919 dstate = &state->dts_vstate.dtvs_dynvars;
15913 15920
15914 15921 for (i = 0; i < NCPU; i++) {
15915 15922 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];
15916 15923
15917 15924 stat.dtst_dyndrops += dcpu->dtdsc_drops;
15918 15925 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
15919 15926 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;
15920 15927
15921 15928 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
15922 15929 stat.dtst_filled++;
15923 15930
15924 15931 nerrs += state->dts_buffer[i].dtb_errors;
15925 15932
15926 15933 for (j = 0; j < state->dts_nspeculations; j++) {
15927 15934 dtrace_speculation_t *spec;
15928 15935 dtrace_buffer_t *buf;
15929 15936
15930 15937 spec = &state->dts_speculations[j];
15931 15938 buf = &spec->dtsp_buffer[i];
15932 15939 stat.dtst_specdrops += buf->dtb_xamot_drops;
15933 15940 }
15934 15941 }
15935 15942
15936 15943 stat.dtst_specdrops_busy = state->dts_speculations_busy;
15937 15944 stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
15938 15945 stat.dtst_stkstroverflows = state->dts_stkstroverflows;
15939 15946 stat.dtst_dblerrors = state->dts_dblerrors;
15940 15947 stat.dtst_killed =
15941 15948 (state->dts_activity == DTRACE_ACTIVITY_KILLED);
15942 15949 stat.dtst_errors = nerrs;
15943 15950
15944 15951 mutex_exit(&dtrace_lock);
15945 15952
15946 15953 if (copyout(&stat, (void *)arg, sizeof (stat)) != 0)
15947 15954 return (EFAULT);
15948 15955
15949 15956 return (0);
15950 15957 }
15951 15958
15952 15959 case DTRACEIOC_FORMAT: {
15953 15960 dtrace_fmtdesc_t fmt;
15954 15961 char *str;
15955 15962 int len;
15956 15963
15957 15964 if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0)
15958 15965 return (EFAULT);
15959 15966
15960 15967 mutex_enter(&dtrace_lock);
15961 15968
15962 15969 if (fmt.dtfd_format == 0 ||
15963 15970 fmt.dtfd_format > state->dts_nformats) {
15964 15971 mutex_exit(&dtrace_lock);
15965 15972 return (EINVAL);
15966 15973 }
15967 15974
15968 15975 /*
15969 15976 * Format strings are allocated contiguously and they are
15970 15977 * never freed; if a format index is less than the number
15971 15978 * of formats, we can assert that the format map is non-NULL
15972 15979 * and that the format for the specified index is non-NULL.
15973 15980 */
15974 15981 ASSERT(state->dts_formats != NULL);
15975 15982 str = state->dts_formats[fmt.dtfd_format - 1];
15976 15983 ASSERT(str != NULL);
15977 15984
15978 15985 len = strlen(str) + 1;
15979 15986
15980 15987 if (len > fmt.dtfd_length) {
15981 15988 fmt.dtfd_length = len;
15982 15989
15983 15990 if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) {
15984 15991 mutex_exit(&dtrace_lock);
15985 15992 return (EINVAL);
15986 15993 }
15987 15994 } else {
15988 15995 if (copyout(str, fmt.dtfd_string, len) != 0) {
15989 15996 mutex_exit(&dtrace_lock);
15990 15997 return (EINVAL);
15991 15998 }
15992 15999 }
15993 16000
15994 16001 mutex_exit(&dtrace_lock);
15995 16002 return (0);
15996 16003 }
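
	/*
	 * EXAMPLE (editorial, illustrative userland sketch; not part of
	 * this change): fetching a format string is a two-pass protocol
	 * -- if the supplied dtfd_length is too small, the kernel copies
	 * back only the descriptor with dtfd_length set to the required
	 * size, and the caller retries with a large enough buffer:
	 *
	 *	dtrace_fmtdesc_t fmt;
	 *
	 *	bzero(&fmt, sizeof (fmt));
	 *	fmt.dtfd_format = format;
	 *
	 *	if (ioctl(fd, DTRACEIOC_FORMAT, &fmt) == -1)
	 *		...
	 *
	 *	fmt.dtfd_string = malloc(fmt.dtfd_length);
	 *
	 *	if (ioctl(fd, DTRACEIOC_FORMAT, &fmt) == -1)
	 *		...
	 */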
15997 16004
15998 16005 default:
15999 16006 break;
16000 16007 }
16001 16008
16002 16009 return (ENOTTY);
16003 16010 }
16004 16011
16005 16012 /*ARGSUSED*/
16006 16013 static int
16007 16014 dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
16008 16015 {
16009 16016 dtrace_state_t *state;
16010 16017
16011 16018 switch (cmd) {
16012 16019 case DDI_DETACH:
16013 16020 break;
16014 16021
16015 16022 case DDI_SUSPEND:
16016 16023 return (DDI_SUCCESS);
16017 16024
16018 16025 default:
16019 16026 return (DDI_FAILURE);
16020 16027 }
16021 16028
16022 16029 mutex_enter(&cpu_lock);
16023 16030 mutex_enter(&dtrace_provider_lock);
16024 16031 mutex_enter(&dtrace_lock);
16025 16032
16026 16033 ASSERT(dtrace_opens == 0);
16027 16034
16028 16035 if (dtrace_helpers > 0) {
16029 16036 mutex_exit(&dtrace_provider_lock);
16030 16037 mutex_exit(&dtrace_lock);
16031 16038 mutex_exit(&cpu_lock);
16032 16039 return (DDI_FAILURE);
16033 16040 }
16034 16041
16035 16042 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
16036 16043 mutex_exit(&dtrace_provider_lock);
16037 16044 mutex_exit(&dtrace_lock);
16038 16045 mutex_exit(&cpu_lock);
16039 16046 return (DDI_FAILURE);
16040 16047 }
16041 16048
16042 16049 dtrace_provider = NULL;
16043 16050
16044 16051 if ((state = dtrace_anon_grab()) != NULL) {
16045 16052 /*
16046 16053 * If there were ECBs on this state, the provider should
16047 16054 		 * not have been allowed to detach; assert that there is
16048 16055 * none.
16049 16056 */
16050 16057 ASSERT(state->dts_necbs == 0);
16051 16058 dtrace_state_destroy(state);
16052 16059
16053 16060 /*
16054 16061 * If we're being detached with anonymous state, we need to
16055 16062 * indicate to the kernel debugger that DTrace is now inactive.
16056 16063 */
16057 16064 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
16058 16065 }
16059 16066
16060 16067 bzero(&dtrace_anon, sizeof (dtrace_anon_t));
16061 16068 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
16062 16069 dtrace_cpu_init = NULL;
16063 16070 dtrace_helpers_cleanup = NULL;
16064 16071 dtrace_helpers_fork = NULL;
16065 16072 dtrace_cpustart_init = NULL;
16066 16073 dtrace_cpustart_fini = NULL;
16067 16074 dtrace_debugger_init = NULL;
16068 16075 dtrace_debugger_fini = NULL;
16069 16076 dtrace_modload = NULL;
16070 16077 dtrace_modunload = NULL;
16071 16078
16072 16079 ASSERT(dtrace_getf == 0);
16073 16080 ASSERT(dtrace_closef == NULL);
16074 16081
16075 16082 mutex_exit(&cpu_lock);
16076 16083
16077 16084 if (dtrace_helptrace_enabled) {
16078 16085 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
16079 16086 dtrace_helptrace_buffer = NULL;
16080 16087 }
16081 16088
16082 16089 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
16083 16090 dtrace_probes = NULL;
16084 16091 dtrace_nprobes = 0;
16085 16092
16086 16093 dtrace_hash_destroy(dtrace_bymod);
16087 16094 dtrace_hash_destroy(dtrace_byfunc);
16088 16095 dtrace_hash_destroy(dtrace_byname);
16089 16096 dtrace_bymod = NULL;
16090 16097 dtrace_byfunc = NULL;
16091 16098 dtrace_byname = NULL;
16092 16099
16093 16100 kmem_cache_destroy(dtrace_state_cache);
16094 16101 vmem_destroy(dtrace_minor);
16095 16102 vmem_destroy(dtrace_arena);
16096 16103
16097 16104 if (dtrace_toxrange != NULL) {
16098 16105 kmem_free(dtrace_toxrange,
16099 16106 dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
16100 16107 dtrace_toxrange = NULL;
16101 16108 dtrace_toxranges = 0;
16102 16109 dtrace_toxranges_max = 0;
16103 16110 }
16104 16111
16105 16112 ddi_remove_minor_node(dtrace_devi, NULL);
16106 16113 dtrace_devi = NULL;
16107 16114
16108 16115 ddi_soft_state_fini(&dtrace_softstate);
16109 16116
16110 16117 ASSERT(dtrace_vtime_references == 0);
16111 16118 ASSERT(dtrace_opens == 0);
16112 16119 ASSERT(dtrace_retained == NULL);
16113 16120
16114 16121 mutex_exit(&dtrace_lock);
16115 16122 mutex_exit(&dtrace_provider_lock);
16116 16123
16117 16124 /*
16118 16125 * We don't destroy the task queue until after we have dropped our
16119 16126 * locks (taskq_destroy() may block on running tasks). To prevent
16120 16127 * attempting to do work after we have effectively detached but before
16121 16128 * the task queue has been destroyed, all tasks dispatched via the
16122 16129 * task queue must check that DTrace is still attached before
16123 16130 * performing any operation.
16124 16131 */
16125 16132 taskq_destroy(dtrace_taskq);
16126 16133 dtrace_taskq = NULL;
16127 16134
16128 16135 return (DDI_SUCCESS);
16129 16136 }
16130 16137
16131 16138 /*ARGSUSED*/
16132 16139 static int
16133 16140 dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
16134 16141 {
16135 16142 int error;
16136 16143
16137 16144 switch (infocmd) {
16138 16145 case DDI_INFO_DEVT2DEVINFO:
16139 16146 *result = (void *)dtrace_devi;
16140 16147 error = DDI_SUCCESS;
16141 16148 break;
16142 16149 case DDI_INFO_DEVT2INSTANCE:
16143 16150 *result = (void *)0;
16144 16151 error = DDI_SUCCESS;
16145 16152 break;
16146 16153 default:
16147 16154 error = DDI_FAILURE;
16148 16155 }
16149 16156 return (error);
16150 16157 }
16151 16158
16152 16159 static struct cb_ops dtrace_cb_ops = {
16153 16160 dtrace_open, /* open */
16154 16161 dtrace_close, /* close */
16155 16162 nulldev, /* strategy */
16156 16163 nulldev, /* print */
16157 16164 nodev, /* dump */
16158 16165 nodev, /* read */
16159 16166 nodev, /* write */
16160 16167 dtrace_ioctl, /* ioctl */
16161 16168 nodev, /* devmap */
16162 16169 nodev, /* mmap */
16163 16170 nodev, /* segmap */
16164 16171 nochpoll, /* poll */
16165 16172 ddi_prop_op, /* cb_prop_op */
16166 16173 0, /* streamtab */
16167 16174 D_NEW | D_MP /* Driver compatibility flag */
16168 16175 };
16169 16176
16170 16177 static struct dev_ops dtrace_ops = {
16171 16178 DEVO_REV, /* devo_rev */
16172 16179 0, /* refcnt */
16173 16180 dtrace_info, /* get_dev_info */
16174 16181 nulldev, /* identify */
16175 16182 nulldev, /* probe */
16176 16183 dtrace_attach, /* attach */
16177 16184 dtrace_detach, /* detach */
16178 16185 nodev, /* reset */
16179 16186 &dtrace_cb_ops, /* driver operations */
16180 16187 NULL, /* bus operations */
16181 16188 nodev, /* dev power */
16182 16189 ddi_quiesce_not_needed, /* quiesce */
16183 16190 };
16184 16191
16185 16192 static struct modldrv modldrv = {
16186 16193 &mod_driverops, /* module type (this is a pseudo driver) */
16187 16194 "Dynamic Tracing", /* name of module */
16188 16195 &dtrace_ops, /* driver ops */
16189 16196 };
16190 16197
16191 16198 static struct modlinkage modlinkage = {
16192 16199 MODREV_1,
16193 16200 (void *)&modldrv,
16194 16201 NULL
16195 16202 };
16196 16203
16197 16204 int
16198 16205 _init(void)
16199 16206 {
16200 16207 return (mod_install(&modlinkage));
16201 16208 }
16202 16209
16203 16210 int
16204 16211 _info(struct modinfo *modinfop)
16205 16212 {
16206 16213 return (mod_info(&modlinkage, modinfop));
16207 16214 }
16208 16215
16209 16216 int
16210 16217 _fini(void)
16211 16218 {
16212 16219 return (mod_remove(&modlinkage));
16213 16220 }