2915 DTrace in a zone should see "cpu", "curpsinfo", et al
--- old/usr/src/uts/common/dtrace/dtrace.c
+++ new/usr/src/uts/common/dtrace/dtrace.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 - * Copyright (c) 2011, Joyent, Inc. All rights reserved.
24 + * Copyright (c) 2012, Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 /*
28 28 * DTrace - Dynamic Tracing for Solaris
29 29 *
30 30 * This is the implementation of the Solaris Dynamic Tracing framework
31 31 * (DTrace). The user-visible interface to DTrace is described at length in
32 32 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace
33 33 * library, the in-kernel DTrace framework, and the DTrace providers are
34 34 * described in the block comments in the <sys/dtrace.h> header file. The
35 35 * internal architecture of DTrace is described in the block comments in the
36 36 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace
37 37 * implementation very much assume mastery of all of these sources; if one has
38 38 * an unanswered question about the implementation, one should consult them
39 39 * first.
40 40 *
41 41 * The functions here are ordered roughly as follows:
42 42 *
43 43 * - Probe context functions
44 44 * - Probe hashing functions
45 45 * - Non-probe context utility functions
46 46 * - Matching functions
47 47 * - Provider-to-Framework API functions
48 48 * - Probe management functions
49 49 * - DIF object functions
50 50 * - Format functions
51 51 * - Predicate functions
52 52 * - ECB functions
53 53 * - Buffer functions
54 54 * - Enabling functions
55 55 * - DOF functions
56 56 * - Anonymous enabling functions
57 57 * - Consumer state functions
58 58 * - Helper functions
59 59 * - Hook functions
60 60 * - Driver cookbook functions
61 61 *
62 62 * Each group of functions begins with a block comment labelled the "DTrace
63 63 * [Group] Functions", allowing one to find each block by searching forward
64 64 * on capital-f functions.
65 65 */
66 66 #include <sys/errno.h>
67 67 #include <sys/stat.h>
68 68 #include <sys/modctl.h>
69 69 #include <sys/conf.h>
70 70 #include <sys/systm.h>
71 71 #include <sys/ddi.h>
72 72 #include <sys/sunddi.h>
73 73 #include <sys/cpuvar.h>
74 74 #include <sys/kmem.h>
75 75 #include <sys/strsubr.h>
76 76 #include <sys/sysmacros.h>
77 77 #include <sys/dtrace_impl.h>
78 78 #include <sys/atomic.h>
79 79 #include <sys/cmn_err.h>
80 80 #include <sys/mutex_impl.h>
81 81 #include <sys/rwlock_impl.h>
82 82 #include <sys/ctf_api.h>
83 83 #include <sys/panic.h>
84 84 #include <sys/priv_impl.h>
85 85 #include <sys/policy.h>
86 86 #include <sys/cred_impl.h>
87 87 #include <sys/procfs_isa.h>
88 88 #include <sys/taskq.h>
89 89 #include <sys/mkdev.h>
90 90 #include <sys/kdi.h>
91 91 #include <sys/zone.h>
92 92 #include <sys/socket.h>
93 93 #include <netinet/in.h>
94 94
95 95 /*
96 96 * DTrace Tunable Variables
97 97 *
98 98 * The following variables may be tuned by adding a line to /etc/system that
99 99 * includes both the name of the DTrace module ("dtrace") and the name of the
100 100 * variable. For example:
101 101 *
102 102 * set dtrace:dtrace_destructive_disallow = 1
103 103 *
104 104 * In general, the only variables that one should be tuning this way are those
105 105 * that affect system-wide DTrace behavior, and for which the default behavior
106 106 * is undesirable. Most of these variables are tunable on a per-consumer
107 107 * basis using DTrace options, and need not be tuned on a system-wide basis.
108 108 * When tuning these variables, avoid pathological values; while some attempt
109 109 * is made to verify the integrity of these variables, they are not considered
110 110 * part of the supported interface to DTrace, and they are therefore not
111 111 * checked comprehensively. Further, these variables should not be tuned
112 112 * dynamically via "mdb -kw" or other means; they should only be tuned via
113 113 * /etc/system.
114 114 */
115 115 int dtrace_destructive_disallow = 0;
116 116 dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
117 117 size_t dtrace_difo_maxsize = (256 * 1024);
118 118 dtrace_optval_t dtrace_dof_maxsize = (256 * 1024);
119 119 size_t dtrace_global_maxsize = (16 * 1024);
120 120 size_t dtrace_actions_max = (16 * 1024);
121 121 size_t dtrace_retain_max = 1024;
122 122 dtrace_optval_t dtrace_helper_actions_max = 1024;
123 123 dtrace_optval_t dtrace_helper_providers_max = 32;
124 124 dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
125 125 size_t dtrace_strsize_default = 256;
126 126 dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */
127 127 dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */
128 128 dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */
129 129 dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */
130 130 dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */
131 131 dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */
132 132 dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */
133 133 dtrace_optval_t dtrace_nspec_default = 1;
134 134 dtrace_optval_t dtrace_specsize_default = 32 * 1024;
135 135 dtrace_optval_t dtrace_stackframes_default = 20;
136 136 dtrace_optval_t dtrace_ustackframes_default = 20;
137 137 dtrace_optval_t dtrace_jstackframes_default = 50;
138 138 dtrace_optval_t dtrace_jstackstrsize_default = 512;
139 139 int dtrace_msgdsize_max = 128;
140 140 hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */
141 141 hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */
142 142 int dtrace_devdepth_max = 32;
143 143 int dtrace_err_verbose;
144 144 hrtime_t dtrace_deadman_interval = NANOSEC;
145 145 hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
146 146 hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
147 147 hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;
148 148
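
As an illustration of the tuning mechanism described above, a hypothetical /etc/system fragment (the variable names are the ones defined here; the chosen values are purely illustrative) might look like:

    set dtrace:dtrace_dof_maxsize = 0x80000
    set dtrace:dtrace_destructive_disallow = 1
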
149 149 /*
150 150 * DTrace External Variables
151 151 *
152 152 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
153 153 * available to DTrace consumers via the backtick (`) syntax. One of these,
154 154 * dtrace_zero, is made deliberately so: it is provided as a source of
155 155 * well-known, zero-filled memory. While this variable is not documented,
156 156 * it is used by some translators as an implementation detail.
157 157 */
158 158 const char dtrace_zero[256] = { 0 }; /* zero-filled memory */
159 159
160 160 /*
161 161 * DTrace Internal Variables
162 162 */
163 163 static dev_info_t *dtrace_devi; /* device info */
164 164 static vmem_t *dtrace_arena; /* probe ID arena */
165 165 static vmem_t *dtrace_minor; /* minor number arena */
166 166 static taskq_t *dtrace_taskq; /* task queue */
167 167 static dtrace_probe_t **dtrace_probes; /* array of all probes */
168 168 static int dtrace_nprobes; /* number of probes */
169 169 static dtrace_provider_t *dtrace_provider; /* provider list */
170 170 static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
171 171 static int dtrace_opens; /* number of opens */
172 172 static int dtrace_helpers; /* number of helpers */
173 173 static void *dtrace_softstate; /* softstate pointer */
174 174 static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
175 175 static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
176 176 static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
177 177 static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */
178 178 static int dtrace_toxranges; /* number of toxic ranges */
179 179 static int dtrace_toxranges_max; /* size of toxic range array */
180 180 static dtrace_anon_t dtrace_anon; /* anonymous enabling */
181 181 static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */
182 182 static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */
183 183 static kthread_t *dtrace_panicked; /* panicking thread */
184 184 static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
185 185 static dtrace_genid_t dtrace_probegen; /* current probe generation */
186 186 static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
187 187 static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
188 188 static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
189 189 static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
190 190 static int dtrace_dynvar_failclean; /* dynvars failed to clean */
191 191
192 192 /*
193 193 * DTrace Locking
194 194 * DTrace is protected by three (relatively coarse-grained) locks:
195 195 *
196 196 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
197 197 * including enabling state, probes, ECBs, consumer state, helper state,
198 198 * etc. Importantly, dtrace_lock is _not_ required when in probe context;
199 199 * probe context is lock-free -- synchronization is handled via the
200 200 * dtrace_sync() cross call mechanism.
201 201 *
202 202 * (2) dtrace_provider_lock is required when manipulating provider state, or
203 203 * when provider state must be held constant.
204 204 *
205 205 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
206 206 * when meta provider state must be held constant.
207 207 *
208 208 * The lock ordering between these three locks is dtrace_meta_lock before
209 209 * dtrace_provider_lock before dtrace_lock. (In particular, there are
210 210 * several places where dtrace_provider_lock is held by the framework as it
211 211 * calls into the providers -- which then call back into the framework,
212 212 * grabbing dtrace_lock.)
213 213 *
214 214 * There are two other locks in the mix: mod_lock and cpu_lock. With respect
215 215 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
216 216 * role as a coarse-grained lock; it is acquired before both of these locks.
217 217 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
218 218 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
219 219 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
220 220 * acquired _between_ dtrace_provider_lock and dtrace_lock.
221 221 */
222 222 static kmutex_t dtrace_lock; /* probe state lock */
223 223 static kmutex_t dtrace_provider_lock; /* provider state lock */
224 224 static kmutex_t dtrace_meta_lock; /* meta-provider state lock */
225 225
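
A minimal sketch of the complete ordering described above, assuming code that (hypothetically) needed all five locks at once:

    mutex_enter(&dtrace_meta_lock);
    mutex_enter(&cpu_lock);
    mutex_enter(&dtrace_provider_lock);
    mutex_enter(&mod_lock);
    mutex_enter(&dtrace_lock);
    /* ... manipulate meta-provider, provider, and framework state ... */
    mutex_exit(&dtrace_lock);
    mutex_exit(&mod_lock);
    mutex_exit(&dtrace_provider_lock);
    mutex_exit(&cpu_lock);
    mutex_exit(&dtrace_meta_lock);
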
226 226 /*
227 227 * DTrace Provider Variables
228 228 *
229 229 * These are the variables relating to DTrace as a provider (that is, the
230 230 * provider of the BEGIN, END, and ERROR probes).
231 231 */
232 232 static dtrace_pattr_t dtrace_provider_attr = {
233 233 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
234 234 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
235 235 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
236 236 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
237 237 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
238 238 };
239 239
240 240 static void
241 241 dtrace_nullop(void)
242 242 {}
243 243
244 244 static int
245 245 dtrace_enable_nullop(void)
246 246 {
247 247 return (0);
248 248 }
249 249
250 250 static dtrace_pops_t dtrace_provider_ops = {
251 251 (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
252 252 (void (*)(void *, struct modctl *))dtrace_nullop,
253 253 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
254 254 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
255 255 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
256 256 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
257 257 NULL,
258 258 NULL,
259 259 NULL,
260 260 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop
261 261 };
262 262
263 263 static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */
264 264 static dtrace_id_t dtrace_probeid_end; /* special END probe */
265 265 dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
266 266
267 267 /*
268 268 * DTrace Helper Tracing Variables
269 269 */
270 270 uint32_t dtrace_helptrace_next = 0;
271 271 uint32_t dtrace_helptrace_nlocals;
272 272 char *dtrace_helptrace_buffer;
273 273 int dtrace_helptrace_bufsize = 512 * 1024;
274 274
275 275 #ifdef DEBUG
276 276 int dtrace_helptrace_enabled = 1;
277 277 #else
278 278 int dtrace_helptrace_enabled = 0;
279 279 #endif
280 280
281 281 /*
282 282 * DTrace Error Hashing
283 283 *
 284  284  * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
285 285 * table. This is very useful for checking coverage of tests that are
286 286 * expected to induce DIF or DOF processing errors, and may be useful for
 287  287  * debugging problems in the DIF code generator or in DOF generation. The
288 288 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
289 289 */
290 290 #ifdef DEBUG
291 291 static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ];
292 292 static const char *dtrace_errlast;
293 293 static kthread_t *dtrace_errthread;
294 294 static kmutex_t dtrace_errlock;
295 295 #endif
296 296
297 297 /*
298 298 * DTrace Macros and Constants
299 299 *
300 300 * These are various macros that are useful in various spots in the
301 301 * implementation, along with a few random constants that have no meaning
302 302 * outside of the implementation. There is no real structure to this cpp
303 303 * mishmash -- but is there ever?
304 304 */
305 305 #define DTRACE_HASHSTR(hash, probe) \
306 306 dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))
307 307
308 308 #define DTRACE_HASHNEXT(hash, probe) \
309 309 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)
310 310
311 311 #define DTRACE_HASHPREV(hash, probe) \
312 312 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)
313 313
314 314 #define DTRACE_HASHEQ(hash, lhs, rhs) \
315 315 (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
316 316 *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)
317 317
318 318 #define DTRACE_AGGHASHSIZE_SLEW 17
319 319
320 320 #define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3)
321 321
322 322 /*
323 323 * The key for a thread-local variable consists of the lower 61 bits of the
324 324 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
325 325 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
326 326 * equal to a variable identifier. This is necessary (but not sufficient) to
327 327 * assure that global associative arrays never collide with thread-local
328 328 * variables. To guarantee that they cannot collide, we must also define the
329 329 * order for keying dynamic variables. That order is:
330 330 *
331 331 * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
332 332 *
333 333 * Because the variable-key and the tls-key are in orthogonal spaces, there is
334 334 * no way for a global variable key signature to match a thread-local key
335 335 * signature.
336 336 */
337 337 #define DTRACE_TLS_THRKEY(where) { \
338 338 uint_t intr = 0; \
339 339 uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
340 340 for (; actv; actv >>= 1) \
341 341 intr++; \
342 342 ASSERT(intr < (1 << 3)); \
343 343 (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
344 344 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
345 345 }
346 346
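
For illustration, a thread key produced by the macro above could be decomposed like this (a sketch only, not framework code):

    uint64_t key, did_plus_max;
    uint_t intr_level;

    DTRACE_TLS_THRKEY(key);
    did_plus_max = key & (((uint64_t)1 << 61) - 1); /* t_did + DIF_VARIABLE_MAX */
    intr_level = (uint_t)(key >> 61); /* interrupt nesting above LOCK_LEVEL */
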
347 347 #define DT_BSWAP_8(x) ((x) & 0xff)
348 348 #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
349 349 #define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
350 350 #define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
351 351
352 352 #define DT_MASK_LO 0x00000000FFFFFFFFULL
353 353
354 354 #define DTRACE_STORE(type, tomax, offset, what) \
355 355 *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
356 356
357 357 #ifndef __i386
358 358 #define DTRACE_ALIGNCHECK(addr, size, flags) \
359 359 if (addr & (size - 1)) { \
360 360 *flags |= CPU_DTRACE_BADALIGN; \
361 361 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
362 362 return (0); \
363 363 }
364 364 #else
365 365 #define DTRACE_ALIGNCHECK(addr, size, flags)
366 366 #endif
367 367
368 368 /*
369 369 * Test whether a range of memory starting at testaddr of size testsz falls
 370  370  * within the range of memory described by baseaddr, basesz. We take care to avoid
371 371 * problems with overflow and underflow of the unsigned quantities, and
372 372 * disallow all negative sizes. Ranges of size 0 are allowed.
373 373 */
374 374 #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
375 - ((testaddr) - (baseaddr) < (basesz) && \
376 - (testaddr) + (testsz) - (baseaddr) <= (basesz) && \
375 + ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
376 + (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
377 377 (testaddr) + (testsz) >= (testaddr))
378 378
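
The same overflow-safe idiom, rewritten as a freestanding function for clarity (a user-space sketch relying on standard unsigned wraparound; the kernel uses the macro directly):

    #include <stddef.h>
    #include <stdint.h>

    static int
    inrange(uintptr_t testaddr, size_t testsz, uintptr_t baseaddr, size_t basesz)
    {
            /*
             * All three comparisons are unsigned, so wraparound cannot
             * produce a false positive.
             */
            return (testaddr - baseaddr < basesz &&
                testaddr + testsz - baseaddr <= basesz &&
                testaddr + testsz >= testaddr);
    }

Note that DTRACE_INSCRATCH below plays the same trick by isolating alloc_sz on one side of the comparison.
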
379 379 /*
380 380 * Test whether alloc_sz bytes will fit in the scratch region. We isolate
381 381 * alloc_sz on the righthand side of the comparison in order to avoid overflow
382 382 * or underflow in the comparison with it. This is simpler than the INRANGE
383 383 * check above, because we know that the dtms_scratch_ptr is valid in the
384 384 * range. Allocations of size zero are allowed.
385 385 */
386 386 #define DTRACE_INSCRATCH(mstate, alloc_sz) \
387 387 ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
388 388 (mstate)->dtms_scratch_ptr >= (alloc_sz))
389 389
390 390 #define DTRACE_LOADFUNC(bits) \
391 391 /*CSTYLED*/ \
392 392 uint##bits##_t \
393 393 dtrace_load##bits(uintptr_t addr) \
394 394 { \
395 395 size_t size = bits / NBBY; \
396 396 /*CSTYLED*/ \
397 397 uint##bits##_t rval; \
398 398 int i; \
399 399 volatile uint16_t *flags = (volatile uint16_t *) \
400 400 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
401 401 \
402 402 DTRACE_ALIGNCHECK(addr, size, flags); \
403 403 \
404 404 for (i = 0; i < dtrace_toxranges; i++) { \
405 405 if (addr >= dtrace_toxrange[i].dtt_limit) \
406 406 continue; \
407 407 \
408 408 if (addr + size <= dtrace_toxrange[i].dtt_base) \
409 409 continue; \
410 410 \
411 411 /* \
412 412 * This address falls within a toxic region; return 0. \
413 413 */ \
414 414 *flags |= CPU_DTRACE_BADADDR; \
415 415 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
416 416 return (0); \
417 417 } \
418 418 \
419 419 *flags |= CPU_DTRACE_NOFAULT; \
420 420 /*CSTYLED*/ \
421 421 rval = *((volatile uint##bits##_t *)addr); \
422 422 *flags &= ~CPU_DTRACE_NOFAULT; \
423 423 \
424 424 return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \
425 425 }
426 426
427 427 #ifdef _LP64
428 428 #define dtrace_loadptr dtrace_load64
429 429 #else
430 430 #define dtrace_loadptr dtrace_load32
431 431 #endif
432 432
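
A sketch of how a caller consumes one of the generated safe loads (illustrative; compare dtrace_strncmp() later in this file): rather than risking a trap, the caller performs the load and then inspects the per-CPU fault flags:

    volatile uint16_t *flags = (volatile uint16_t *)
        &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
    uint32_t val = dtrace_load32(addr);

    if (*flags & CPU_DTRACE_FAULT) {
            /* the load faulted or fell in a toxic range; val is 0 */
    }
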
433 433 #define DTRACE_DYNHASH_FREE 0
434 434 #define DTRACE_DYNHASH_SINK 1
435 435 #define DTRACE_DYNHASH_VALID 2
436 436
437 437 #define DTRACE_MATCH_FAIL -1
438 438 #define DTRACE_MATCH_NEXT 0
439 439 #define DTRACE_MATCH_DONE 1
440 440 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
441 441 #define DTRACE_STATE_ALIGN 64
442 442
443 443 #define DTRACE_FLAGS2FLT(flags) \
444 444 (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \
445 445 ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \
446 446 ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \
447 447 ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \
448 448 ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \
449 449 ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \
450 450 ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \
451 451 ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \
452 452 ((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK : \
453 453 DTRACEFLT_UNKNOWN)
454 454
455 455 #define DTRACEACT_ISSTRING(act) \
456 456 ((act)->dta_kind == DTRACEACT_DIFEXPR && \
457 457 (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
458 458
459 459 static size_t dtrace_strlen(const char *, size_t);
460 460 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
461 461 static void dtrace_enabling_provide(dtrace_provider_t *);
462 462 static int dtrace_enabling_match(dtrace_enabling_t *, int *);
463 463 static void dtrace_enabling_matchall(void);
464 464 static void dtrace_enabling_reap(void);
465 465 static dtrace_state_t *dtrace_anon_grab(void);
466 466 static uint64_t dtrace_helper(int, dtrace_mstate_t *,
467 467 dtrace_state_t *, uint64_t, uint64_t);
468 468 static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
469 469 static void dtrace_buffer_drop(dtrace_buffer_t *);
470 470 static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
471 471 static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
472 472 dtrace_state_t *, dtrace_mstate_t *);
473 473 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
474 474 dtrace_optval_t);
475 475 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
476 476 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
477 +static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
477 478
478 479 /*
479 480 * DTrace Probe Context Functions
480 481 *
481 482 * These functions are called from probe context. Because probe context is
 482  483  * any context in which C may be called, arbitrary locks may be held,
483 484 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
484 485 * As a result, functions called from probe context may only call other DTrace
485 486 * support functions -- they may not interact at all with the system at large.
486 487 * (Note that the ASSERT macro is made probe-context safe by redefining it in
487 488 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
488 489 * loads are to be performed from probe context, they _must_ be in terms of
489 490 * the safe dtrace_load*() variants.
490 491 *
491 492 * Some functions in this block are not actually called from probe context;
492 493 * for these functions, there will be a comment above the function reading
493 494 * "Note: not called from probe context."
494 495 */
495 496 void
496 497 dtrace_panic(const char *format, ...)
497 498 {
498 499 va_list alist;
499 500
500 501 va_start(alist, format);
501 502 dtrace_vpanic(format, alist);
502 503 va_end(alist);
503 504 }
504 505
505 506 int
506 507 dtrace_assfail(const char *a, const char *f, int l)
507 508 {
508 509 dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);
509 510
510 511 /*
511 512 * We just need something here that even the most clever compiler
512 513 * cannot optimize away.
513 514 */
514 515 return (a[(uintptr_t)f]);
515 516 }
516 517
517 518 /*
518 519 * Atomically increment a specified error counter from probe context.
519 520 */
520 521 static void
521 522 dtrace_error(uint32_t *counter)
522 523 {
523 524 /*
524 525 * Most counters stored to in probe context are per-CPU counters.
525 526 * However, there are some error conditions that are sufficiently
526 527 * arcane that they don't merit per-CPU storage. If these counters
527 528 * are incremented concurrently on different CPUs, scalability will be
528 529 * adversely affected -- but we don't expect them to be white-hot in a
529 530 * correctly constructed enabling...
530 531 */
531 532 uint32_t oval, nval;
532 533
533 534 do {
534 535 oval = *counter;
535 536
536 537 if ((nval = oval + 1) == 0) {
537 538 /*
538 539 * If the counter would wrap, set it to 1 -- assuring
539 540 * that the counter is never zero when we have seen
540 541 * errors. (The counter must be 32-bits because we
541 542 * aren't guaranteed a 64-bit compare&swap operation.)
542 543 * To save this code both the infamy of being fingered
543 544 * by a priggish news story and the indignity of being
544 545 * the target of a neo-puritan witch trial, we're
545 546 * carefully avoiding any colorful description of the
546 547 * likelihood of this condition -- but suffice it to
547 548 * say that it is only slightly more likely than the
548 549 * overflow of predicate cache IDs, as discussed in
549 550 * dtrace_predicate_create().
550 551 */
551 552 nval = 1;
552 553 }
553 554 } while (dtrace_cas32(counter, oval, nval) != oval);
554 555 }
555 556
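
A user-space analogue of the same wrap-to-1 loop, using C11 atomics in place of dtrace_cas32() (a sketch, not kernel code):

    #include <stdatomic.h>
    #include <stdint.h>

    static void
    error_bump(_Atomic uint32_t *counter)
    {
            uint32_t oval = atomic_load(counter), nval;

            do {
                    nval = oval + 1;
                    if (nval == 0)
                            nval = 1; /* stay nonzero once an error is seen */
            } while (!atomic_compare_exchange_weak(counter, &oval, nval));
    }
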
556 557 /*
557 558 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
558 559 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
559 560 */
560 561 DTRACE_LOADFUNC(8)
561 562 DTRACE_LOADFUNC(16)
562 563 DTRACE_LOADFUNC(32)
563 564 DTRACE_LOADFUNC(64)
564 565
565 566 static int
566 567 dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
567 568 {
568 569 if (dest < mstate->dtms_scratch_base)
569 570 return (0);
570 571
571 572 if (dest + size < dest)
572 573 return (0);
573 574
574 575 if (dest + size > mstate->dtms_scratch_ptr)
575 576 return (0);
576 577
577 578 return (1);
578 579 }
579 580
580 581 static int
581 582 dtrace_canstore_statvar(uint64_t addr, size_t sz,
582 583 dtrace_statvar_t **svars, int nsvars)
583 584 {
584 585 int i;
585 586
586 587 for (i = 0; i < nsvars; i++) {
587 588 dtrace_statvar_t *svar = svars[i];
588 589
589 590 if (svar == NULL || svar->dtsv_size == 0)
590 591 continue;
591 592
592 593 if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size))
593 594 return (1);
594 595 }
595 596
596 597 return (0);
597 598 }
598 599
599 600 /*
600 601 * Check to see if the address is within a memory region to which a store may
601 602 * be issued. This includes the DTrace scratch areas, and any DTrace variable
602 603 * region. The caller of dtrace_canstore() is responsible for performing any
603 604 * alignment checks that are needed before stores are actually executed.
604 605 */
605 606 static int
606 607 dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
607 608 dtrace_vstate_t *vstate)
608 609 {
609 610 /*
610 611 * First, check to see if the address is in scratch space...
611 612 */
612 613 if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
613 614 mstate->dtms_scratch_size))
614 615 return (1);
615 616
616 617 /*
617 618 * Now check to see if it's a dynamic variable. This check will pick
618 619 * up both thread-local variables and any global dynamically-allocated
619 620 * variables.
620 621 */
621 - if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
622 + if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
622 623 vstate->dtvs_dynvars.dtds_size)) {
623 624 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
624 625 uintptr_t base = (uintptr_t)dstate->dtds_base +
625 626 (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
626 627 uintptr_t chunkoffs;
627 628
628 629 /*
629 630 * Before we assume that we can store here, we need to make
630 631 * sure that it isn't in our metadata -- storing to our
631 632 * dynamic variable metadata would corrupt our state. For
632 633 * the range to not include any dynamic variable metadata,
633 634 * it must:
634 635 *
635 636 * (1) Start above the hash table that is at the base of
636 637 * the dynamic variable space
637 638 *
638 639 * (2) Have a starting chunk offset that is beyond the
639 640 * dtrace_dynvar_t that is at the base of every chunk
640 641 *
641 642 * (3) Not span a chunk boundary
642 643 *
643 644 */
644 645 if (addr < base)
645 646 return (0);
646 647
647 648 chunkoffs = (addr - base) % dstate->dtds_chunksize;
648 649
649 650 if (chunkoffs < sizeof (dtrace_dynvar_t))
650 651 return (0);
651 652
652 653 if (chunkoffs + sz > dstate->dtds_chunksize)
653 654 return (0);
654 655
655 656 return (1);
656 657 }
657 658
658 659 /*
659 660 * Finally, check the static local and global variables. These checks
660 661 * take the longest, so we perform them last.
661 662 */
662 663 if (dtrace_canstore_statvar(addr, sz,
663 664 vstate->dtvs_locals, vstate->dtvs_nlocals))
664 665 return (1);
665 666
666 667 if (dtrace_canstore_statvar(addr, sz,
667 668 vstate->dtvs_globals, vstate->dtvs_nglobals))
668 669 return (1);
669 670
670 671 return (0);
671 672 }
672 673
673 674
674 675 /*
675 676 * Convenience routine to check to see if the address is within a memory
676 677 * region in which a load may be issued given the user's privilege level;
677 678 * if not, it sets the appropriate error flags and loads 'addr' into the
678 679 * illegal value slot.
679 680 *
680 681 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
681 682 * appropriate memory access protection.
682 683 */
683 684 static int
684 685 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
685 686 dtrace_vstate_t *vstate)
686 687 {
687 688 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
688 689
689 690 /*
690 691 * If we hold the privilege to read from kernel memory, then
691 692 * everything is readable.
692 693 */
693 694 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
694 695 return (1);
695 696
696 697 /*
697 698 * You can obviously read that which you can store.
698 699 */
699 700 if (dtrace_canstore(addr, sz, mstate, vstate))
700 701 return (1);
701 702
702 703 /*
703 704 * We're allowed to read from our own string table.
704 705 */
705 - if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
706 + if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
706 707 mstate->dtms_difo->dtdo_strlen))
707 708 return (1);
708 709
710 + if (vstate->dtvs_state != NULL &&
711 + dtrace_priv_proc(vstate->dtvs_state, mstate)) {
712 + proc_t *p;
713 +
714 + /*
715 + * When we have privileges to the current process, there are
716 + * several context-related kernel structures that are safe to
717 + * read, even absent the privilege to read from kernel memory.
718 + * These reads are safe because these structures contain only
719 + * state that (1) we're permitted to read, (2) is harmless or
720 + * (3) contains pointers to additional kernel state that we're
721 + * not permitted to read (and as such, do not present an
722 + * opportunity for privilege escalation). Finally (and
723 + * critically), because of the nature of their relation with
724 + * the current thread context, the memory associated with these
725 + * structures cannot change over the duration of probe context,
726 + * and it is therefore impossible for this memory to be
727 + * deallocated and reallocated as something else while it's
728 + * being operated upon.
729 + */
730 + if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
731 + return (1);
732 +
733 + if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
734 + sz, curthread->t_procp, sizeof (proc_t))) {
735 + return (1);
736 + }
737 +
738 + if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
739 + curthread->t_cred, sizeof (cred_t))) {
740 + return (1);
741 + }
742 +
743 + if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
744 + &(p->p_pidp->pid_id), sizeof (pid_t))) {
745 + return (1);
746 + }
747 +
748 + if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
749 + curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
750 + return (1);
751 + }
752 + }
753 +
709 754 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
710 755 *illval = addr;
711 756 return (0);
712 757 }
713 758
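
This new block is the substance of issue 2915: with only process-scoped privileges (e.g. dtrace_proc/dtrace_user inside a non-global zone), loads through curthread, the current proc_t, cred_t, p_pidp->pid_id, and the leading fields of t_cpu no longer set CPU_DTRACE_KPRIV. That is what lets D variables such as cpu and curpsinfo work in a zone; for example, an enabling like dtrace -n 'syscall:::entry { @[curpsinfo->pr_psargs] = count(); }' (illustrative) can now evaluate the loads its translator performs.
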
714 759 /*
715 760 * Convenience routine to check to see if a given string is within a memory
716 761 * region in which a load may be issued given the user's privilege level;
717 762 * this exists so that we don't need to issue unnecessary dtrace_strlen()
718 763 * calls in the event that the user has all privileges.
719 764 */
720 765 static int
721 766 dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
722 767 dtrace_vstate_t *vstate)
723 768 {
724 769 size_t strsz;
725 770
726 771 /*
727 772 * If we hold the privilege to read from kernel memory, then
728 773 * everything is readable.
729 774 */
730 775 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
731 776 return (1);
732 777
733 778 strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz);
734 779 if (dtrace_canload(addr, strsz, mstate, vstate))
735 780 return (1);
736 781
737 782 return (0);
738 783 }
739 784
740 785 /*
741 786 * Convenience routine to check to see if a given variable is within a memory
742 787 * region in which a load may be issued given the user's privilege level.
743 788 */
744 789 static int
745 790 dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
746 791 dtrace_vstate_t *vstate)
747 792 {
748 793 size_t sz;
749 794 ASSERT(type->dtdt_flags & DIF_TF_BYREF);
750 795
751 796 /*
752 797 * If we hold the privilege to read from kernel memory, then
753 798 * everything is readable.
754 799 */
755 800 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
756 801 return (1);
757 802
758 803 if (type->dtdt_kind == DIF_TYPE_STRING)
759 804 sz = dtrace_strlen(src,
760 805 vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
761 806 else
762 807 sz = type->dtdt_size;
763 808
764 809 return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
765 810 }
766 811
767 812 /*
768 813 * Compare two strings using safe loads.
769 814 */
770 815 static int
771 816 dtrace_strncmp(char *s1, char *s2, size_t limit)
772 817 {
773 818 uint8_t c1, c2;
774 819 volatile uint16_t *flags;
775 820
776 821 if (s1 == s2 || limit == 0)
777 822 return (0);
778 823
779 824 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
780 825
781 826 do {
782 827 if (s1 == NULL) {
783 828 c1 = '\0';
784 829 } else {
785 830 c1 = dtrace_load8((uintptr_t)s1++);
786 831 }
787 832
788 833 if (s2 == NULL) {
789 834 c2 = '\0';
790 835 } else {
791 836 c2 = dtrace_load8((uintptr_t)s2++);
792 837 }
793 838
794 839 if (c1 != c2)
795 840 return (c1 - c2);
796 841 } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));
797 842
798 843 return (0);
799 844 }
800 845
801 846 /*
802 847 * Compute strlen(s) for a string using safe memory accesses. The additional
 803  848  * lim parameter is used to specify a maximum length to ensure completion.
804 849 */
805 850 static size_t
806 851 dtrace_strlen(const char *s, size_t lim)
807 852 {
808 853 uint_t len;
809 854
810 855 for (len = 0; len != lim; len++) {
811 856 if (dtrace_load8((uintptr_t)s++) == '\0')
812 857 break;
813 858 }
814 859
815 860 return (len);
816 861 }
817 862
818 863 /*
819 864 * Check if an address falls within a toxic region.
820 865 */
821 866 static int
822 867 dtrace_istoxic(uintptr_t kaddr, size_t size)
823 868 {
824 869 uintptr_t taddr, tsize;
825 870 int i;
826 871
827 872 for (i = 0; i < dtrace_toxranges; i++) {
828 873 taddr = dtrace_toxrange[i].dtt_base;
829 874 tsize = dtrace_toxrange[i].dtt_limit - taddr;
830 875
831 876 if (kaddr - taddr < tsize) {
832 877 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
833 878 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
834 879 return (1);
835 880 }
836 881
837 882 if (taddr - kaddr < size) {
838 883 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
839 884 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
840 885 return (1);
841 886 }
842 887 }
843 888
844 889 return (0);
845 890 }
846 891
847 892 /*
848 893 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe
849 894 * memory specified by the DIF program. The dst is assumed to be safe memory
850 895 * that we can store to directly because it is managed by DTrace. As with
851 896 * standard bcopy, overlapping copies are handled properly.
852 897 */
853 898 static void
854 899 dtrace_bcopy(const void *src, void *dst, size_t len)
855 900 {
856 901 if (len != 0) {
857 902 uint8_t *s1 = dst;
858 903 const uint8_t *s2 = src;
859 904
860 905 if (s1 <= s2) {
861 906 do {
862 907 *s1++ = dtrace_load8((uintptr_t)s2++);
863 908 } while (--len != 0);
864 909 } else {
865 910 s2 += len;
866 911 s1 += len;
867 912
868 913 do {
869 914 *--s1 = dtrace_load8((uintptr_t)--s2);
870 915 } while (--len != 0);
871 916 }
872 917 }
873 918 }
874 919
875 920 /*
876 921 * Copy src to dst using safe memory accesses, up to either the specified
877 922 * length, or the point that a nul byte is encountered. The src is assumed to
878 923 * be unsafe memory specified by the DIF program. The dst is assumed to be
879 924 * safe memory that we can store to directly because it is managed by DTrace.
880 925 * Unlike dtrace_bcopy(), overlapping regions are not handled.
881 926 */
882 927 static void
883 928 dtrace_strcpy(const void *src, void *dst, size_t len)
884 929 {
885 930 if (len != 0) {
886 931 uint8_t *s1 = dst, c;
887 932 const uint8_t *s2 = src;
888 933
889 934 do {
890 935 *s1++ = c = dtrace_load8((uintptr_t)s2++);
891 936 } while (--len != 0 && c != '\0');
892 937 }
893 938 }
894 939
895 940 /*
896 941 * Copy src to dst, deriving the size and type from the specified (BYREF)
897 942 * variable type. The src is assumed to be unsafe memory specified by the DIF
898 943 * program. The dst is assumed to be DTrace variable memory that is of the
899 944 * specified type; we assume that we can store to directly.
900 945 */
901 946 static void
902 947 dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
903 948 {
904 949 ASSERT(type->dtdt_flags & DIF_TF_BYREF);
905 950
906 951 if (type->dtdt_kind == DIF_TYPE_STRING) {
907 952 dtrace_strcpy(src, dst, type->dtdt_size);
908 953 } else {
909 954 dtrace_bcopy(src, dst, type->dtdt_size);
910 955 }
911 956 }
912 957
913 958 /*
914 959 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
915 960 * unsafe memory specified by the DIF program. The s2 data is assumed to be
916 961 * safe memory that we can access directly because it is managed by DTrace.
917 962 */
918 963 static int
919 964 dtrace_bcmp(const void *s1, const void *s2, size_t len)
920 965 {
921 966 volatile uint16_t *flags;
922 967
923 968 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
924 969
925 970 if (s1 == s2)
926 971 return (0);
927 972
928 973 if (s1 == NULL || s2 == NULL)
929 974 return (1);
930 975
931 976 if (s1 != s2 && len != 0) {
932 977 const uint8_t *ps1 = s1;
933 978 const uint8_t *ps2 = s2;
934 979
935 980 do {
936 981 if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
937 982 return (1);
938 983 } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
939 984 }
940 985 return (0);
941 986 }
942 987
943 988 /*
944 989 * Zero the specified region using a simple byte-by-byte loop. Note that this
945 990 * is for safe DTrace-managed memory only.
946 991 */
947 992 static void
948 993 dtrace_bzero(void *dst, size_t len)
949 994 {
950 995 uchar_t *cp;
951 996
952 997 for (cp = dst; len != 0; len--)
953 998 *cp++ = 0;
954 999 }
955 1000
956 1001 static void
957 1002 dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
958 1003 {
959 1004 uint64_t result[2];
960 1005
961 1006 result[0] = addend1[0] + addend2[0];
962 1007 result[1] = addend1[1] + addend2[1] +
963 1008 (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);
964 1009
965 1010 sum[0] = result[0];
966 1011 sum[1] = result[1];
967 1012 }
968 1013
969 1014 /*
970 1015 * Shift the 128-bit value in a by b. If b is positive, shift left.
971 1016 * If b is negative, shift right.
972 1017 */
973 1018 static void
974 1019 dtrace_shift_128(uint64_t *a, int b)
975 1020 {
976 1021 uint64_t mask;
977 1022
978 1023 if (b == 0)
979 1024 return;
980 1025
981 1026 if (b < 0) {
982 1027 b = -b;
983 1028 if (b >= 64) {
984 1029 a[0] = a[1] >> (b - 64);
985 1030 a[1] = 0;
986 1031 } else {
987 1032 a[0] >>= b;
988 1033 mask = 1LL << (64 - b);
989 1034 mask -= 1;
990 1035 a[0] |= ((a[1] & mask) << (64 - b));
991 1036 a[1] >>= b;
992 1037 }
993 1038 } else {
994 1039 if (b >= 64) {
995 1040 a[1] = a[0] << (b - 64);
996 1041 a[0] = 0;
997 1042 } else {
998 1043 a[1] <<= b;
999 1044 mask = a[0] >> (64 - b);
1000 1045 a[1] |= mask;
1001 1046 a[0] <<= b;
1002 1047 }
1003 1048 }
1004 1049 }
1005 1050
1006 1051 /*
1007 1052 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
1008 1053 * use native multiplication on those, and then re-combine into the
1009 1054 * resulting 128-bit value.
1010 1055 *
1011 1056 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
1012 1057 * hi1 * hi2 << 64 +
1013 1058 * hi1 * lo2 << 32 +
1014 1059 * hi2 * lo1 << 32 +
1015 1060 * lo1 * lo2
1016 1061 */
1017 1062 static void
1018 1063 dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
1019 1064 {
1020 1065 uint64_t hi1, hi2, lo1, lo2;
1021 1066 uint64_t tmp[2];
1022 1067
1023 1068 hi1 = factor1 >> 32;
1024 1069 hi2 = factor2 >> 32;
1025 1070
1026 1071 lo1 = factor1 & DT_MASK_LO;
1027 1072 lo2 = factor2 & DT_MASK_LO;
1028 1073
1029 1074 product[0] = lo1 * lo2;
1030 1075 product[1] = hi1 * hi2;
1031 1076
1032 1077 tmp[0] = hi1 * lo2;
1033 1078 tmp[1] = 0;
1034 1079 dtrace_shift_128(tmp, 32);
1035 1080 dtrace_add_128(product, tmp, product);
1036 1081
1037 1082 tmp[0] = hi2 * lo1;
1038 1083 tmp[1] = 0;
1039 1084 dtrace_shift_128(tmp, 32);
1040 1085 dtrace_add_128(product, tmp, product);
1041 1086 }
1042 1087
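
One quick way to sanity-check this decomposition, assuming a host compiler that provides unsigned __int128 (purely illustrative; not part of the kernel build):

    #include <assert.h>
    #include <stdint.h>

    static void
    check_multiply_128(uint64_t f1, uint64_t f2)
    {
            uint64_t product[2];
            unsigned __int128 expect = (unsigned __int128)f1 * f2;

            dtrace_multiply_128(f1, f2, product);
            assert(product[0] == (uint64_t)expect); /* low 64 bits */
            assert(product[1] == (uint64_t)(expect >> 64)); /* high 64 bits */
    }
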
1043 1088 /*
1044 1089 * This privilege check should be used by actions and subroutines to
1045 1090 * verify that the user credentials of the process that enabled the
1046 1091 * invoking ECB match the target credentials
1047 1092 */
1048 1093 static int
1049 1094 dtrace_priv_proc_common_user(dtrace_state_t *state)
1050 1095 {
1051 1096 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1052 1097
1053 1098 /*
1054 1099 * We should always have a non-NULL state cred here, since if cred
1055 1100 * is null (anonymous tracing), we fast-path bypass this routine.
1056 1101 */
1057 1102 ASSERT(s_cr != NULL);
1058 1103
1059 1104 if ((cr = CRED()) != NULL &&
1060 1105 s_cr->cr_uid == cr->cr_uid &&
1061 1106 s_cr->cr_uid == cr->cr_ruid &&
1062 1107 s_cr->cr_uid == cr->cr_suid &&
1063 1108 s_cr->cr_gid == cr->cr_gid &&
1064 1109 s_cr->cr_gid == cr->cr_rgid &&
1065 1110 s_cr->cr_gid == cr->cr_sgid)
1066 1111 return (1);
1067 1112
1068 1113 return (0);
1069 1114 }
1070 1115
1071 1116 /*
1072 1117 * This privilege check should be used by actions and subroutines to
1073 1118 * verify that the zone of the process that enabled the invoking ECB
1074 1119 * matches the target credentials
1075 1120 */
1076 1121 static int
1077 1122 dtrace_priv_proc_common_zone(dtrace_state_t *state)
1078 1123 {
1079 1124 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1080 1125
1081 1126 /*
1082 1127 * We should always have a non-NULL state cred here, since if cred
1083 1128 * is null (anonymous tracing), we fast-path bypass this routine.
1084 1129 */
1085 1130 ASSERT(s_cr != NULL);
1086 1131
1087 1132 if ((cr = CRED()) != NULL &&
1088 1133 s_cr->cr_zone == cr->cr_zone)
1089 1134 return (1);
1090 1135
1091 1136 return (0);
1092 1137 }
1093 1138
1094 1139 /*
1095 1140 * This privilege check should be used by actions and subroutines to
1096 1141 * verify that the process has not setuid or changed credentials.
1097 1142 */
1098 1143 static int
1099 1144 dtrace_priv_proc_common_nocd()
1100 1145 {
1101 1146 proc_t *proc;
1102 1147
1103 1148 if ((proc = ttoproc(curthread)) != NULL &&
1104 1149 !(proc->p_flag & SNOCD))
1105 1150 return (1);
1106 1151
1107 1152 return (0);
1108 1153 }
1109 1154
1110 1155 static int
1111 1156 dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)
1112 1157 {
1113 1158 int action = state->dts_cred.dcr_action;
1114 1159
1115 1160 if (!(mstate->dtms_access & DTRACE_ACCESS_PROC))
1116 1161 goto bad;
1117 1162
1118 1163 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
1119 1164 dtrace_priv_proc_common_zone(state) == 0)
1120 1165 goto bad;
1121 1166
1122 1167 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
1123 1168 dtrace_priv_proc_common_user(state) == 0)
1124 1169 goto bad;
1125 1170
1126 1171 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
1127 1172 dtrace_priv_proc_common_nocd() == 0)
1128 1173 goto bad;
1129 1174
1130 1175 return (1);
1131 1176
1132 1177 bad:
1133 1178 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1134 1179
1135 1180 return (0);
1136 1181 }
1137 1182
1138 1183 static int
1139 1184 dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
1140 1185 {
1141 1186 if (mstate->dtms_access & DTRACE_ACCESS_PROC) {
1142 1187 if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
1143 1188 return (1);
1144 1189
1145 1190 if (dtrace_priv_proc_common_zone(state) &&
1146 1191 dtrace_priv_proc_common_user(state) &&
1147 1192 dtrace_priv_proc_common_nocd())
1148 1193 return (1);
1149 1194 }
1150 1195
1151 1196 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1152 1197
1153 1198 return (0);
1154 1199 }
1155 1200
1156 1201 static int
1157 1202 dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)
1158 1203 {
1159 1204 if ((mstate->dtms_access & DTRACE_ACCESS_PROC) &&
1160 1205 (state->dts_cred.dcr_action & DTRACE_CRA_PROC))
1161 1206 return (1);
1162 1207
1163 1208 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1164 1209
1165 1210 return (0);
1166 1211 }
1167 1212
1168 1213 static int
1169 1214 dtrace_priv_kernel(dtrace_state_t *state)
1170 1215 {
1171 1216 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
1172 1217 return (1);
1173 1218
1174 1219 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1175 1220
1176 1221 return (0);
1177 1222 }
1178 1223
1179 1224 static int
1180 1225 dtrace_priv_kernel_destructive(dtrace_state_t *state)
1181 1226 {
1182 1227 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
1183 1228 return (1);
1184 1229
1185 1230 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1186 1231
1187 1232 return (0);
1188 1233 }
1189 1234
1190 1235 /*
1191 1236 * Determine if the dte_cond of the specified ECB allows for processing of
1192 1237 * the current probe to continue. Note that this routine may allow continued
1193 1238 * processing, but with access(es) stripped from the mstate's dtms_access
1194 1239 * field.
1195 1240 */
1196 1241 static int
1197 1242 dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
1198 1243 dtrace_ecb_t *ecb)
1199 1244 {
1200 1245 dtrace_probe_t *probe = ecb->dte_probe;
1201 1246 dtrace_provider_t *prov = probe->dtpr_provider;
1202 1247 dtrace_pops_t *pops = &prov->dtpv_pops;
1203 1248 int mode = DTRACE_MODE_NOPRIV_DROP;
1204 1249
1205 1250 ASSERT(ecb->dte_cond);
1206 1251
1207 1252 if (pops->dtps_mode != NULL) {
1208 1253 mode = pops->dtps_mode(prov->dtpv_arg,
1209 1254 probe->dtpr_id, probe->dtpr_arg);
1210 1255
1211 1256 ASSERT((mode & DTRACE_MODE_USER) ||
1212 1257 (mode & DTRACE_MODE_KERNEL));
1213 1258 ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
1214 1259 (mode & DTRACE_MODE_NOPRIV_DROP));
1215 1260 }
1216 1261
1217 1262 /*
1218 1263 * If the dte_cond bits indicate that this consumer is only allowed to
1219 1264 * see user-mode firings of this probe, call the provider's dtps_mode()
1220 1265 * entry point to check that the probe was fired while in a user
1221 1266 * context. If that's not the case, use the policy specified by the
1222 1267 * provider to determine if we drop the probe or merely restrict
1223 1268 * operation.
1224 1269 */
1225 1270 if (ecb->dte_cond & DTRACE_COND_USERMODE) {
1226 1271 ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
1227 1272
1228 1273 if (!(mode & DTRACE_MODE_USER)) {
1229 1274 if (mode & DTRACE_MODE_NOPRIV_DROP)
1230 1275 return (0);
1231 1276
1232 1277 mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1233 1278 }
1234 1279 }
1235 1280
1236 1281 /*
1237 1282 * This is more subtle than it looks. We have to be absolutely certain
1238 1283 * that CRED() isn't going to change out from under us so it's only
1239 1284 * legit to examine that structure if we're in constrained situations.
1240 1285  * Currently, the only times we'll perform this check is if a non-super-user
1241 1286 * has enabled the profile or syscall providers -- providers that
1242 1287 * allow visibility of all processes. For the profile case, the check
1243 1288 * above will ensure that we're examining a user context.
1244 1289 */
1245 1290 if (ecb->dte_cond & DTRACE_COND_OWNER) {
1246 1291 cred_t *cr;
1247 1292 cred_t *s_cr = state->dts_cred.dcr_cred;
1248 1293 proc_t *proc;
1249 1294
1250 1295 ASSERT(s_cr != NULL);
1251 1296
1252 1297 if ((cr = CRED()) == NULL ||
1253 1298 s_cr->cr_uid != cr->cr_uid ||
1254 1299 s_cr->cr_uid != cr->cr_ruid ||
1255 1300 s_cr->cr_uid != cr->cr_suid ||
1256 1301 s_cr->cr_gid != cr->cr_gid ||
1257 1302 s_cr->cr_gid != cr->cr_rgid ||
1258 1303 s_cr->cr_gid != cr->cr_sgid ||
1259 1304 (proc = ttoproc(curthread)) == NULL ||
1260 1305 (proc->p_flag & SNOCD)) {
1261 1306 if (mode & DTRACE_MODE_NOPRIV_DROP)
1262 1307 return (0);
1263 1308
1264 1309 mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
1265 1310 }
1266 1311 }
1267 1312
1268 1313 /*
1269 1314 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
1270 1315 * in our zone, check to see if our mode policy is to restrict rather
1271 1316 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
1272 1317 * and DTRACE_ACCESS_ARGS
1273 1318 */
1274 1319 if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
1275 1320 cred_t *cr;
1276 1321 cred_t *s_cr = state->dts_cred.dcr_cred;
1277 1322
1278 1323 ASSERT(s_cr != NULL);
1279 1324
1280 1325 if ((cr = CRED()) == NULL ||
1281 1326 s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
1282 1327 if (mode & DTRACE_MODE_NOPRIV_DROP)
1283 1328 return (0);
1284 1329
1285 1330 mstate->dtms_access &=
1286 1331 ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
1287 1332 }
1288 1333 }
1289 1334
1290 1335 return (1);
1291 1336 }
1292 1337
1293 1338 /*
1294 1339 * Note: not called from probe context. This function is called
1295 1340 * asynchronously (and at a regular interval) from outside of probe context to
1296 1341 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
1297 1342 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1298 1343 */
1299 1344 void
1300 1345 dtrace_dynvar_clean(dtrace_dstate_t *dstate)
1301 1346 {
1302 1347 dtrace_dynvar_t *dirty;
1303 1348 dtrace_dstate_percpu_t *dcpu;
1304 1349 dtrace_dynvar_t **rinsep;
1305 1350 int i, j, work = 0;
1306 1351
1307 1352 for (i = 0; i < NCPU; i++) {
1308 1353 dcpu = &dstate->dtds_percpu[i];
1309 1354 rinsep = &dcpu->dtdsc_rinsing;
1310 1355
1311 1356 /*
1312 1357 * If the dirty list is NULL, there is no dirty work to do.
1313 1358 */
1314 1359 if (dcpu->dtdsc_dirty == NULL)
1315 1360 continue;
1316 1361
1317 1362 if (dcpu->dtdsc_rinsing != NULL) {
1318 1363 /*
1319 1364 * If the rinsing list is non-NULL, then it is because
1320 1365 * this CPU was selected to accept another CPU's
1321 1366 * dirty list -- and since that time, dirty buffers
1322 1367 * have accumulated. This is a highly unlikely
1323 1368 * condition, but we choose to ignore the dirty
1324 1369  * buffers -- they'll be picked up in a future cleanse.
1325 1370 */
1326 1371 continue;
1327 1372 }
1328 1373
1329 1374 if (dcpu->dtdsc_clean != NULL) {
1330 1375 /*
1331 1376 * If the clean list is non-NULL, then we're in a
1332 1377 * situation where a CPU has done deallocations (we
1333 1378 * have a non-NULL dirty list) but no allocations (we
1334 1379 * also have a non-NULL clean list). We can't simply
1335 1380 * move the dirty list into the clean list on this
1336 1381 * CPU, yet we also don't want to allow this condition
1337 1382 * to persist, lest a short clean list prevent a
1338 1383 * massive dirty list from being cleaned (which in
1339 1384 * turn could lead to otherwise avoidable dynamic
1340 1385 * drops). To deal with this, we look for some CPU
1341 1386 * with a NULL clean list, NULL dirty list, and NULL
1342 1387 * rinsing list -- and then we borrow this CPU to
1343 1388 * rinse our dirty list.
1344 1389 */
1345 1390 for (j = 0; j < NCPU; j++) {
1346 1391 dtrace_dstate_percpu_t *rinser;
1347 1392
1348 1393 rinser = &dstate->dtds_percpu[j];
1349 1394
1350 1395 if (rinser->dtdsc_rinsing != NULL)
1351 1396 continue;
1352 1397
1353 1398 if (rinser->dtdsc_dirty != NULL)
1354 1399 continue;
1355 1400
1356 1401 if (rinser->dtdsc_clean != NULL)
1357 1402 continue;
1358 1403
1359 1404 rinsep = &rinser->dtdsc_rinsing;
1360 1405 break;
1361 1406 }
1362 1407
1363 1408 if (j == NCPU) {
1364 1409 /*
1365 1410 * We were unable to find another CPU that
1366 1411 * could accept this dirty list -- we are
1367 1412 * therefore unable to clean it now.
1368 1413 */
1369 1414 dtrace_dynvar_failclean++;
1370 1415 continue;
1371 1416 }
1372 1417 }
1373 1418
1374 1419 work = 1;
1375 1420
1376 1421 /*
1377 1422 * Atomically move the dirty list aside.
1378 1423 */
1379 1424 do {
1380 1425 dirty = dcpu->dtdsc_dirty;
1381 1426
1382 1427 /*
1383 1428 * Before we zap the dirty list, set the rinsing list.
1384 1429 * (This allows for a potential assertion in
1385 1430 * dtrace_dynvar(): if a free dynamic variable appears
1386 1431 * on a hash chain, either the dirty list or the
1387 1432 * rinsing list for some CPU must be non-NULL.)
1388 1433 */
1389 1434 *rinsep = dirty;
1390 1435 dtrace_membar_producer();
1391 1436 } while (dtrace_casptr(&dcpu->dtdsc_dirty,
1392 1437 dirty, NULL) != dirty);
1393 1438 }
1394 1439
1395 1440 if (!work) {
1396 1441 /*
1397 1442 * We have no work to do; we can simply return.
1398 1443 */
1399 1444 return;
1400 1445 }
1401 1446
1402 1447 dtrace_sync();
1403 1448
1404 1449 for (i = 0; i < NCPU; i++) {
1405 1450 dcpu = &dstate->dtds_percpu[i];
1406 1451
1407 1452 if (dcpu->dtdsc_rinsing == NULL)
1408 1453 continue;
1409 1454
1410 1455 /*
1411 1456 * We are now guaranteed that no hash chain contains a pointer
1412 1457 * into this dirty list; we can make it clean.
1413 1458 */
1414 1459 ASSERT(dcpu->dtdsc_clean == NULL);
1415 1460 dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
1416 1461 dcpu->dtdsc_rinsing = NULL;
1417 1462 }
1418 1463
1419 1464 /*
1420 1465 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1421 1466 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1422 1467 * This prevents a race whereby a CPU incorrectly decides that
1423 1468 * the state should be something other than DTRACE_DSTATE_CLEAN
1424 1469 * after dtrace_dynvar_clean() has completed.
1425 1470 */
1426 1471 dtrace_sync();
1427 1472
1428 1473 dstate->dtds_state = DTRACE_DSTATE_CLEAN;
1429 1474 }
1430 1475
1431 1476 /*
1432 1477  * Depending on the value of the op parameter, this function looks up,
1433 1478 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
1434 1479 * allocation is requested, this function will return a pointer to a
1435 1480 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
1436 1481 * variable can be allocated. If NULL is returned, the appropriate counter
1437 1482 * will be incremented.
1438 1483 */
1439 1484 dtrace_dynvar_t *
1440 1485 dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
1441 1486 dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
1442 1487 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
1443 1488 {
1444 1489 uint64_t hashval = DTRACE_DYNHASH_VALID;
1445 1490 dtrace_dynhash_t *hash = dstate->dtds_hash;
1446 1491 dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
1447 1492 processorid_t me = CPU->cpu_id, cpu = me;
1448 1493 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
1449 1494 size_t bucket, ksize;
1450 1495 size_t chunksize = dstate->dtds_chunksize;
1451 1496 uintptr_t kdata, lock, nstate;
1452 1497 uint_t i;
1453 1498
1454 1499 ASSERT(nkeys != 0);
1455 1500
1456 1501 /*
1457 1502 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
1458 1503 * algorithm. For the by-value portions, we perform the algorithm in
1459 1504 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
1460 1505 * bit, and seems to have only a minute effect on distribution. For
1461 1506 * the by-reference data, we perform "One-at-a-time" iterating (safely)
1462 1507 * over each referenced byte. It's painful to do this, but it's much
1463 1508 * better than pathological hash distribution. The efficacy of the
1464 1509 * hashing algorithm (and a comparison with other algorithms) may be
1465 1510 * found by running the ::dtrace_dynstat MDB dcmd.
1466 1511 */
1467 1512 for (i = 0; i < nkeys; i++) {
1468 1513 if (key[i].dttk_size == 0) {
1469 1514 uint64_t val = key[i].dttk_value;
1470 1515
1471 1516 hashval += (val >> 48) & 0xffff;
1472 1517 hashval += (hashval << 10);
1473 1518 hashval ^= (hashval >> 6);
1474 1519
1475 1520 hashval += (val >> 32) & 0xffff;
1476 1521 hashval += (hashval << 10);
1477 1522 hashval ^= (hashval >> 6);
1478 1523
1479 1524 hashval += (val >> 16) & 0xffff;
1480 1525 hashval += (hashval << 10);
1481 1526 hashval ^= (hashval >> 6);
1482 1527
1483 1528 hashval += val & 0xffff;
1484 1529 hashval += (hashval << 10);
1485 1530 hashval ^= (hashval >> 6);
1486 1531 } else {
1487 1532 /*
1488 1533 * This is incredibly painful, but it beats the hell
1489 1534 * out of the alternative.
1490 1535 */
1491 1536 uint64_t j, size = key[i].dttk_size;
1492 1537 uintptr_t base = (uintptr_t)key[i].dttk_value;
1493 1538
1494 1539 if (!dtrace_canload(base, size, mstate, vstate))
1495 1540 break;
1496 1541
1497 1542 for (j = 0; j < size; j++) {
1498 1543 hashval += dtrace_load8(base + j);
1499 1544 hashval += (hashval << 10);
1500 1545 hashval ^= (hashval >> 6);
1501 1546 }
1502 1547 }
1503 1548 }
1504 1549
1505 1550 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
1506 1551 return (NULL);
1507 1552
1508 1553 hashval += (hashval << 3);
1509 1554 hashval ^= (hashval >> 11);
1510 1555 hashval += (hashval << 15);
1511 1556
1512 1557 /*
1513 1558 * There is a remote chance (ideally, 1 in 2^31) that our hashval
1514 1559 * comes out to be one of our two sentinel hash values. If this
1515 1560 * actually happens, we set the hashval to be a value known to be a
1516 1561 * non-sentinel value.
1517 1562 */
1518 1563 if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
1519 1564 hashval = DTRACE_DYNHASH_VALID;
1520 1565
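For reference, the mixing above is Jenkins' "One-at-a-time" construction, applied in 16-bit chunks for by-value keys and byte-wise for by-reference keys. A minimal stand-alone sketch of the byte-wise variant (user-level and illustrative only -- not part of dtrace.c):

	#include <stdint.h>
	#include <stddef.h>

	/* Mix in one byte at a time; finalize with three more shifts. */
	static uint64_t
	oaat_hash(const uint8_t *data, size_t len)
	{
		uint64_t hashval = 0;
		size_t i;

		for (i = 0; i < len; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);
		}

		hashval += (hashval << 3);
		hashval ^= (hashval >> 11);
		hashval += (hashval << 15);

		return (hashval);
	}
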
1521 1566 /*
1522 1567 * Yes, it's painful to do a divide here. If the cycle count becomes
1523 1568 * important here, tricks can be pulled to reduce it. (However, it's
1524 1569 * critical that hash collisions be kept to an absolute minimum;
1525 1570 * they're much more painful than a divide.) It's better to have a
1526 1571 * solution that generates few collisions and still keeps things
1527 1572 * relatively simple.
1528 1573 */
1529 1574 bucket = hashval % dstate->dtds_hashsize;
1530 1575
1531 1576 if (op == DTRACE_DYNVAR_DEALLOC) {
1532 1577 volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;
1533 1578
1534 1579 for (;;) {
1535 1580 while ((lock = *lockp) & 1)
1536 1581 continue;
1537 1582
1538 1583 if (dtrace_casptr((void *)lockp,
1539 1584 (void *)lock, (void *)(lock + 1)) == (void *)lock)
1540 1585 break;
1541 1586 }
1542 1587
1543 1588 dtrace_membar_producer();
1544 1589 }
1545 1590
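The bucket lock above doubles as a version counter: an even value means unlocked, an odd value means a dealloc owns the bucket, and lookups later compare a saved copy against the current value to detect concurrent writers. A sketch of the convention (illustrative, with C11 atomics standing in for dtrace_casptr()):

	#include <stdatomic.h>
	#include <stdint.h>

	/* Acquire: wait for an even value, then CAS it to odd. */
	static uintptr_t
	bucket_lock(_Atomic uintptr_t *lockp)
	{
		uintptr_t lock;

		for (;;) {
			while ((lock = atomic_load(lockp)) & 1)
				continue;	/* a dealloc holds the bucket */

			if (atomic_compare_exchange_strong(lockp,
			    &lock, lock + 1))
				return (lock);	/* now odd: we own it */
		}
	}

	/* Release: a second increment makes the value even again. */
	static void
	bucket_unlock(_Atomic uintptr_t *lockp)
	{
		atomic_fetch_add(lockp, (uintptr_t)1);
	}
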
1546 1591 top:
1547 1592 prev = NULL;
1548 1593 lock = hash[bucket].dtdh_lock;
1549 1594
1550 1595 dtrace_membar_consumer();
1551 1596
1552 1597 start = hash[bucket].dtdh_chain;
1553 1598 ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
1554 1599 start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
1555 1600 op != DTRACE_DYNVAR_DEALLOC));
1556 1601
1557 1602 for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
1558 1603 dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
1559 1604 dtrace_key_t *dkey = &dtuple->dtt_key[0];
1560 1605
1561 1606 if (dvar->dtdv_hashval != hashval) {
1562 1607 if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
1563 1608 /*
1564 1609 * We've reached the sink, and therefore the
1565 1610 * end of the hash chain; we can kick out of
1566 1611 * the loop knowing that we have seen a valid
1567 1612 * snapshot of state.
1568 1613 */
1569 1614 ASSERT(dvar->dtdv_next == NULL);
1570 1615 ASSERT(dvar == &dtrace_dynhash_sink);
1571 1616 break;
1572 1617 }
1573 1618
1574 1619 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
1575 1620 /*
1576 1621 * We've gone off the rails: somewhere along
1577 1622 * the line, one of the members of this hash
1578 1623 * chain was deleted. Note that we could also
1579 1624 * detect this by simply letting this loop run
1580 1625 * to completion, as we would eventually hit
1581 1626 * the end of the dirty list. However, we
1582 1627 * want to avoid running the length of the
1583 1628 * dirty list unnecessarily (it might be quite
1584 1629 * long), so we catch this as early as
1585 1630 * possible by detecting the hash marker. In
1586 1631 * this case, we simply set dvar to NULL and
1587 1632 * break; the conditional after the loop will
1588 1633 * send us back to top.
1589 1634 */
1590 1635 dvar = NULL;
1591 1636 break;
1592 1637 }
1593 1638
1594 1639 goto next;
1595 1640 }
1596 1641
1597 1642 if (dtuple->dtt_nkeys != nkeys)
1598 1643 goto next;
1599 1644
1600 1645 for (i = 0; i < nkeys; i++, dkey++) {
1601 1646 if (dkey->dttk_size != key[i].dttk_size)
1602 1647 goto next; /* size or type mismatch */
1603 1648
1604 1649 if (dkey->dttk_size != 0) {
1605 1650 if (dtrace_bcmp(
1606 1651 (void *)(uintptr_t)key[i].dttk_value,
1607 1652 (void *)(uintptr_t)dkey->dttk_value,
1608 1653 dkey->dttk_size))
1609 1654 goto next;
1610 1655 } else {
1611 1656 if (dkey->dttk_value != key[i].dttk_value)
1612 1657 goto next;
1613 1658 }
1614 1659 }
1615 1660
1616 1661 if (op != DTRACE_DYNVAR_DEALLOC)
1617 1662 return (dvar);
1618 1663
1619 1664 ASSERT(dvar->dtdv_next == NULL ||
1620 1665 dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);
1621 1666
1622 1667 if (prev != NULL) {
1623 1668 ASSERT(hash[bucket].dtdh_chain != dvar);
1624 1669 ASSERT(start != dvar);
1625 1670 ASSERT(prev->dtdv_next == dvar);
1626 1671 prev->dtdv_next = dvar->dtdv_next;
1627 1672 } else {
1628 1673 if (dtrace_casptr(&hash[bucket].dtdh_chain,
1629 1674 start, dvar->dtdv_next) != start) {
1630 1675 /*
1631 1676 * We have failed to atomically swing the
1632 1677 * hash table head pointer, presumably because
1633 1678 * of a conflicting allocation on another CPU.
1634 1679 * We need to reread the hash chain and try
1635 1680 * again.
1636 1681 */
1637 1682 goto top;
1638 1683 }
1639 1684 }
1640 1685
1641 1686 dtrace_membar_producer();
1642 1687
1643 1688 /*
1644 1689 * Now set the hash value to indicate that it's free.
1645 1690 */
1646 1691 ASSERT(hash[bucket].dtdh_chain != dvar);
1647 1692 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
1648 1693
1649 1694 dtrace_membar_producer();
1650 1695
1651 1696 /*
1652 1697 * Set the next pointer to point at the dirty list, and
1653 1698 * atomically swing the dirty pointer to the newly freed dvar.
1654 1699 */
1655 1700 do {
1656 1701 next = dcpu->dtdsc_dirty;
1657 1702 dvar->dtdv_next = next;
1658 1703 } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);
1659 1704
1660 1705 /*
1661 1706 * Finally, unlock this hash bucket.
1662 1707 */
1663 1708 ASSERT(hash[bucket].dtdh_lock == lock);
1664 1709 ASSERT(lock & 1);
1665 1710 hash[bucket].dtdh_lock++;
1666 1711
1667 1712 return (NULL);
1668 1713 next:
1669 1714 prev = dvar;
1670 1715 continue;
1671 1716 }
1672 1717
1673 1718 if (dvar == NULL) {
1674 1719 /*
1675 1720 * If dvar is NULL, it is because we went off the rails:
1676 1721 * one of the elements that we traversed in the hash chain
1677 1722 * was deleted while we were traversing it. In this case,
1678 1723 * we assert that we aren't doing a dealloc (deallocs lock
1679 1724 * the hash bucket to prevent themselves from racing with
1680 1725 * one another), and retry the hash chain traversal.
1681 1726 */
1682 1727 ASSERT(op != DTRACE_DYNVAR_DEALLOC);
1683 1728 goto top;
1684 1729 }
1685 1730
1686 1731 if (op != DTRACE_DYNVAR_ALLOC) {
1687 1732 /*
1688 1733 * If we are not to allocate a new variable, we want to
1689 1734 * return NULL now. Before we return, check that the value
1690 1735 * of the lock word hasn't changed. If it has, we may have
1691 1736 * seen an inconsistent snapshot.
1692 1737 */
1693 1738 if (op == DTRACE_DYNVAR_NOALLOC) {
1694 1739 if (hash[bucket].dtdh_lock != lock)
1695 1740 goto top;
1696 1741 } else {
1697 1742 ASSERT(op == DTRACE_DYNVAR_DEALLOC);
1698 1743 ASSERT(hash[bucket].dtdh_lock == lock);
1699 1744 ASSERT(lock & 1);
1700 1745 hash[bucket].dtdh_lock++;
1701 1746 }
1702 1747
1703 1748 return (NULL);
1704 1749 }
1705 1750
1706 1751 /*
1707 1752 * We need to allocate a new dynamic variable. The size we need is the
1708 1753 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
1709 1754 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
1710 1755 * the size of any referred-to data (dsize). We then round the final
1711 1756 * size up to the chunksize for allocation.
1712 1757 */
1713 1758 for (ksize = 0, i = 0; i < nkeys; i++)
1714 1759 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
1715 1760
1716 1761 /*
1717 1762 * This should be pretty much impossible, but could happen if, say,
1718 1763 * strange DIF specified the tuple. Ideally, this should be an
1719 1764 * assertion and not an error condition -- but that requires that the
1720 1765 * chunksize calculation in dtrace_difo_chunksize() be absolutely
1721 1766 * bullet-proof. (That is, it must not be able to be fooled by
1722 1767 * malicious DIF.) Given the lack of backwards branches in DIF,
1723 1768 * solving this would presumably not amount to solving the Halting
1724 1769 * Problem -- but it still seems awfully hard.
1725 1770 */
1726 1771 if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
1727 1772 ksize + dsize > chunksize) {
1728 1773 dcpu->dtdsc_drops++;
1729 1774 return (NULL);
1730 1775 }
1731 1776
1732 1777 nstate = DTRACE_DSTATE_EMPTY;
1733 1778
1734 1779 do {
1735 1780 retry:
1736 1781 free = dcpu->dtdsc_free;
1737 1782
1738 1783 if (free == NULL) {
1739 1784 dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
1740 1785 void *rval;
1741 1786
1742 1787 if (clean == NULL) {
1743 1788 /*
1744 1789 * We're out of dynamic variable space on
1745 1790 * this CPU. Unless we have tried all CPUs,
1746 1791 * we'll try to allocate from a different
1747 1792 * CPU.
1748 1793 */
1749 1794 switch (dstate->dtds_state) {
1750 1795 case DTRACE_DSTATE_CLEAN: {
1751 1796 void *sp = &dstate->dtds_state;
1752 1797
1753 1798 if (++cpu >= NCPU)
1754 1799 cpu = 0;
1755 1800
1756 1801 if (dcpu->dtdsc_dirty != NULL &&
1757 1802 nstate == DTRACE_DSTATE_EMPTY)
1758 1803 nstate = DTRACE_DSTATE_DIRTY;
1759 1804
1760 1805 if (dcpu->dtdsc_rinsing != NULL)
1761 1806 nstate = DTRACE_DSTATE_RINSING;
1762 1807
1763 1808 dcpu = &dstate->dtds_percpu[cpu];
1764 1809
1765 1810 if (cpu != me)
1766 1811 goto retry;
1767 1812
1768 1813 (void) dtrace_cas32(sp,
1769 1814 DTRACE_DSTATE_CLEAN, nstate);
1770 1815
1771 1816 /*
1772 1817 * To increment the correct bean
1773 1818 * counter, take another lap.
1774 1819 */
1775 1820 goto retry;
1776 1821 }
1777 1822
1778 1823 case DTRACE_DSTATE_DIRTY:
1779 1824 dcpu->dtdsc_dirty_drops++;
1780 1825 break;
1781 1826
1782 1827 case DTRACE_DSTATE_RINSING:
1783 1828 dcpu->dtdsc_rinsing_drops++;
1784 1829 break;
1785 1830
1786 1831 case DTRACE_DSTATE_EMPTY:
1787 1832 dcpu->dtdsc_drops++;
1788 1833 break;
1789 1834 }
1790 1835
1791 1836 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
1792 1837 return (NULL);
1793 1838 }
1794 1839
1795 1840 /*
1796 1841 * The clean list appears to be non-empty. We want to
1797 1842 * move the clean list to the free list; we start by
1798 1843 * moving the clean pointer aside.
1799 1844 */
1800 1845 if (dtrace_casptr(&dcpu->dtdsc_clean,
1801 1846 clean, NULL) != clean) {
1802 1847 /*
1803 1848 * We are in one of two situations:
1804 1849 *
1805 1850 * (a) The clean list was switched to the
1806 1851 * free list by another CPU.
1807 1852 *
1808 1853 * (b) The clean list was added to by the
1809 1854 * cleansing cyclic.
1810 1855 *
1811 1856 * In either of these situations, we can
1812 1857 * just reattempt the free list allocation.
1813 1858 */
1814 1859 goto retry;
1815 1860 }
1816 1861
1817 1862 ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);
1818 1863
1819 1864 /*
1820 1865 * Now we'll move the clean list to our free list.
1821 1866 * It's impossible for this to fail: the only way
1822 1867 * the free list can be updated is through this
1823 1868 * code path, and only one CPU can own the clean list.
1824 1869 * Thus, it would only be possible for this to fail if
1825 1870 * this code were racing with dtrace_dynvar_clean().
1826 1871 * (That is, if dtrace_dynvar_clean() updated the clean
1827 1872 * list, and we ended up racing to update the free
1828 1873 * list.) This race is prevented by the dtrace_sync()
1829 1874 * in dtrace_dynvar_clean() -- which flushes the
1830 1875 * owners of the clean lists out before resetting
1831 1876 * the clean lists.
1832 1877 */
1833 1878 dcpu = &dstate->dtds_percpu[me];
1834 1879 rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
1835 1880 ASSERT(rval == NULL);
1836 1881 goto retry;
1837 1882 }
1838 1883
1839 1884 dvar = free;
1840 1885 new_free = dvar->dtdv_next;
1841 1886 } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);
1842 1887
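Both the whole-list steal of dtdsc_clean above and the single-chunk pop off dtdsc_free are instances of one CAS idiom; a compact sketch (illustrative only, again with C11 atomics in place of dtrace_casptr()):

	#include <stdatomic.h>
	#include <stddef.h>

	typedef struct node {
		struct node *next;
	} node_t;

	/* Detach an entire singly-linked list in one swing. */
	static node_t *
	list_steal_all(node_t *_Atomic *headp)
	{
		node_t *head = atomic_load(headp);

		while (head != NULL &&
		    !atomic_compare_exchange_weak(headp, &head, NULL))
			continue;	/* head reloaded on failure */

		return (head);
	}

	/*
	 * Pop one node; retry if another CPU won the race.  (This
	 * sketch ignores the ABA hazard a general-purpose allocator
	 * must consider.)
	 */
	static node_t *
	list_pop(node_t *_Atomic *headp)
	{
		node_t *head = atomic_load(headp);

		while (head != NULL &&
		    !atomic_compare_exchange_weak(headp, &head, head->next))
			continue;

		return (head);
	}
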
1843 1888 /*
1844 1889 * We have now allocated a new chunk. We copy the tuple keys into the
1845 1890 * tuple array and copy any referenced key data into the data space
1846 1891 * following the tuple array. As we do this, we relocate dttk_value
1847 1892 * in the final tuple to point to the key data address in the chunk.
1848 1893 */
1849 1894 kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
1850 1895 dvar->dtdv_data = (void *)(kdata + ksize);
1851 1896 dvar->dtdv_tuple.dtt_nkeys = nkeys;
1852 1897
1853 1898 for (i = 0; i < nkeys; i++) {
1854 1899 dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
1855 1900 size_t kesize = key[i].dttk_size;
1856 1901
1857 1902 if (kesize != 0) {
1858 1903 dtrace_bcopy(
1859 1904 (const void *)(uintptr_t)key[i].dttk_value,
1860 1905 (void *)kdata, kesize);
1861 1906 dkey->dttk_value = kdata;
1862 1907 kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
1863 1908 } else {
1864 1909 dkey->dttk_value = key[i].dttk_value;
1865 1910 }
1866 1911
1867 1912 dkey->dttk_size = kesize;
1868 1913 }
1869 1914
1870 1915 ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
1871 1916 dvar->dtdv_hashval = hashval;
1872 1917 dvar->dtdv_next = start;
1873 1918
1874 1919 if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
1875 1920 return (dvar);
1876 1921
1877 1922 /*
1878 1923 * The cas has failed. Either another CPU is adding an element to
1879 1924 * this hash chain, or another CPU is deleting an element from this
1880 1925 * hash chain. The simplest way to deal with both of these cases
1881 1926 * (though not necessarily the most efficient) is to free our
1882 1927 * allocated block and tail-call ourselves. Note that the free is
1883 1928 * to the dirty list and _not_ to the free list. This is to prevent
1884 1929 * races with allocators, above.
1885 1930 */
1886 1931 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
1887 1932
1888 1933 dtrace_membar_producer();
1889 1934
1890 1935 do {
1891 1936 free = dcpu->dtdsc_dirty;
1892 1937 dvar->dtdv_next = free;
1893 1938 } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);
1894 1939
1895 1940 return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
1896 1941 }
1897 1942
1898 1943 /*ARGSUSED*/
1899 1944 static void
1900 1945 dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
1901 1946 {
1902 1947 if ((int64_t)nval < (int64_t)*oval)
1903 1948 *oval = nval;
1904 1949 }
1905 1950
1906 1951 /*ARGSUSED*/
1907 1952 static void
1908 1953 dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
1909 1954 {
1910 1955 if ((int64_t)nval > (int64_t)*oval)
1911 1956 *oval = nval;
1912 1957 }
1913 1958
1914 1959 static void
1915 1960 dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
1916 1961 {
1917 1962 int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
1918 1963 int64_t val = (int64_t)nval;
1919 1964
1920 1965 if (val < 0) {
1921 1966 for (i = 0; i < zero; i++) {
1922 1967 if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
1923 1968 quanta[i] += incr;
1924 1969 return;
1925 1970 }
1926 1971 }
1927 1972 } else {
1928 1973 for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
1929 1974 if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
1930 1975 quanta[i - 1] += incr;
1931 1976 return;
1932 1977 }
1933 1978 }
1934 1979
1935 1980 quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
1936 1981 return;
1937 1982 }
1938 1983
1939 1984 ASSERT(0);
1940 1985 }
1941 1986
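quantize()'s buckets are symmetric powers of two around a central zero bucket (..., -4, -2, -1, 0, 1, 2, 4, ...). A user-level sketch of the bucket-boundary function the loop above consults (illustrative; the kernel uses the DTRACE_QUANTIZE_BUCKETVAL() macro, and the midpoint value here is an assumption):

	#include <stdint.h>

	#define	ZEROBUCKET	63	/* assumed midpoint of 127 buckets */

	/*
	 * Boundary value of bucket i: negative powers of two below
	 * the midpoint, zero at the midpoint, positive powers above.
	 */
	static int64_t
	quantize_bucketval(int i)
	{
		if (i < ZEROBUCKET)
			return (-(1LL << (ZEROBUCKET - 1 - i)));
		if (i == ZEROBUCKET)
			return (0);
		return (1LL << (i - ZEROBUCKET - 1));
	}
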
1942 1987 static void
1943 1988 dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
1944 1989 {
1945 1990 uint64_t arg = *lquanta++;
1946 1991 int32_t base = DTRACE_LQUANTIZE_BASE(arg);
1947 1992 uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
1948 1993 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
1949 1994 int32_t val = (int32_t)nval, level;
1950 1995
1951 1996 ASSERT(step != 0);
1952 1997 ASSERT(levels != 0);
1953 1998
1954 1999 if (val < base) {
1955 2000 /*
1956 2001 * This is an underflow.
1957 2002 */
1958 2003 lquanta[0] += incr;
1959 2004 return;
1960 2005 }
1961 2006
1962 2007 level = (val - base) / step;
1963 2008
1964 2009 if (level < levels) {
1965 2010 lquanta[level + 1] += incr;
1966 2011 return;
1967 2012 }
1968 2013
1969 2014 /*
1970 2015 * This is an overflow.
1971 2016 */
1972 2017 lquanta[levels + 1] += incr;
1973 2018 }
1974 2019
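Concretely, the array holds one underflow slot, one slot per level, and one overflow slot. A sketch of the index selection above as a pure function, with worked values (illustrative only):

	#include <stdint.h>

	/*
	 * Examples with base = 0, step = 10, levels = 10:
	 *	val = -1  -> slot 0 (underflow)
	 *	val = 42  -> level (42 - 0) / 10 = 4 -> slot 5
	 *	val = 999 -> level 99 >= levels -> slot 11 (overflow)
	 */
	static int
	lquantize_slot(int32_t val, int32_t base, uint16_t step,
	    uint16_t levels)
	{
		if (val < base)
			return (0);

		if ((val - base) / step < levels)
			return ((val - base) / step + 1);

		return (levels + 1);
	}
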
1975 2020 static int
1976 2021 dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
1977 2022 uint16_t high, uint16_t nsteps, int64_t value)
1978 2023 {
1979 2024 int64_t this = 1, last, next;
1980 2025 int base = 1, order;
1981 2026
1982 2027 ASSERT(factor <= nsteps);
1983 2028 ASSERT(nsteps % factor == 0);
1984 2029
1985 2030 for (order = 0; order < low; order++)
1986 2031 this *= factor;
1987 2032
1988 2033 /*
1989 2034 * If our value is less than our factor taken to the power of the
1990 2035 * low order of magnitude, it goes into the zeroth bucket.
1991 2036 */
1992 2037 if (value < (last = this))
1993 2038 return (0);
1994 2039
1995 2040 for (this *= factor; order <= high; order++) {
1996 2041 int nbuckets = this > nsteps ? nsteps : this;
1997 2042
1998 2043 if ((next = this * factor) < this) {
1999 2044 /*
2000 2045 * We should not generally get log/linear quantizations
2001 2046 			 * with a high magnitude that allows 64 bits to
2002 2047 * overflow, but we nonetheless protect against this
2003 2048 * by explicitly checking for overflow, and clamping
2004 2049 * our value accordingly.
2005 2050 */
2006 2051 value = this - 1;
2007 2052 }
2008 2053
2009 2054 if (value < this) {
2010 2055 /*
2011 2056 * If our value lies within this order of magnitude,
2012 2057 * determine its position by taking the offset within
2013 2058 * the order of magnitude, dividing by the bucket
2014 2059 * width, and adding to our (accumulated) base.
2015 2060 */
2016 2061 return (base + (value - last) / (this / nbuckets));
2017 2062 }
2018 2063
2019 2064 base += nbuckets - (nbuckets / factor);
2020 2065 last = this;
2021 2066 this = next;
2022 2067 }
2023 2068
2024 2069 /*
2025 2070 * Our value is greater than or equal to our factor taken to the
2026 2071 * power of one plus the high magnitude -- return the top bucket.
2027 2072 */
2028 2073 return (base);
2029 2074 }
2030 2075
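To make the bucket walk concrete, a worked pass with assumed parameters factor = 10, low = 0, high = 2, nsteps = 10 and value = 42:

	before loop:	this = 1; 42 >= 1, so not bucket 0; this becomes 10
	order 0:	nbuckets = 10; 42 >= 10, so
			base += 10 - 10/10 = 9 (base is now 10); this = 100
	order 1:	42 < 100, so return
			10 + (42 - 10) / (100 / 10) = 10 + 3 = bucket 13
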
2031 2076 static void
2032 2077 dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
2033 2078 {
2034 2079 uint64_t arg = *llquanta++;
2035 2080 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
2036 2081 uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
2037 2082 uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
2038 2083 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
2039 2084
2040 2085 llquanta[dtrace_aggregate_llquantize_bucket(factor,
2041 2086 low, high, nsteps, nval)] += incr;
2042 2087 }
2043 2088
2044 2089 /*ARGSUSED*/
2045 2090 static void
2046 2091 dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
2047 2092 {
2048 2093 data[0]++;
2049 2094 data[1] += nval;
2050 2095 }
2051 2096
2052 2097 /*ARGSUSED*/
2053 2098 static void
2054 2099 dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
2055 2100 {
2056 2101 int64_t snval = (int64_t)nval;
2057 2102 uint64_t tmp[2];
2058 2103
2059 2104 data[0]++;
2060 2105 data[1] += nval;
2061 2106
2062 2107 /*
2063 2108 * What we want to say here is:
2064 2109 *
2065 2110 * data[2] += nval * nval;
2066 2111 *
2067 2112 * But given that nval is 64-bit, we could easily overflow, so
2068 2113 * we do this as 128-bit arithmetic.
2069 2114 */
2070 2115 if (snval < 0)
2071 2116 snval = -snval;
2072 2117
2073 2118 dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
2074 2119 dtrace_add_128(data + 2, tmp, data + 2);
2075 2120 }
2076 2121
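Where a 128-bit integer type is available, the same running sums can be written directly; a sketch (illustrative only -- probe context cannot assume such a type, hence dtrace_multiply_128() and dtrace_add_128()):

	#include <stdint.h>

	typedef struct stddev_acc {
		uint64_t	  cnt;		/* data[0]: count */
		uint64_t	  sum;		/* data[1]: sum */
		unsigned __int128 sumsq;	/* data[2..3]: sum of squares */
	} stddev_acc_t;

	static void
	stddev_add(stddev_acc_t *acc, int64_t nval)
	{
		/* take |nval| first, exactly as the code above does */
		uint64_t mag = (nval < 0) ? -(uint64_t)nval : (uint64_t)nval;

		acc->cnt++;
		acc->sum += (uint64_t)nval;
		acc->sumsq += (unsigned __int128)mag * mag;
	}
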
2077 2122 /*ARGSUSED*/
2078 2123 static void
2079 2124 dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
2080 2125 {
2081 2126 *oval = *oval + 1;
2082 2127 }
2083 2128
2084 2129 /*ARGSUSED*/
2085 2130 static void
2086 2131 dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
2087 2132 {
2088 2133 *oval += nval;
2089 2134 }
2090 2135
2091 2136 /*
2092 2137 * Aggregate given the tuple in the principal data buffer, and the aggregating
2093 2138 * action denoted by the specified dtrace_aggregation_t. The aggregation
2094 2139 * buffer is specified as the buf parameter. This routine does not return
2095 2140 * failure; if there is no space in the aggregation buffer, the data will be
2096 2141 * dropped, and a corresponding counter incremented.
2097 2142 */
2098 2143 static void
2099 2144 dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
2100 2145 intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
2101 2146 {
2102 2147 dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
2103 2148 uint32_t i, ndx, size, fsize;
2104 2149 uint32_t align = sizeof (uint64_t) - 1;
2105 2150 dtrace_aggbuffer_t *agb;
2106 2151 dtrace_aggkey_t *key;
2107 2152 uint32_t hashval = 0, limit, isstr;
2108 2153 caddr_t tomax, data, kdata;
2109 2154 dtrace_actkind_t action;
2110 2155 dtrace_action_t *act;
2111 2156 uintptr_t offs;
2112 2157
2113 2158 if (buf == NULL)
2114 2159 return;
2115 2160
2116 2161 if (!agg->dtag_hasarg) {
2117 2162 /*
2118 2163 * Currently, only quantize() and lquantize() take additional
2119 2164 * arguments, and they have the same semantics: an increment
2120 2165 * value that defaults to 1 when not present. If additional
2121 2166 * aggregating actions take arguments, the setting of the
2122 2167 * default argument value will presumably have to become more
2123 2168 * sophisticated...
2124 2169 */
2125 2170 arg = 1;
2126 2171 }
2127 2172
2128 2173 action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
2129 2174 size = rec->dtrd_offset - agg->dtag_base;
2130 2175 fsize = size + rec->dtrd_size;
2131 2176
2132 2177 ASSERT(dbuf->dtb_tomax != NULL);
2133 2178 data = dbuf->dtb_tomax + offset + agg->dtag_base;
2134 2179
2135 2180 if ((tomax = buf->dtb_tomax) == NULL) {
2136 2181 dtrace_buffer_drop(buf);
2137 2182 return;
2138 2183 }
2139 2184
2140 2185 /*
2141 2186 * The metastructure is always at the bottom of the buffer.
2142 2187 */
2143 2188 agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
2144 2189 sizeof (dtrace_aggbuffer_t));
2145 2190
2146 2191 if (buf->dtb_offset == 0) {
2147 2192 /*
2148 2193 * We just kludge up approximately 1/8th of the size to be
2149 2194 * buckets. If this guess ends up being routinely
2150 2195 * off-the-mark, we may need to dynamically readjust this
2151 2196 * based on past performance.
2152 2197 */
2153 2198 uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);
2154 2199
2155 2200 if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
2156 2201 (uintptr_t)tomax || hashsize == 0) {
2157 2202 /*
2158 2203 * We've been given a ludicrously small buffer;
2159 2204 * increment our drop count and leave.
2160 2205 */
2161 2206 dtrace_buffer_drop(buf);
2162 2207 return;
2163 2208 }
2164 2209
2165 2210 /*
2166 2211 		 * And now, a pathetic attempt to try to get an odd (or
2167 2212 * perchance, a prime) hash size for better hash distribution.
2168 2213 */
2169 2214 if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
2170 2215 hashsize -= DTRACE_AGGHASHSIZE_SLEW;
2171 2216
2172 2217 agb->dtagb_hashsize = hashsize;
2173 2218 agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
2174 2219 agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
2175 2220 agb->dtagb_free = (uintptr_t)agb->dtagb_hash;
2176 2221
2177 2222 for (i = 0; i < agb->dtagb_hashsize; i++)
2178 2223 agb->dtagb_hash[i] = NULL;
2179 2224 }
2180 2225
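The resulting layout, sketched (an editorial illustration of the code above): key data records grow up from the start of tomax, while aggkey structures are carved downward from dtagb_free, below the fixed hash array and the metastructure at the highest addresses (what the comment above calls the bottom of the buffer):

	tomax                                        tomax + dtb_size
	+------------------//----------+---------+------+------------+
	| key data + values ->    free | aggkeys | hash | aggbuffer  |
	+------------------//----------+---------+------+------------+
	  dtb_offset grows up ->    <- dtagb_free grows down
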
2181 2226 ASSERT(agg->dtag_first != NULL);
2182 2227 ASSERT(agg->dtag_first->dta_intuple);
2183 2228
2184 2229 /*
2185 2230 * Calculate the hash value based on the key. Note that we _don't_
2186 2231 * include the aggid in the hashing (but we will store it as part of
2187 2232 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time"
2188 2233 * algorithm: a simple, quick algorithm that has no known funnels, and
2189 2234 * gets good distribution in practice. The efficacy of the hashing
2190 2235 * algorithm (and a comparison with other algorithms) may be found by
2191 2236 * running the ::dtrace_aggstat MDB dcmd.
2192 2237 */
2193 2238 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
2194 2239 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2195 2240 limit = i + act->dta_rec.dtrd_size;
2196 2241 ASSERT(limit <= size);
2197 2242 isstr = DTRACEACT_ISSTRING(act);
2198 2243
2199 2244 for (; i < limit; i++) {
2200 2245 hashval += data[i];
2201 2246 hashval += (hashval << 10);
2202 2247 hashval ^= (hashval >> 6);
2203 2248
2204 2249 if (isstr && data[i] == '\0')
2205 2250 break;
2206 2251 }
2207 2252 }
2208 2253
2209 2254 hashval += (hashval << 3);
2210 2255 hashval ^= (hashval >> 11);
2211 2256 hashval += (hashval << 15);
2212 2257
2213 2258 /*
2214 2259 * Yes, the divide here is expensive -- but it's generally the least
2215 2260 * of the performance issues given the amount of data that we iterate
2216 2261 * over to compute hash values, compare data, etc.
2217 2262 */
2218 2263 ndx = hashval % agb->dtagb_hashsize;
2219 2264
2220 2265 for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
2221 2266 ASSERT((caddr_t)key >= tomax);
2222 2267 ASSERT((caddr_t)key < tomax + buf->dtb_size);
2223 2268
2224 2269 if (hashval != key->dtak_hashval || key->dtak_size != size)
2225 2270 continue;
2226 2271
2227 2272 kdata = key->dtak_data;
2228 2273 ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);
2229 2274
2230 2275 for (act = agg->dtag_first; act->dta_intuple;
2231 2276 act = act->dta_next) {
2232 2277 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2233 2278 limit = i + act->dta_rec.dtrd_size;
2234 2279 ASSERT(limit <= size);
2235 2280 isstr = DTRACEACT_ISSTRING(act);
2236 2281
2237 2282 for (; i < limit; i++) {
2238 2283 if (kdata[i] != data[i])
2239 2284 goto next;
2240 2285
2241 2286 if (isstr && data[i] == '\0')
2242 2287 break;
2243 2288 }
2244 2289 }
2245 2290
2246 2291 if (action != key->dtak_action) {
2247 2292 /*
2248 2293 * We are aggregating on the same value in the same
2249 2294 * aggregation with two different aggregating actions.
2250 2295 * (This should have been picked up in the compiler,
2251 2296 * so we may be dealing with errant or devious DIF.)
2252 2297 * This is an error condition; we indicate as much,
2253 2298 * and return.
2254 2299 */
2255 2300 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
2256 2301 return;
2257 2302 }
2258 2303
2259 2304 /*
2260 2305 * This is a hit: we need to apply the aggregator to
2261 2306 * the value at this key.
2262 2307 */
2263 2308 agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
2264 2309 return;
2265 2310 next:
2266 2311 continue;
2267 2312 }
2268 2313
2269 2314 /*
2270 2315 * We didn't find it. We need to allocate some zero-filled space,
2271 2316 * link it into the hash table appropriately, and apply the aggregator
2272 2317 * to the (zero-filled) value.
2273 2318 */
2274 2319 offs = buf->dtb_offset;
2275 2320 while (offs & (align - 1))
2276 2321 offs += sizeof (uint32_t);
2277 2322
2278 2323 /*
2279 2324 * If we don't have enough room to both allocate a new key _and_
2280 2325 * its associated data, increment the drop count and return.
2281 2326 */
2282 2327 if ((uintptr_t)tomax + offs + fsize >
2283 2328 agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
2284 2329 dtrace_buffer_drop(buf);
2285 2330 return;
2286 2331 }
2287 2332
2288 2333 /*CONSTCOND*/
2289 2334 ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
2290 2335 key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
2291 2336 agb->dtagb_free -= sizeof (dtrace_aggkey_t);
2292 2337
2293 2338 key->dtak_data = kdata = tomax + offs;
2294 2339 buf->dtb_offset = offs + fsize;
2295 2340
2296 2341 /*
2297 2342 * Now copy the data across.
2298 2343 */
2299 2344 *((dtrace_aggid_t *)kdata) = agg->dtag_id;
2300 2345
2301 2346 for (i = sizeof (dtrace_aggid_t); i < size; i++)
2302 2347 kdata[i] = data[i];
2303 2348
2304 2349 /*
2305 2350 * Because strings are not zeroed out by default, we need to iterate
2306 2351 * looking for actions that store strings, and we need to explicitly
2307 2352 * pad these strings out with zeroes.
2308 2353 */
2309 2354 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
2310 2355 int nul;
2311 2356
2312 2357 if (!DTRACEACT_ISSTRING(act))
2313 2358 continue;
2314 2359
2315 2360 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2316 2361 limit = i + act->dta_rec.dtrd_size;
2317 2362 ASSERT(limit <= size);
2318 2363
2319 2364 for (nul = 0; i < limit; i++) {
2320 2365 if (nul) {
2321 2366 kdata[i] = '\0';
2322 2367 continue;
2323 2368 }
2324 2369
2325 2370 if (data[i] != '\0')
2326 2371 continue;
2327 2372
2328 2373 nul = 1;
2329 2374 }
2330 2375 }
2331 2376
2332 2377 for (i = size; i < fsize; i++)
2333 2378 kdata[i] = 0;
2334 2379
2335 2380 key->dtak_hashval = hashval;
2336 2381 key->dtak_size = size;
2337 2382 key->dtak_action = action;
2338 2383 key->dtak_next = agb->dtagb_hash[ndx];
2339 2384 agb->dtagb_hash[ndx] = key;
2340 2385
2341 2386 /*
2342 2387 * Finally, apply the aggregator.
2343 2388 */
2344 2389 *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
2345 2390 agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
2346 2391 }
2347 2392
2348 2393 /*
2349 2394 * Given consumer state, this routine finds a speculation in the INACTIVE
2350 2395 * state and transitions it into the ACTIVE state. If there is no speculation
2351 2396 * in the INACTIVE state, 0 is returned. In this case, no error counter is
2352 2397 * incremented -- it is up to the caller to take appropriate action.
2353 2398 */
2354 2399 static int
2355 2400 dtrace_speculation(dtrace_state_t *state)
2356 2401 {
2357 2402 int i = 0;
2358 2403 dtrace_speculation_state_t current;
2359 2404 uint32_t *stat = &state->dts_speculations_unavail, count;
2360 2405
2361 2406 while (i < state->dts_nspeculations) {
2362 2407 dtrace_speculation_t *spec = &state->dts_speculations[i];
2363 2408
2364 2409 current = spec->dtsp_state;
2365 2410
2366 2411 if (current != DTRACESPEC_INACTIVE) {
2367 2412 if (current == DTRACESPEC_COMMITTINGMANY ||
2368 2413 current == DTRACESPEC_COMMITTING ||
2369 2414 current == DTRACESPEC_DISCARDING)
2370 2415 stat = &state->dts_speculations_busy;
2371 2416 i++;
2372 2417 continue;
2373 2418 }
2374 2419
2375 2420 if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2376 2421 current, DTRACESPEC_ACTIVE) == current)
2377 2422 return (i + 1);
2378 2423 }
2379 2424
2380 2425 /*
2381 2426 * We couldn't find a speculation. If we found as much as a single
2382 2427 * busy speculation buffer, we'll attribute this failure as "busy"
2383 2428 * instead of "unavail".
2384 2429 */
2385 2430 do {
2386 2431 count = *stat;
2387 2432 } while (dtrace_cas32(stat, count, count + 1) != count);
2388 2433
2389 2434 return (0);
2390 2435 }
2391 2436
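The unavail/busy accounting at the end of dtrace_speculation() is the standard CAS increment; a sketch of the equivalent (illustrative, C11 atomics in place of dtrace_cas32()):

	#include <stdatomic.h>
	#include <stdint.h>

	/* do { c = *stat; } while (cas32(stat, c, c + 1) != c); */
	static void
	stat_bump(_Atomic uint32_t *stat)
	{
		uint32_t count = atomic_load(stat);

		while (!atomic_compare_exchange_weak(stat, &count, count + 1))
			continue;	/* count reloaded on failure */
	}
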
2392 2437 /*
2393 2438 * This routine commits an active speculation. If the specified speculation
2394 2439 * is not in a valid state to perform a commit(), this routine will silently do
2395 2440 * nothing. The state of the specified speculation is transitioned according
2396 2441  * to the state transition diagram outlined in <sys/dtrace_impl.h>.
2397 2442 */
2398 2443 static void
2399 2444 dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
2400 2445 dtrace_specid_t which)
2401 2446 {
2402 2447 dtrace_speculation_t *spec;
2403 2448 dtrace_buffer_t *src, *dest;
2404 2449 uintptr_t daddr, saddr, dlimit;
2405 2450 dtrace_speculation_state_t current, new;
2406 2451 intptr_t offs;
2407 2452
2408 2453 if (which == 0)
2409 2454 return;
2410 2455
2411 2456 if (which > state->dts_nspeculations) {
2412 2457 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2413 2458 return;
2414 2459 }
2415 2460
2416 2461 spec = &state->dts_speculations[which - 1];
2417 2462 src = &spec->dtsp_buffer[cpu];
2418 2463 dest = &state->dts_buffer[cpu];
2419 2464
2420 2465 do {
2421 2466 current = spec->dtsp_state;
2422 2467
2423 2468 if (current == DTRACESPEC_COMMITTINGMANY)
2424 2469 break;
2425 2470
2426 2471 switch (current) {
2427 2472 case DTRACESPEC_INACTIVE:
2428 2473 case DTRACESPEC_DISCARDING:
2429 2474 return;
2430 2475
2431 2476 case DTRACESPEC_COMMITTING:
2432 2477 /*
2433 2478 * This is only possible if we are (a) commit()'ing
2434 2479 * without having done a prior speculate() on this CPU
2435 2480 * and (b) racing with another commit() on a different
2436 2481 * CPU. There's nothing to do -- we just assert that
2437 2482 * our offset is 0.
2438 2483 */
2439 2484 ASSERT(src->dtb_offset == 0);
2440 2485 return;
2441 2486
2442 2487 case DTRACESPEC_ACTIVE:
2443 2488 new = DTRACESPEC_COMMITTING;
2444 2489 break;
2445 2490
2446 2491 case DTRACESPEC_ACTIVEONE:
2447 2492 /*
2448 2493 * This speculation is active on one CPU. If our
2449 2494 * buffer offset is non-zero, we know that the one CPU
2450 2495 * must be us. Otherwise, we are committing on a
2451 2496 * different CPU from the speculate(), and we must
2452 2497 * rely on being asynchronously cleaned.
2453 2498 */
2454 2499 if (src->dtb_offset != 0) {
2455 2500 new = DTRACESPEC_COMMITTING;
2456 2501 break;
2457 2502 }
2458 2503 /*FALLTHROUGH*/
2459 2504
2460 2505 case DTRACESPEC_ACTIVEMANY:
2461 2506 new = DTRACESPEC_COMMITTINGMANY;
2462 2507 break;
2463 2508
2464 2509 default:
2465 2510 ASSERT(0);
2466 2511 }
2467 2512 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2468 2513 current, new) != current);
2469 2514
2470 2515 /*
2471 2516 * We have set the state to indicate that we are committing this
2472 2517 * speculation. Now reserve the necessary space in the destination
2473 2518 * buffer.
2474 2519 */
2475 2520 if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
2476 2521 sizeof (uint64_t), state, NULL)) < 0) {
2477 2522 dtrace_buffer_drop(dest);
2478 2523 goto out;
2479 2524 }
2480 2525
2481 2526 /*
2482 2527 * We have the space; copy the buffer across. (Note that this is a
2483 2528  * highly suboptimal bcopy(); in the unlikely event that this becomes
2484 2529 * a serious performance issue, a high-performance DTrace-specific
2485 2530 * bcopy() should obviously be invented.)
2486 2531 */
2487 2532 daddr = (uintptr_t)dest->dtb_tomax + offs;
2488 2533 dlimit = daddr + src->dtb_offset;
2489 2534 saddr = (uintptr_t)src->dtb_tomax;
2490 2535
2491 2536 /*
2492 2537 * First, the aligned portion.
2493 2538 */
2494 2539 while (dlimit - daddr >= sizeof (uint64_t)) {
2495 2540 *((uint64_t *)daddr) = *((uint64_t *)saddr);
2496 2541
2497 2542 daddr += sizeof (uint64_t);
2498 2543 saddr += sizeof (uint64_t);
2499 2544 }
2500 2545
2501 2546 /*
2502 2547 * Now any left-over bit...
2503 2548 */
2504 2549 while (dlimit - daddr)
2505 2550 *((uint8_t *)daddr++) = *((uint8_t *)saddr++);
2506 2551
2507 2552 /*
2508 2553 * Finally, commit the reserved space in the destination buffer.
2509 2554 */
2510 2555 dest->dtb_offset = offs + src->dtb_offset;
2511 2556
2512 2557 out:
2513 2558 /*
2514 2559 * If we're lucky enough to be the only active CPU on this speculation
2515 2560 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
2516 2561 */
2517 2562 if (current == DTRACESPEC_ACTIVE ||
2518 2563 (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
2519 2564 uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
2520 2565 DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);
2521 2566
2522 2567 ASSERT(rval == DTRACESPEC_COMMITTING);
2523 2568 }
2524 2569
2525 2570 src->dtb_offset = 0;
2526 2571 src->dtb_xamot_drops += src->dtb_drops;
2527 2572 src->dtb_drops = 0;
2528 2573 }
2529 2574
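Read as a table, the commit()-side transitions that the do/while loop in dtrace_speculation_commit() encodes (an editorial summary; the authoritative diagram is in <sys/dtrace_impl.h>):

	INACTIVE, DISCARDING      -> return (nothing to commit)
	COMMITTING                -> return (racing commit; offset is 0)
	COMMITTINGMANY            -> proceed (copy; cleaned asynchronously)
	ACTIVE                    -> COMMITTING
	ACTIVEONE, local data     -> COMMITTING
	ACTIVEONE, no local data  -> COMMITTINGMANY
	ACTIVEMANY                -> COMMITTINGMANY
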
2530 2575 /*
2531 2576 * This routine discards an active speculation. If the specified speculation
2532 2577 * is not in a valid state to perform a discard(), this routine will silently
2533 2578 * do nothing. The state of the specified speculation is transitioned
2534 2579  * according to the state transition diagram outlined in <sys/dtrace_impl.h>.
2535 2580 */
2536 2581 static void
2537 2582 dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
2538 2583 dtrace_specid_t which)
2539 2584 {
2540 2585 dtrace_speculation_t *spec;
2541 2586 dtrace_speculation_state_t current, new;
2542 2587 dtrace_buffer_t *buf;
2543 2588
2544 2589 if (which == 0)
2545 2590 return;
2546 2591
2547 2592 if (which > state->dts_nspeculations) {
2548 2593 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2549 2594 return;
2550 2595 }
2551 2596
2552 2597 spec = &state->dts_speculations[which - 1];
2553 2598 buf = &spec->dtsp_buffer[cpu];
2554 2599
2555 2600 do {
2556 2601 current = spec->dtsp_state;
2557 2602
2558 2603 switch (current) {
2559 2604 case DTRACESPEC_INACTIVE:
2560 2605 case DTRACESPEC_COMMITTINGMANY:
2561 2606 case DTRACESPEC_COMMITTING:
2562 2607 case DTRACESPEC_DISCARDING:
2563 2608 return;
2564 2609
2565 2610 case DTRACESPEC_ACTIVE:
2566 2611 case DTRACESPEC_ACTIVEMANY:
2567 2612 new = DTRACESPEC_DISCARDING;
2568 2613 break;
2569 2614
2570 2615 case DTRACESPEC_ACTIVEONE:
2571 2616 if (buf->dtb_offset != 0) {
2572 2617 new = DTRACESPEC_INACTIVE;
2573 2618 } else {
2574 2619 new = DTRACESPEC_DISCARDING;
2575 2620 }
2576 2621 break;
2577 2622
2578 2623 default:
2579 2624 ASSERT(0);
2580 2625 }
2581 2626 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2582 2627 current, new) != current);
2583 2628
2584 2629 buf->dtb_offset = 0;
2585 2630 buf->dtb_drops = 0;
2586 2631 }
2587 2632
2588 2633 /*
2589 2634 * Note: not called from probe context. This function is called
2590 2635 * asynchronously from cross call context to clean any speculations that are
2591 2636 * in the COMMITTINGMANY or DISCARDING states. These speculations may not be
2592 2637 * transitioned back to the INACTIVE state until all CPUs have cleaned the
2593 2638 * speculation.
2594 2639 */
2595 2640 static void
2596 2641 dtrace_speculation_clean_here(dtrace_state_t *state)
2597 2642 {
2598 2643 dtrace_icookie_t cookie;
2599 2644 processorid_t cpu = CPU->cpu_id;
2600 2645 dtrace_buffer_t *dest = &state->dts_buffer[cpu];
2601 2646 dtrace_specid_t i;
2602 2647
2603 2648 cookie = dtrace_interrupt_disable();
2604 2649
2605 2650 if (dest->dtb_tomax == NULL) {
2606 2651 dtrace_interrupt_enable(cookie);
2607 2652 return;
2608 2653 }
2609 2654
2610 2655 for (i = 0; i < state->dts_nspeculations; i++) {
2611 2656 dtrace_speculation_t *spec = &state->dts_speculations[i];
2612 2657 dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];
2613 2658
2614 2659 if (src->dtb_tomax == NULL)
2615 2660 continue;
2616 2661
2617 2662 if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
2618 2663 src->dtb_offset = 0;
2619 2664 continue;
2620 2665 }
2621 2666
2622 2667 if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
2623 2668 continue;
2624 2669
2625 2670 if (src->dtb_offset == 0)
2626 2671 continue;
2627 2672
2628 2673 dtrace_speculation_commit(state, cpu, i + 1);
2629 2674 }
2630 2675
2631 2676 dtrace_interrupt_enable(cookie);
2632 2677 }
2633 2678
2634 2679 /*
2635 2680 * Note: not called from probe context. This function is called
2636 2681 * asynchronously (and at a regular interval) to clean any speculations that
2637 2682 * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there
2638 2683 * is work to be done, it cross calls all CPUs to perform that work;
2639 2684  * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
2640 2685 * INACTIVE state until they have been cleaned by all CPUs.
2641 2686 */
2642 2687 static void
2643 2688 dtrace_speculation_clean(dtrace_state_t *state)
2644 2689 {
2645 2690 int work = 0, rv;
2646 2691 dtrace_specid_t i;
2647 2692
2648 2693 for (i = 0; i < state->dts_nspeculations; i++) {
2649 2694 dtrace_speculation_t *spec = &state->dts_speculations[i];
2650 2695
2651 2696 ASSERT(!spec->dtsp_cleaning);
2652 2697
2653 2698 if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
2654 2699 spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
2655 2700 continue;
2656 2701
2657 2702 work++;
2658 2703 spec->dtsp_cleaning = 1;
2659 2704 }
2660 2705
2661 2706 if (!work)
2662 2707 return;
2663 2708
2664 2709 dtrace_xcall(DTRACE_CPUALL,
2665 2710 (dtrace_xcall_t)dtrace_speculation_clean_here, state);
2666 2711
2667 2712 /*
2668 2713 * We now know that all CPUs have committed or discarded their
2669 2714 * speculation buffers, as appropriate. We can now set the state
2670 2715 * to inactive.
2671 2716 */
2672 2717 for (i = 0; i < state->dts_nspeculations; i++) {
2673 2718 dtrace_speculation_t *spec = &state->dts_speculations[i];
2674 2719 dtrace_speculation_state_t current, new;
2675 2720
2676 2721 if (!spec->dtsp_cleaning)
2677 2722 continue;
2678 2723
2679 2724 current = spec->dtsp_state;
2680 2725 ASSERT(current == DTRACESPEC_DISCARDING ||
2681 2726 current == DTRACESPEC_COMMITTINGMANY);
2682 2727
2683 2728 new = DTRACESPEC_INACTIVE;
2684 2729
2685 2730 rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
2686 2731 ASSERT(rv == current);
2687 2732 spec->dtsp_cleaning = 0;
2688 2733 }
2689 2734 }
2690 2735
2691 2736 /*
2692 2737 * Called as part of a speculate() to get the speculative buffer associated
2693 2738 * with a given speculation. Returns NULL if the specified speculation is not
2694 2739 * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and
2695 2740 * the active CPU is not the specified CPU -- the speculation will be
2696 2741 * atomically transitioned into the ACTIVEMANY state.
2697 2742 */
2698 2743 static dtrace_buffer_t *
2699 2744 dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
2700 2745 dtrace_specid_t which)
2701 2746 {
2702 2747 dtrace_speculation_t *spec;
2703 2748 dtrace_speculation_state_t current, new;
2704 2749 dtrace_buffer_t *buf;
2705 2750
2706 2751 if (which == 0)
2707 2752 return (NULL);
2708 2753
2709 2754 if (which > state->dts_nspeculations) {
2710 2755 cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2711 2756 return (NULL);
2712 2757 }
2713 2758
2714 2759 spec = &state->dts_speculations[which - 1];
2715 2760 buf = &spec->dtsp_buffer[cpuid];
2716 2761
2717 2762 do {
2718 2763 current = spec->dtsp_state;
2719 2764
2720 2765 switch (current) {
2721 2766 case DTRACESPEC_INACTIVE:
2722 2767 case DTRACESPEC_COMMITTINGMANY:
2723 2768 case DTRACESPEC_DISCARDING:
2724 2769 return (NULL);
2725 2770
2726 2771 case DTRACESPEC_COMMITTING:
2727 2772 ASSERT(buf->dtb_offset == 0);
2728 2773 return (NULL);
2729 2774
2730 2775 case DTRACESPEC_ACTIVEONE:
2731 2776 /*
2732 2777 * This speculation is currently active on one CPU.
2733 2778 * Check the offset in the buffer; if it's non-zero,
2734 2779 * that CPU must be us (and we leave the state alone).
2735 2780 * If it's zero, assume that we're starting on a new
2736 2781 * CPU -- and change the state to indicate that the
2737 2782 * speculation is active on more than one CPU.
2738 2783 */
2739 2784 if (buf->dtb_offset != 0)
2740 2785 return (buf);
2741 2786
2742 2787 new = DTRACESPEC_ACTIVEMANY;
2743 2788 break;
2744 2789
2745 2790 case DTRACESPEC_ACTIVEMANY:
2746 2791 return (buf);
2747 2792
2748 2793 case DTRACESPEC_ACTIVE:
2749 2794 new = DTRACESPEC_ACTIVEONE;
2750 2795 break;
2751 2796
2752 2797 default:
2753 2798 ASSERT(0);
2754 2799 }
2755 2800 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2756 2801 current, new) != current);
2757 2802
2758 2803 ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
2759 2804 return (buf);
2760 2805 }
2761 2806
2762 2807 /*
2763 2808 * Return a string. In the event that the user lacks the privilege to access
2764 2809 * arbitrary kernel memory, we copy the string out to scratch memory so that we
2765 2810 * don't fail access checking.
2766 2811 *
2767 2812 * dtrace_dif_variable() uses this routine as a helper for various
2768 2813 * builtin values such as 'execname' and 'probefunc.'
2769 2814 */
2770 2815 uintptr_t
2771 2816 dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
2772 2817 dtrace_mstate_t *mstate)
2773 2818 {
2774 2819 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
2775 2820 uintptr_t ret;
2776 2821 size_t strsz;
2777 2822
2778 2823 /*
2779 2824 * The easy case: this probe is allowed to read all of memory, so
2780 2825 * we can just return this as a vanilla pointer.
2781 2826 */
2782 2827 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
2783 2828 return (addr);
2784 2829
2785 2830 /*
2786 2831 * This is the tougher case: we copy the string in question from
2787 2832 * kernel memory into scratch memory and return it that way: this
2788 2833 * ensures that we won't trip up when access checking tests the
2789 2834 * BYREF return value.
2790 2835 */
2791 2836 strsz = dtrace_strlen((char *)addr, size) + 1;
2792 2837
2793 2838 if (mstate->dtms_scratch_ptr + strsz >
2794 2839 mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
2795 2840 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
2796 2841 return (NULL);
2797 2842 }
2798 2843
2799 2844 dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
2800 2845 strsz);
2801 2846 ret = mstate->dtms_scratch_ptr;
2802 2847 mstate->dtms_scratch_ptr += strsz;
2803 2848 return (ret);
2804 2849 }
2805 2850
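The scratch region consumed by dtrace_dif_varstr() is a per-probe bump allocator: a pointer advances through a fixed range, and exhaustion simply sets CPU_DTRACE_NOSCRATCH. A minimal sketch of the pattern (illustrative; the struct and field names here are hypothetical -- the real bounds live in dtrace_mstate_t):

	#include <stdint.h>
	#include <stddef.h>

	typedef struct scratch {
		uintptr_t ptr;	/* next free byte */
		uintptr_t base;	/* start of region */
		size_t	  size;	/* total bytes */
	} scratch_t;

	/* Carve len bytes out of scratch, or fail with no side effects. */
	static void *
	scratch_alloc(scratch_t *s, size_t len)
	{
		void *ret;

		if (s->ptr + len > s->base + s->size)
			return (NULL);	/* caller flags NOSCRATCH */

		ret = (void *)s->ptr;
		s->ptr += len;
		return (ret);
	}
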
2806 2851 /*
2807 2852 * This function implements the DIF emulator's variable lookups. The emulator
2808 2853 * passes a reserved variable identifier and optional built-in array index.
2809 2854 */
2810 2855 static uint64_t
2811 2856 dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
2812 2857 uint64_t ndx)
2813 2858 {
2814 2859 /*
2815 2860 * If we're accessing one of the uncached arguments, we'll turn this
2816 2861 * into a reference in the args array.
2817 2862 */
2818 2863 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
2819 2864 ndx = v - DIF_VAR_ARG0;
2820 2865 v = DIF_VAR_ARGS;
2821 2866 }
2822 2867
2823 2868 switch (v) {
2824 2869 case DIF_VAR_ARGS:
2825 2870 if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) {
2826 2871 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |=
2827 2872 CPU_DTRACE_KPRIV;
2828 2873 return (0);
2829 2874 }
2830 2875
2831 2876 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
2832 2877 if (ndx >= sizeof (mstate->dtms_arg) /
2833 2878 sizeof (mstate->dtms_arg[0])) {
2834 2879 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
2835 2880 dtrace_provider_t *pv;
2836 2881 uint64_t val;
2837 2882
2838 2883 pv = mstate->dtms_probe->dtpr_provider;
2839 2884 if (pv->dtpv_pops.dtps_getargval != NULL)
2840 2885 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
2841 2886 mstate->dtms_probe->dtpr_id,
2842 2887 mstate->dtms_probe->dtpr_arg, ndx, aframes);
2843 2888 else
2844 2889 val = dtrace_getarg(ndx, aframes);
2845 2890
2846 2891 /*
2847 2892 * This is regrettably required to keep the compiler
2848 2893 * from tail-optimizing the call to dtrace_getarg().
2849 2894 * The condition always evaluates to true, but the
2850 2895 * compiler has no way of figuring that out a priori.
2851 2896 * (None of this would be necessary if the compiler
2852 2897 * could be relied upon to _always_ tail-optimize
2853 2898 * the call to dtrace_getarg() -- but it can't.)
2854 2899 */
2855 2900 if (mstate->dtms_probe != NULL)
2856 2901 return (val);
2857 2902
2858 2903 ASSERT(0);
2859 2904 }
2860 2905
2861 2906 return (mstate->dtms_arg[ndx]);
2862 2907
2863 2908 case DIF_VAR_UREGS: {
2864 2909 klwp_t *lwp;
2865 2910
2866 2911 if (!dtrace_priv_proc(state, mstate))
2867 2912 return (0);
2868 2913
2869 2914 if ((lwp = curthread->t_lwp) == NULL) {
2870 2915 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
2871 2916 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL;
2872 2917 return (0);
2873 2918 }
2874 2919
2875 2920 return (dtrace_getreg(lwp->lwp_regs, ndx));
2876 2921 }
2877 2922
2878 2923 case DIF_VAR_VMREGS: {
2879 2924 uint64_t rval;
2880 2925
2881 2926 if (!dtrace_priv_kernel(state))
2882 2927 return (0);
2883 2928
2884 2929 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
2885 2930
2886 2931 rval = dtrace_getvmreg(ndx,
2887 2932 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);
2888 2933
2889 2934 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
2890 2935
2891 2936 return (rval);
2892 2937 }
2893 2938
2894 2939 case DIF_VAR_CURTHREAD:
2895 - if (!dtrace_priv_kernel(state))
2940 + if (!dtrace_priv_proc(state, mstate))
2896 2941 return (0);
2897 2942 return ((uint64_t)(uintptr_t)curthread);
2898 2943
2899 2944 case DIF_VAR_TIMESTAMP:
2900 2945 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
2901 2946 mstate->dtms_timestamp = dtrace_gethrtime();
2902 2947 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
2903 2948 }
2904 2949 return (mstate->dtms_timestamp);
2905 2950
2906 2951 case DIF_VAR_VTIMESTAMP:
2907 2952 ASSERT(dtrace_vtime_references != 0);
2908 2953 return (curthread->t_dtrace_vtime);
2909 2954
2910 2955 case DIF_VAR_WALLTIMESTAMP:
2911 2956 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
2912 2957 mstate->dtms_walltimestamp = dtrace_gethrestime();
2913 2958 mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
2914 2959 }
2915 2960 return (mstate->dtms_walltimestamp);
2916 2961
2917 2962 case DIF_VAR_IPL:
2918 2963 if (!dtrace_priv_kernel(state))
2919 2964 return (0);
2920 2965 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
2921 2966 mstate->dtms_ipl = dtrace_getipl();
2922 2967 mstate->dtms_present |= DTRACE_MSTATE_IPL;
2923 2968 }
2924 2969 return (mstate->dtms_ipl);
2925 2970
2926 2971 case DIF_VAR_EPID:
2927 2972 ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
2928 2973 return (mstate->dtms_epid);
2929 2974
2930 2975 case DIF_VAR_ID:
2931 2976 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
2932 2977 return (mstate->dtms_probe->dtpr_id);
2933 2978
2934 2979 case DIF_VAR_STACKDEPTH:
2935 2980 if (!dtrace_priv_kernel(state))
2936 2981 return (0);
2937 2982 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
2938 2983 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
2939 2984
2940 2985 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
2941 2986 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
2942 2987 }
2943 2988 return (mstate->dtms_stackdepth);
2944 2989
2945 2990 case DIF_VAR_USTACKDEPTH:
2946 2991 if (!dtrace_priv_proc(state, mstate))
2947 2992 return (0);
2948 2993 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
2949 2994 /*
2950 2995 * See comment in DIF_VAR_PID.
2951 2996 */
2952 2997 if (DTRACE_ANCHORED(mstate->dtms_probe) &&
2953 2998 CPU_ON_INTR(CPU)) {
2954 2999 mstate->dtms_ustackdepth = 0;
2955 3000 } else {
2956 3001 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
2957 3002 mstate->dtms_ustackdepth =
2958 3003 dtrace_getustackdepth();
2959 3004 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
2960 3005 }
2961 3006 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
2962 3007 }
2963 3008 return (mstate->dtms_ustackdepth);
2964 3009
2965 3010 case DIF_VAR_CALLER:
2966 3011 if (!dtrace_priv_kernel(state))
2967 3012 return (0);
2968 3013 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
2969 3014 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
2970 3015
2971 3016 if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
2972 3017 /*
2973 3018 * If this is an unanchored probe, we are
2974 3019 * required to go through the slow path:
2975 3020 * dtrace_caller() only guarantees correct
2976 3021 * results for anchored probes.
2977 3022 */
2978 3023 pc_t caller[2];
2979 3024
2980 3025 dtrace_getpcstack(caller, 2, aframes,
2981 3026 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
2982 3027 mstate->dtms_caller = caller[1];
2983 3028 } else if ((mstate->dtms_caller =
2984 3029 dtrace_caller(aframes)) == -1) {
2985 3030 /*
2986 3031 * We have failed to do this the quick way;
2987 3032 * we must resort to the slower approach of
2988 3033 * calling dtrace_getpcstack().
2989 3034 */
2990 3035 pc_t caller;
2991 3036
2992 3037 dtrace_getpcstack(&caller, 1, aframes, NULL);
2993 3038 mstate->dtms_caller = caller;
2994 3039 }
2995 3040
2996 3041 mstate->dtms_present |= DTRACE_MSTATE_CALLER;
2997 3042 }
2998 3043 return (mstate->dtms_caller);
2999 3044
3000 3045 case DIF_VAR_UCALLER:
3001 3046 if (!dtrace_priv_proc(state, mstate))
3002 3047 return (0);
3003 3048
3004 3049 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
3005 3050 uint64_t ustack[3];
3006 3051
3007 3052 /*
3008 3053 * dtrace_getupcstack() fills in the first uint64_t
3009 3054 * with the current PID. The second uint64_t will
3010 3055 * be the program counter at user-level. The third
3011 3056 * uint64_t will contain the caller, which is what
3012 3057 * we're after.
3013 3058 */
3014 3059 ustack[2] = NULL;
3015 3060 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3016 3061 dtrace_getupcstack(ustack, 3);
3017 3062 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3018 3063 mstate->dtms_ucaller = ustack[2];
3019 3064 mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
3020 3065 }
3021 3066
3022 3067 return (mstate->dtms_ucaller);
3023 3068
3024 3069 case DIF_VAR_PROBEPROV:
3025 3070 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3026 3071 return (dtrace_dif_varstr(
3027 3072 (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
3028 3073 state, mstate));
3029 3074
3030 3075 case DIF_VAR_PROBEMOD:
3031 3076 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3032 3077 return (dtrace_dif_varstr(
3033 3078 (uintptr_t)mstate->dtms_probe->dtpr_mod,
3034 3079 state, mstate));
3035 3080
3036 3081 case DIF_VAR_PROBEFUNC:
3037 3082 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3038 3083 return (dtrace_dif_varstr(
3039 3084 (uintptr_t)mstate->dtms_probe->dtpr_func,
3040 3085 state, mstate));
3041 3086
3042 3087 case DIF_VAR_PROBENAME:
3043 3088 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3044 3089 return (dtrace_dif_varstr(
3045 3090 (uintptr_t)mstate->dtms_probe->dtpr_name,
3046 3091 state, mstate));
3047 3092
3048 3093 case DIF_VAR_PID:
3049 3094 if (!dtrace_priv_proc(state, mstate))
3050 3095 return (0);
3051 3096
3052 3097 /*
3053 3098 * Note that we are assuming that an unanchored probe is
3054 3099 * always due to a high-level interrupt. (And we're assuming
3055 3100  * that there is only a single high-level interrupt.)
3056 3101 */
3057 3102 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3058 3103 return (pid0.pid_id);
3059 3104
3060 3105 /*
3061 3106 * It is always safe to dereference one's own t_procp pointer:
3062 3107 * it always points to a valid, allocated proc structure.
3063 3108 * Further, it is always safe to dereference the p_pidp member
3064 3109  * of one's own proc structure. (These are truisms because
3065 3110  * threads and processes don't clean up their own state --
3066 3111  * they leave that task to whoever reaps them.)
3067 3112 */
3068 3113 return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
3069 3114
3070 3115 case DIF_VAR_PPID:
3071 3116 if (!dtrace_priv_proc(state, mstate))
3072 3117 return (0);
3073 3118
3074 3119 /*
3075 3120 * See comment in DIF_VAR_PID.
3076 3121 */
3077 3122 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3078 3123 return (pid0.pid_id);
3079 3124
3080 3125 /*
3081 3126 * It is always safe to dereference one's own t_procp pointer:
3082 3127 * it always points to a valid, allocated proc structure.
3083 3128 * (This is true because threads don't clean up their own
3084 3129  * state -- they leave that task to whoever reaps them.)
3085 3130 */
3086 3131 return ((uint64_t)curthread->t_procp->p_ppid);
3087 3132
3088 3133 case DIF_VAR_TID:
3089 3134 /*
3090 3135 * See comment in DIF_VAR_PID.
3091 3136 */
3092 3137 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3093 3138 return (0);
3094 3139
3095 3140 return ((uint64_t)curthread->t_tid);
3096 3141
3097 3142 case DIF_VAR_EXECNAME:
3098 3143 if (!dtrace_priv_proc(state, mstate))
3099 3144 return (0);
3100 3145
3101 3146 /*
3102 3147 * See comment in DIF_VAR_PID.
3103 3148 */
3104 3149 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3105 3150 return ((uint64_t)(uintptr_t)p0.p_user.u_comm);
3106 3151
3107 3152 /*
3108 3153 * It is always safe to dereference one's own t_procp pointer:
3109 3154 * it always points to a valid, allocated proc structure.
3110 3155 * (This is true because threads don't clean up their own
3111 3156 		 * state -- they leave that task to whoever reaps them.)
3112 3157 */
3113 3158 return (dtrace_dif_varstr(
3114 3159 (uintptr_t)curthread->t_procp->p_user.u_comm,
3115 3160 state, mstate));
3116 3161
3117 3162 case DIF_VAR_ZONENAME:
3118 3163 if (!dtrace_priv_proc(state, mstate))
3119 3164 return (0);
3120 3165
3121 3166 /*
3122 3167 * See comment in DIF_VAR_PID.
3123 3168 */
3124 3169 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3125 3170 return ((uint64_t)(uintptr_t)p0.p_zone->zone_name);
3126 3171
3127 3172 /*
3128 3173 * It is always safe to dereference one's own t_procp pointer:
3129 3174 * it always points to a valid, allocated proc structure.
3130 3175 * (This is true because threads don't clean up their own
3131 3176 		 * state -- they leave that task to whoever reaps them.)
3132 3177 */
3133 3178 return (dtrace_dif_varstr(
3134 3179 (uintptr_t)curthread->t_procp->p_zone->zone_name,
3135 3180 state, mstate));
3136 3181
3137 3182 case DIF_VAR_UID:
3138 3183 if (!dtrace_priv_proc(state, mstate))
3139 3184 return (0);
3140 3185
3141 3186 /*
3142 3187 * See comment in DIF_VAR_PID.
3143 3188 */
3144 3189 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3145 3190 return ((uint64_t)p0.p_cred->cr_uid);
3146 3191
3147 3192 /*
3148 3193 * It is always safe to dereference one's own t_procp pointer:
3149 3194 * it always points to a valid, allocated proc structure.
3150 3195 * (This is true because threads don't clean up their own
3151 3196 		 * state -- they leave that task to whoever reaps them.)
3152 3197 *
3153 3198 * Additionally, it is safe to dereference one's own process
3154 3199 * credential, since this is never NULL after process birth.
3155 3200 */
3156 3201 return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
3157 3202
3158 3203 case DIF_VAR_GID:
3159 3204 if (!dtrace_priv_proc(state, mstate))
3160 3205 return (0);
3161 3206
3162 3207 /*
3163 3208 * See comment in DIF_VAR_PID.
3164 3209 */
3165 3210 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3166 3211 return ((uint64_t)p0.p_cred->cr_gid);
3167 3212
3168 3213 /*
3169 3214 * It is always safe to dereference one's own t_procp pointer:
3170 3215 * it always points to a valid, allocated proc structure.
3171 3216 * (This is true because threads don't clean up their own
3172 3217 		 * state -- they leave that task to whoever reaps them.)
3173 3218 *
3174 3219 * Additionally, it is safe to dereference one's own process
3175 3220 * credential, since this is never NULL after process birth.
3176 3221 */
3177 3222 return ((uint64_t)curthread->t_procp->p_cred->cr_gid);
3178 3223
3179 3224 case DIF_VAR_ERRNO: {
3180 3225 klwp_t *lwp;
3181 3226 if (!dtrace_priv_proc(state, mstate))
3182 3227 return (0);
3183 3228
3184 3229 /*
3185 3230 * See comment in DIF_VAR_PID.
3186 3231 */
3187 3232 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3188 3233 return (0);
3189 3234
3190 3235 /*
3191 3236 * It is always safe to dereference one's own t_lwp pointer in
3192 3237 * the event that this pointer is non-NULL. (This is true
3193 3238 * because threads and lwps don't clean up their own state --
3194 3239 		 * they leave that task to whoever reaps them.)
3195 3240 */
3196 3241 if ((lwp = curthread->t_lwp) == NULL)
3197 3242 return (0);
3198 3243
3199 3244 return ((uint64_t)lwp->lwp_errno);
3200 3245 }
3201 3246 default:
3202 3247 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3203 3248 return (0);
3204 3249 }
3205 3250 }
3206 3251
3207 3252 /*
3208 3253 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
3209 3254 * Notice that we don't bother validating the proper number of arguments or
3210 3255 * their types in the tuple stack. This isn't needed because all argument
3211 3256 * interpretation is safe because of our load safety -- the worst that can
3212 3257 * happen is that a bogus program can obtain bogus results.
3213 3258 */
3214 3259 static void
3215 3260 dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
3216 3261 dtrace_key_t *tupregs, int nargs,
3217 3262 dtrace_mstate_t *mstate, dtrace_state_t *state)
3218 3263 {
3219 3264 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
3220 3265 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
3221 3266 dtrace_vstate_t *vstate = &state->dts_vstate;
3222 3267
3223 3268 union {
3224 3269 mutex_impl_t mi;
3225 3270 uint64_t mx;
3226 3271 } m;
3227 3272
3228 3273 union {
3229 3274 krwlock_t ri;
3230 3275 uintptr_t rw;
3231 3276 } r;
3232 3277
3233 3278 switch (subr) {
3234 3279 case DIF_SUBR_RAND:
3235 3280 regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
3236 3281 break;
3237 3282
3238 3283 case DIF_SUBR_MUTEX_OWNED:
3239 3284 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3240 3285 mstate, vstate)) {
3241 3286 regs[rd] = NULL;
3242 3287 break;
3243 3288 }
3244 3289
3245 3290 m.mx = dtrace_load64(tupregs[0].dttk_value);
3246 3291 if (MUTEX_TYPE_ADAPTIVE(&m.mi))
3247 3292 regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
3248 3293 else
3249 3294 regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
3250 3295 break;
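/*
 * [Editorial note -- not part of this change.]  The m.mx/m.mi union is
 * what makes the lock subroutines safe: the traced mutex is fetched as
 * a single 64-bit value with dtrace_load64() (a fault-tolerant load),
 * and the kernel's own MUTEX_TYPE_ADAPTIVE()/MUTEX_OWNER() macros are
 * then applied to that local copy rather than to the traced address.
 */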
3251 3296
3252 3297 case DIF_SUBR_MUTEX_OWNER:
3253 3298 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3254 3299 mstate, vstate)) {
3255 3300 regs[rd] = NULL;
3256 3301 break;
3257 3302 }
3258 3303
3259 3304 m.mx = dtrace_load64(tupregs[0].dttk_value);
3260 3305 if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
3261 3306 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
3262 3307 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
3263 3308 else
3264 3309 regs[rd] = 0;
3265 3310 break;
3266 3311
3267 3312 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
3268 3313 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3269 3314 mstate, vstate)) {
3270 3315 regs[rd] = NULL;
3271 3316 break;
3272 3317 }
3273 3318
3274 3319 m.mx = dtrace_load64(tupregs[0].dttk_value);
3275 3320 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
3276 3321 break;
3277 3322
3278 3323 case DIF_SUBR_MUTEX_TYPE_SPIN:
3279 3324 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3280 3325 mstate, vstate)) {
3281 3326 regs[rd] = NULL;
3282 3327 break;
3283 3328 }
3284 3329
3285 3330 m.mx = dtrace_load64(tupregs[0].dttk_value);
3286 3331 regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
3287 3332 break;
3288 3333
3289 3334 case DIF_SUBR_RW_READ_HELD: {
3290 3335 uintptr_t tmp;
3291 3336
3292 3337 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
3293 3338 mstate, vstate)) {
3294 3339 regs[rd] = NULL;
3295 3340 break;
3296 3341 }
3297 3342
3298 3343 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3299 3344 regs[rd] = _RW_READ_HELD(&r.ri, tmp);
3300 3345 break;
3301 3346 }
3302 3347
3303 3348 case DIF_SUBR_RW_WRITE_HELD:
3304 3349 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
3305 3350 mstate, vstate)) {
3306 3351 regs[rd] = NULL;
3307 3352 break;
3308 3353 }
3309 3354
3310 3355 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3311 3356 regs[rd] = _RW_WRITE_HELD(&r.ri);
3312 3357 break;
3313 3358
3314 3359 case DIF_SUBR_RW_ISWRITER:
3315 3360 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
3316 3361 mstate, vstate)) {
3317 3362 regs[rd] = NULL;
3318 3363 break;
3319 3364 }
3320 3365
3321 3366 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3322 3367 regs[rd] = _RW_ISWRITER(&r.ri);
3323 3368 break;
3324 3369
3325 3370 case DIF_SUBR_BCOPY: {
3326 3371 /*
3327 3372 * We need to be sure that the destination is in the scratch
3328 3373 * region -- no other region is allowed.
3329 3374 */
3330 3375 uintptr_t src = tupregs[0].dttk_value;
3331 3376 uintptr_t dest = tupregs[1].dttk_value;
3332 3377 size_t size = tupregs[2].dttk_value;
3333 3378
3334 3379 if (!dtrace_inscratch(dest, size, mstate)) {
3335 3380 *flags |= CPU_DTRACE_BADADDR;
3336 3381 *illval = regs[rd];
3337 3382 break;
3338 3383 }
3339 3384
3340 3385 if (!dtrace_canload(src, size, mstate, vstate)) {
3341 3386 regs[rd] = NULL;
3342 3387 break;
3343 3388 }
3344 3389
3345 3390 dtrace_bcopy((void *)src, (void *)dest, size);
3346 3391 break;
3347 3392 }
3348 3393
3349 3394 case DIF_SUBR_ALLOCA:
3350 3395 case DIF_SUBR_COPYIN: {
3351 3396 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
3352 3397 uint64_t size =
3353 3398 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
3354 3399 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;
3355 3400
3356 3401 /*
3357 3402 * This action doesn't require any credential checks since
3358 3403 * probes will not activate in user contexts to which the
3359 3404 * enabling user does not have permissions.
3360 3405 */
3361 3406
3362 3407 /*
3363 3408 * Rounding up the user allocation size could have overflowed
3364 3409 * a large, bogus allocation (like -1ULL) to 0.
3365 3410 */
3366 3411 if (scratch_size < size ||
3367 3412 !DTRACE_INSCRATCH(mstate, scratch_size)) {
3368 3413 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3369 3414 regs[rd] = NULL;
3370 3415 break;
3371 3416 }
3372 3417
3373 3418 if (subr == DIF_SUBR_COPYIN) {
3374 3419 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3375 3420 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
3376 3421 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3377 3422 }
3378 3423
3379 3424 mstate->dtms_scratch_ptr += scratch_size;
3380 3425 regs[rd] = dest;
3381 3426 break;
3382 3427 }
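/*
 * [Editorial sketch -- not part of this change.]  The scratch_size
 * check above guards against integer wraparound; a minimal user-level
 * model (hypothetical names, assuming the same 8-byte rounding as
 * P2ROUNDUP(ptr, 8)) shows why both halves of the test are needed:
 */
#include <stdint.h>
#include <stdio.h>

static int
scratch_fits(uintptr_t ptr, uintptr_t end, uint64_t size)
{
	uintptr_t dest = (ptr + 7) & ~(uintptr_t)7;	/* P2ROUNDUP(ptr, 8) */
	uint64_t scratch_size = (dest - ptr) + size;

	if (scratch_size < size)
		return (0);	/* e.g. size == -1ULL wrapped the sum */

	return (end - ptr >= scratch_size);	/* DTRACE_INSCRATCH analogue */
}

int
main(void)
{
	/* alloca(-1) must fail even though the wrapped sum looks tiny */
	(void) printf("%d\n", scratch_fits(0x1003, 0x2000, (uint64_t)-1));
	return (0);
}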
3383 3428
3384 3429 case DIF_SUBR_COPYINTO: {
3385 3430 uint64_t size = tupregs[1].dttk_value;
3386 3431 uintptr_t dest = tupregs[2].dttk_value;
3387 3432
3388 3433 /*
3389 3434 * This action doesn't require any credential checks since
3390 3435 * probes will not activate in user contexts to which the
3391 3436 * enabling user does not have permissions.
3392 3437 */
3393 3438 if (!dtrace_inscratch(dest, size, mstate)) {
3394 3439 *flags |= CPU_DTRACE_BADADDR;
3395 3440 *illval = regs[rd];
3396 3441 break;
3397 3442 }
3398 3443
3399 3444 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3400 3445 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
3401 3446 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3402 3447 break;
3403 3448 }
3404 3449
3405 3450 case DIF_SUBR_COPYINSTR: {
3406 3451 uintptr_t dest = mstate->dtms_scratch_ptr;
3407 3452 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3408 3453
3409 3454 if (nargs > 1 && tupregs[1].dttk_value < size)
3410 3455 size = tupregs[1].dttk_value + 1;
3411 3456
3412 3457 /*
3413 3458 * This action doesn't require any credential checks since
3414 3459 * probes will not activate in user contexts to which the
3415 3460 * enabling user does not have permissions.
3416 3461 */
3417 3462 if (!DTRACE_INSCRATCH(mstate, size)) {
3418 3463 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3419 3464 regs[rd] = NULL;
3420 3465 break;
3421 3466 }
3422 3467
3423 3468 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3424 3469 dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
3425 3470 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3426 3471
3427 3472 ((char *)dest)[size - 1] = '\0';
3428 3473 mstate->dtms_scratch_ptr += size;
3429 3474 regs[rd] = dest;
3430 3475 break;
3431 3476 }
3432 3477
3433 3478 case DIF_SUBR_MSGSIZE:
3434 3479 case DIF_SUBR_MSGDSIZE: {
3435 3480 uintptr_t baddr = tupregs[0].dttk_value, daddr;
3436 3481 uintptr_t wptr, rptr;
3437 3482 size_t count = 0;
3438 3483 int cont = 0;
3439 3484
3440 3485 while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
3441 3486
3442 3487 if (!dtrace_canload(baddr, sizeof (mblk_t), mstate,
3443 3488 vstate)) {
3444 3489 regs[rd] = NULL;
3445 3490 break;
3446 3491 }
3447 3492
3448 3493 wptr = dtrace_loadptr(baddr +
3449 3494 offsetof(mblk_t, b_wptr));
3450 3495
3451 3496 rptr = dtrace_loadptr(baddr +
3452 3497 offsetof(mblk_t, b_rptr));
3453 3498
3454 3499 if (wptr < rptr) {
3455 3500 *flags |= CPU_DTRACE_BADADDR;
3456 3501 *illval = tupregs[0].dttk_value;
3457 3502 break;
3458 3503 }
3459 3504
3460 3505 daddr = dtrace_loadptr(baddr +
3461 3506 offsetof(mblk_t, b_datap));
3462 3507
3463 3508 baddr = dtrace_loadptr(baddr +
3464 3509 offsetof(mblk_t, b_cont));
3465 3510
3466 3511 /*
3467 3512 			 * We want to protect against denial-of-service here,
3468 3513 * so we're only going to search the list for
3469 3514 * dtrace_msgdsize_max mblks.
3470 3515 */
3471 3516 if (cont++ > dtrace_msgdsize_max) {
3472 3517 *flags |= CPU_DTRACE_ILLOP;
3473 3518 break;
3474 3519 }
3475 3520
3476 3521 if (subr == DIF_SUBR_MSGDSIZE) {
3477 3522 if (dtrace_load8(daddr +
3478 3523 offsetof(dblk_t, db_type)) != M_DATA)
3479 3524 continue;
3480 3525 }
3481 3526
3482 3527 count += wptr - rptr;
3483 3528 }
3484 3529
3485 3530 if (!(*flags & CPU_DTRACE_FAULT))
3486 3531 regs[rd] = count;
3487 3532
3488 3533 break;
3489 3534 }
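/*
 * [Editorial sketch -- not part of this change.]  Stripped of the safe
 * loads, the walk above is just a bounded traversal of a singly linked
 * chain in which each fragment contributes b_wptr - b_rptr bytes; the
 * dtrace_msgdsize_max cap terminates even a (corrupt) circular chain.
 * A model over a hypothetical fragment type:
 */
#include <stddef.h>

struct frag {
	unsigned char *rptr, *wptr;	/* read and write pointers */
	struct frag *cont;		/* next fragment, like b_cont */
};

static long
msgsize_model(const struct frag *fp, int max)
{
	long count = 0;
	int n = 0;

	for (; fp != NULL; fp = fp->cont) {
		if (fp->wptr < fp->rptr)
			return (-1);	/* inconsistent fragment */
		if (n++ > max)
			return (-1);	/* suspected cycle */
		count += fp->wptr - fp->rptr;
	}

	return (count);
}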
3490 3535
3491 3536 case DIF_SUBR_PROGENYOF: {
3492 3537 pid_t pid = tupregs[0].dttk_value;
3493 3538 proc_t *p;
3494 3539 int rval = 0;
3495 3540
3496 3541 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3497 3542
3498 3543 for (p = curthread->t_procp; p != NULL; p = p->p_parent) {
3499 3544 if (p->p_pidp->pid_id == pid) {
3500 3545 rval = 1;
3501 3546 break;
3502 3547 }
3503 3548 }
3504 3549
3505 3550 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3506 3551
3507 3552 regs[rd] = rval;
3508 3553 break;
3509 3554 }
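/*
 * [Editorial note -- not part of this change.]  Because the walk starts
 * at the current process and follows p_parent to the root, progenyof()
 * answers "is the given pid the current process or one of its
 * ancestors?" -- so in a D predicate, progenyof($target) matches the
 * target process and every descendant of it, and progenyof(1) is true
 * for nearly everything.
 */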
3510 3555
3511 3556 case DIF_SUBR_SPECULATION:
3512 3557 regs[rd] = dtrace_speculation(state);
3513 3558 break;
3514 3559
3515 3560 case DIF_SUBR_COPYOUT: {
3516 3561 uintptr_t kaddr = tupregs[0].dttk_value;
3517 3562 uintptr_t uaddr = tupregs[1].dttk_value;
3518 3563 uint64_t size = tupregs[2].dttk_value;
3519 3564
3520 3565 if (!dtrace_destructive_disallow &&
3521 3566 dtrace_priv_proc_control(state, mstate) &&
3522 3567 !dtrace_istoxic(kaddr, size)) {
3523 3568 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3524 3569 dtrace_copyout(kaddr, uaddr, size, flags);
3525 3570 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3526 3571 }
3527 3572 break;
3528 3573 }
3529 3574
3530 3575 case DIF_SUBR_COPYOUTSTR: {
3531 3576 uintptr_t kaddr = tupregs[0].dttk_value;
3532 3577 uintptr_t uaddr = tupregs[1].dttk_value;
3533 3578 uint64_t size = tupregs[2].dttk_value;
3534 3579
3535 3580 if (!dtrace_destructive_disallow &&
3536 3581 dtrace_priv_proc_control(state, mstate) &&
3537 3582 !dtrace_istoxic(kaddr, size)) {
3538 3583 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3539 3584 dtrace_copyoutstr(kaddr, uaddr, size, flags);
3540 3585 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3541 3586 }
3542 3587 break;
3543 3588 }
3544 3589
3545 3590 case DIF_SUBR_STRLEN: {
3546 3591 size_t sz;
3547 3592 uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
3548 3593 sz = dtrace_strlen((char *)addr,
3549 3594 state->dts_options[DTRACEOPT_STRSIZE]);
3550 3595
3551 3596 if (!dtrace_canload(addr, sz + 1, mstate, vstate)) {
3552 3597 regs[rd] = NULL;
3553 3598 break;
3554 3599 }
3555 3600
3556 3601 regs[rd] = sz;
3557 3602
3558 3603 break;
3559 3604 }
3560 3605
3561 3606 case DIF_SUBR_STRCHR:
3562 3607 case DIF_SUBR_STRRCHR: {
3563 3608 /*
3564 3609 * We're going to iterate over the string looking for the
3565 3610 * specified character. We will iterate until we have reached
3566 3611 * the string length or we have found the character. If this
3567 3612 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
3568 3613 * of the specified character instead of the first.
3569 3614 */
3570 3615 uintptr_t saddr = tupregs[0].dttk_value;
3571 3616 uintptr_t addr = tupregs[0].dttk_value;
3572 3617 uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE];
3573 3618 char c, target = (char)tupregs[1].dttk_value;
3574 3619
3575 3620 for (regs[rd] = NULL; addr < limit; addr++) {
3576 3621 if ((c = dtrace_load8(addr)) == target) {
3577 3622 regs[rd] = addr;
3578 3623
3579 3624 if (subr == DIF_SUBR_STRCHR)
3580 3625 break;
3581 3626 }
3582 3627
3583 3628 if (c == '\0')
3584 3629 break;
3585 3630 }
3586 3631
3587 3632 if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) {
3588 3633 regs[rd] = NULL;
3589 3634 break;
3590 3635 }
3591 3636
3592 3637 break;
3593 3638 }
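/*
 * [Editorial sketch -- not part of this change.]  One loop serves both
 * subroutines because the result register is simply overwritten on each
 * match: strchr() breaks out on the first hit, while strrchr() runs to
 * the terminator so the last hit wins.  A user-level model of the
 * strrchr() half:
 */
#include <stddef.h>

static const char *
strrchr_model(const char *s, char target)
{
	const char *found = NULL;
	char c;

	do {
		c = *s;
		if (c == target)
			found = s;	/* later matches overwrite earlier */
	} while (*s++ != '\0');

	return (found);
}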
3594 3639
3595 3640 case DIF_SUBR_STRSTR:
3596 3641 case DIF_SUBR_INDEX:
3597 3642 case DIF_SUBR_RINDEX: {
3598 3643 /*
3599 3644 * We're going to iterate over the string looking for the
3600 3645 * specified string. We will iterate until we have reached
3601 3646 * the string length or we have found the string. (Yes, this
3602 3647 * is done in the most naive way possible -- but considering
3603 3648 * that the string we're searching for is likely to be
3604 3649 * relatively short, the complexity of Rabin-Karp or similar
3605 3650 * hardly seems merited.)
3606 3651 */
3607 3652 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
3608 3653 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
3609 3654 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3610 3655 size_t len = dtrace_strlen(addr, size);
3611 3656 size_t sublen = dtrace_strlen(substr, size);
3612 3657 char *limit = addr + len, *orig = addr;
3613 3658 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
3614 3659 int inc = 1;
3615 3660
3616 3661 regs[rd] = notfound;
3617 3662
3618 3663 if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
3619 3664 regs[rd] = NULL;
3620 3665 break;
3621 3666 }
3622 3667
3623 3668 if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
3624 3669 vstate)) {
3625 3670 regs[rd] = NULL;
3626 3671 break;
3627 3672 }
3628 3673
3629 3674 /*
3630 3675 * strstr() and index()/rindex() have similar semantics if
3631 3676 * both strings are the empty string: strstr() returns a
3632 3677 * pointer to the (empty) string, and index() and rindex()
3633 3678 * both return index 0 (regardless of any position argument).
3634 3679 */
3635 3680 if (sublen == 0 && len == 0) {
3636 3681 if (subr == DIF_SUBR_STRSTR)
3637 3682 regs[rd] = (uintptr_t)addr;
3638 3683 else
3639 3684 regs[rd] = 0;
3640 3685 break;
3641 3686 }
3642 3687
3643 3688 if (subr != DIF_SUBR_STRSTR) {
3644 3689 if (subr == DIF_SUBR_RINDEX) {
3645 3690 limit = orig - 1;
3646 3691 addr += len;
3647 3692 inc = -1;
3648 3693 }
3649 3694
3650 3695 /*
3651 3696 * Both index() and rindex() take an optional position
3652 3697 * argument that denotes the starting position.
3653 3698 */
3654 3699 if (nargs == 3) {
3655 3700 int64_t pos = (int64_t)tupregs[2].dttk_value;
3656 3701
3657 3702 /*
3658 3703 * If the position argument to index() is
3659 3704 * negative, Perl implicitly clamps it at
3660 3705 * zero. This semantic is a little surprising
3661 3706 * given the special meaning of negative
3662 3707 * positions to similar Perl functions like
3663 3708 * substr(), but it appears to reflect a
3664 3709 * notion that index() can start from a
3665 3710 * negative index and increment its way up to
3666 3711 * the string. Given this notion, Perl's
3667 3712 * rindex() is at least self-consistent in
3668 3713 * that it implicitly clamps positions greater
3669 3714 * than the string length to be the string
3670 3715 * length. Where Perl completely loses
3671 3716 * coherence, however, is when the specified
3672 3717 * substring is the empty string (""). In
3673 3718 * this case, even if the position is
3674 3719 * negative, rindex() returns 0 -- and even if
3675 3720 * the position is greater than the length,
3676 3721 * index() returns the string length. These
3677 3722 * semantics violate the notion that index()
3678 3723 * should never return a value less than the
3679 3724 * specified position and that rindex() should
3680 3725 * never return a value greater than the
3681 3726 * specified position. (One assumes that
3682 3727 * these semantics are artifacts of Perl's
3683 3728 * implementation and not the results of
3684 3729 * deliberate design -- it beggars belief that
3685 3730 * even Larry Wall could desire such oddness.)
3686 3731 * While in the abstract one would wish for
3687 3732 * consistent position semantics across
3688 3733 * substr(), index() and rindex() -- or at the
3689 3734 * very least self-consistent position
3690 3735 * semantics for index() and rindex() -- we
3691 3736 * instead opt to keep with the extant Perl
3692 3737 * semantics, in all their broken glory. (Do
3693 3738 * we have more desire to maintain Perl's
3694 3739 * semantics than Perl does? Probably.)
3695 3740 */
3696 3741 if (subr == DIF_SUBR_RINDEX) {
3697 3742 if (pos < 0) {
3698 3743 if (sublen == 0)
3699 3744 regs[rd] = 0;
3700 3745 break;
3701 3746 }
3702 3747
3703 3748 if (pos > len)
3704 3749 pos = len;
3705 3750 } else {
3706 3751 if (pos < 0)
3707 3752 pos = 0;
3708 3753
3709 3754 if (pos >= len) {
3710 3755 if (sublen == 0)
3711 3756 regs[rd] = len;
3712 3757 break;
3713 3758 }
3714 3759 }
3715 3760
3716 3761 addr = orig + pos;
3717 3762 }
3718 3763 }
3719 3764
3720 3765 for (regs[rd] = notfound; addr != limit; addr += inc) {
3721 3766 if (dtrace_strncmp(addr, substr, sublen) == 0) {
3722 3767 if (subr != DIF_SUBR_STRSTR) {
3723 3768 /*
3724 3769 * As D index() and rindex() are
3725 3770 * modeled on Perl (and not on awk),
3726 3771 * we return a zero-based (and not a
3727 3772 * one-based) index. (For you Perl
3728 3773 * weenies: no, we're not going to add
3729 3774 * $[ -- and shouldn't you be at a con
3730 3775 * or something?)
3731 3776 */
3732 3777 regs[rd] = (uintptr_t)(addr - orig);
3733 3778 break;
3734 3779 }
3735 3780
3736 3781 ASSERT(subr == DIF_SUBR_STRSTR);
3737 3782 regs[rd] = (uintptr_t)addr;
3738 3783 break;
3739 3784 }
3740 3785 }
3741 3786
3742 3787 break;
3743 3788 }
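/*
 * [Editorial note -- not part of this change.]  Worked examples of the
 * (deliberately Perl-compatible) position clamping above, 0-based:
 *
 *	index("coconut", "co")		=> 0
 *	index("coconut", "co", 1)	=> 2	(search resumes at pos)
 *	index("coconut", "co", -5)	=> 0	(negative pos clamps to 0)
 *	rindex("coconut", "co")		=> 2
 *	rindex("coconut", "co", 1)	=> 0	(search backward from pos)
 *	index("coconut", "", 100)	=> 7	(empty substring: length)
 *	rindex("coconut", "", -4)	=> 0	(empty substring: 0)
 */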
3744 3789
3745 3790 case DIF_SUBR_STRTOK: {
3746 3791 uintptr_t addr = tupregs[0].dttk_value;
3747 3792 uintptr_t tokaddr = tupregs[1].dttk_value;
3748 3793 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3749 3794 uintptr_t limit, toklimit = tokaddr + size;
3750 3795 uint8_t c, tokmap[32]; /* 256 / 8 */
3751 3796 char *dest = (char *)mstate->dtms_scratch_ptr;
3752 3797 int i;
3753 3798
3754 3799 /*
3755 3800 * Check both the token buffer and (later) the input buffer,
3756 3801 * since both could be non-scratch addresses.
3757 3802 */
3758 3803 if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) {
3759 3804 regs[rd] = NULL;
3760 3805 break;
3761 3806 }
3762 3807
3763 3808 if (!DTRACE_INSCRATCH(mstate, size)) {
3764 3809 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3765 3810 regs[rd] = NULL;
3766 3811 break;
3767 3812 }
3768 3813
3769 3814 if (addr == NULL) {
3770 3815 /*
3771 3816 * If the address specified is NULL, we use our saved
3772 3817 * strtok pointer from the mstate. Note that this
3773 3818 * means that the saved strtok pointer is _only_
3774 3819 * valid within multiple enablings of the same probe --
3775 3820 * it behaves like an implicit clause-local variable.
3776 3821 */
3777 3822 addr = mstate->dtms_strtok;
3778 3823 } else {
3779 3824 /*
3780 3825 * If the user-specified address is non-NULL we must
3781 3826 * access check it. This is the only time we have
3782 3827 * a chance to do so, since this address may reside
3783 3828 			 * in the string table of this clause -- future calls
3784 3829 * (when we fetch addr from mstate->dtms_strtok)
3785 3830 * would fail this access check.
3786 3831 */
3787 3832 if (!dtrace_strcanload(addr, size, mstate, vstate)) {
3788 3833 regs[rd] = NULL;
3789 3834 break;
3790 3835 }
3791 3836 }
3792 3837
3793 3838 /*
3794 3839 * First, zero the token map, and then process the token
3795 3840 * string -- setting a bit in the map for every character
3796 3841 * found in the token string.
3797 3842 */
3798 3843 for (i = 0; i < sizeof (tokmap); i++)
3799 3844 tokmap[i] = 0;
3800 3845
3801 3846 for (; tokaddr < toklimit; tokaddr++) {
3802 3847 if ((c = dtrace_load8(tokaddr)) == '\0')
3803 3848 break;
3804 3849
3805 3850 ASSERT((c >> 3) < sizeof (tokmap));
3806 3851 tokmap[c >> 3] |= (1 << (c & 0x7));
3807 3852 }
3808 3853
3809 3854 for (limit = addr + size; addr < limit; addr++) {
3810 3855 /*
3811 3856 * We're looking for a character that is _not_ contained
3812 3857 * in the token string.
3813 3858 */
3814 3859 if ((c = dtrace_load8(addr)) == '\0')
3815 3860 break;
3816 3861
3817 3862 if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
3818 3863 break;
3819 3864 }
3820 3865
3821 3866 if (c == '\0') {
3822 3867 /*
3823 3868 * We reached the end of the string without finding
3824 3869 * any character that was not in the token string.
3825 3870 * We return NULL in this case, and we set the saved
3826 3871 * address to NULL as well.
3827 3872 */
3828 3873 regs[rd] = NULL;
3829 3874 mstate->dtms_strtok = NULL;
3830 3875 break;
3831 3876 }
3832 3877
3833 3878 /*
3834 3879 * From here on, we're copying into the destination string.
3835 3880 */
3836 3881 for (i = 0; addr < limit && i < size - 1; addr++) {
3837 3882 if ((c = dtrace_load8(addr)) == '\0')
3838 3883 break;
3839 3884
3840 3885 if (tokmap[c >> 3] & (1 << (c & 0x7)))
3841 3886 break;
3842 3887
3843 3888 ASSERT(i < size);
3844 3889 dest[i++] = c;
3845 3890 }
3846 3891
3847 3892 ASSERT(i < size);
3848 3893 dest[i] = '\0';
3849 3894 regs[rd] = (uintptr_t)dest;
3850 3895 mstate->dtms_scratch_ptr += size;
3851 3896 mstate->dtms_strtok = addr;
3852 3897 break;
3853 3898 }
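/*
 * [Editorial sketch -- not part of this change.]  The 32-byte tokmap is
 * a 256-bit membership set -- bit (c & 7) of byte (c >> 3) says whether
 * character c is a delimiter -- giving O(1) tests in the scan loops
 * above.  In isolation:
 */
#include <stdint.h>

static uint8_t tokmap[32];	/* one bit per possible byte value */

static void
tok_set(uint8_t c)
{
	tokmap[c >> 3] |= (1 << (c & 0x7));
}

static int
tok_test(uint8_t c)
{
	return ((tokmap[c >> 3] & (1 << (c & 0x7))) != 0);
}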
3854 3899
3855 3900 case DIF_SUBR_SUBSTR: {
3856 3901 uintptr_t s = tupregs[0].dttk_value;
3857 3902 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3858 3903 char *d = (char *)mstate->dtms_scratch_ptr;
3859 3904 int64_t index = (int64_t)tupregs[1].dttk_value;
3860 3905 int64_t remaining = (int64_t)tupregs[2].dttk_value;
3861 3906 size_t len = dtrace_strlen((char *)s, size);
3862 3907 int64_t i;
3863 3908
3864 3909 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
3865 3910 regs[rd] = NULL;
3866 3911 break;
3867 3912 }
3868 3913
3869 3914 if (!DTRACE_INSCRATCH(mstate, size)) {
3870 3915 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3871 3916 regs[rd] = NULL;
3872 3917 break;
3873 3918 }
3874 3919
3875 3920 if (nargs <= 2)
3876 3921 remaining = (int64_t)size;
3877 3922
3878 3923 if (index < 0) {
3879 3924 index += len;
3880 3925
3881 3926 if (index < 0 && index + remaining > 0) {
3882 3927 remaining += index;
3883 3928 index = 0;
3884 3929 }
3885 3930 }
3886 3931
3887 3932 if (index >= len || index < 0) {
3888 3933 remaining = 0;
3889 3934 } else if (remaining < 0) {
3890 3935 remaining += len - index;
3891 3936 } else if (index + remaining > size) {
3892 3937 remaining = size - index;
3893 3938 }
3894 3939
3895 3940 for (i = 0; i < remaining; i++) {
3896 3941 if ((d[i] = dtrace_load8(s + index + i)) == '\0')
3897 3942 break;
3898 3943 }
3899 3944
3900 3945 d[i] = '\0';
3901 3946
3902 3947 mstate->dtms_scratch_ptr += size;
3903 3948 regs[rd] = (uintptr_t)d;
3904 3949 break;
3905 3950 }
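/*
 * [Editorial note -- not part of this change.]  The index arithmetic
 * above mirrors Perl's substr(): a negative index counts back from the
 * end of the string, and a negative remaining leaves off that many
 * characters from the end.  For s = "coconut" (length 7):
 *
 *	substr(s, 3)		=> "onut"
 *	substr(s, -3)		=> "nut"	(index += len)
 *	substr(s, 1, -2)	=> "ocon"	(remaining += len - index)
 *	substr(s, -10, 5)	=> "co"		(clamped at the start)
 */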
3906 3951
3907 3952 case DIF_SUBR_TOUPPER:
3908 3953 case DIF_SUBR_TOLOWER: {
3909 3954 uintptr_t s = tupregs[0].dttk_value;
3910 3955 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3911 3956 char *dest = (char *)mstate->dtms_scratch_ptr, c;
3912 3957 size_t len = dtrace_strlen((char *)s, size);
3913 3958 char lower, upper, convert;
3914 3959 int64_t i;
3915 3960
3916 3961 if (subr == DIF_SUBR_TOUPPER) {
3917 3962 lower = 'a';
3918 3963 upper = 'z';
3919 3964 convert = 'A';
3920 3965 } else {
3921 3966 lower = 'A';
3922 3967 upper = 'Z';
3923 3968 convert = 'a';
3924 3969 }
3925 3970
3926 3971 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
3927 3972 regs[rd] = NULL;
3928 3973 break;
3929 3974 }
3930 3975
3931 3976 if (!DTRACE_INSCRATCH(mstate, size)) {
3932 3977 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3933 3978 regs[rd] = NULL;
3934 3979 break;
3935 3980 }
3936 3981
3937 3982 for (i = 0; i < size - 1; i++) {
3938 3983 if ((c = dtrace_load8(s + i)) == '\0')
3939 3984 break;
3940 3985
3941 3986 if (c >= lower && c <= upper)
3942 3987 c = convert + (c - lower);
3943 3988
3944 3989 dest[i] = c;
3945 3990 }
3946 3991
3947 3992 ASSERT(i < size);
3948 3993 dest[i] = '\0';
3949 3994 regs[rd] = (uintptr_t)dest;
3950 3995 mstate->dtms_scratch_ptr += size;
3951 3996 break;
3952 3997 }
3953 3998
3954 3999 case DIF_SUBR_GETMAJOR:
3955 4000 #ifdef _LP64
3956 4001 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
3957 4002 #else
3958 4003 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ;
3959 4004 #endif
3960 4005 break;
3961 4006
3962 4007 case DIF_SUBR_GETMINOR:
3963 4008 #ifdef _LP64
3964 4009 regs[rd] = tupregs[0].dttk_value & MAXMIN64;
3965 4010 #else
3966 4011 regs[rd] = tupregs[0].dttk_value & MAXMIN;
3967 4012 #endif
3968 4013 break;
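/*
 * [Editorial sketch -- not part of this change.]  Both subroutines are
 * plain field extractions from a dev_t: the minor number occupies the
 * low NBITSMINOR64 bits on LP64 and the major number sits above it.
 * The inverse (packing) direction, with assumed LP64 constants:
 */
#include <stdint.h>

#define	SK_NBITSMINOR64	32		/* assumed value of NBITSMINOR64 */
#define	SK_MAXMAJ64	0xffffffffULL	/* assumed value of MAXMAJ64 */
#define	SK_MAXMIN64	0xffffffffULL	/* assumed value of MAXMIN64 */

static uint64_t
sk_makedevice(uint64_t maj, uint64_t min)
{
	return (((maj & SK_MAXMAJ64) << SK_NBITSMINOR64) |
	    (min & SK_MAXMIN64));
}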
3969 4014
3970 4015 case DIF_SUBR_DDI_PATHNAME: {
3971 4016 /*
3972 4017 * This one is a galactic mess. We are going to roughly
3973 4018 * emulate ddi_pathname(), but it's made more complicated
3974 4019 * by the fact that we (a) want to include the minor name and
3975 4020 * (b) must proceed iteratively instead of recursively.
3976 4021 */
3977 4022 uintptr_t dest = mstate->dtms_scratch_ptr;
3978 4023 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3979 4024 char *start = (char *)dest, *end = start + size - 1;
3980 4025 uintptr_t daddr = tupregs[0].dttk_value;
3981 4026 int64_t minor = (int64_t)tupregs[1].dttk_value;
3982 4027 char *s;
3983 4028 int i, len, depth = 0;
3984 4029
3985 4030 /*
3986 4031 * Due to all the pointer jumping we do and context we must
3987 4032 * rely upon, we just mandate that the user must have kernel
3988 4033 * read privileges to use this routine.
3989 4034 */
3990 4035 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) {
3991 4036 *flags |= CPU_DTRACE_KPRIV;
3992 4037 *illval = daddr;
3993 4038 regs[rd] = NULL;
3994 4039 }
3995 4040
3996 4041 if (!DTRACE_INSCRATCH(mstate, size)) {
3997 4042 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3998 4043 regs[rd] = NULL;
3999 4044 break;
4000 4045 }
4001 4046
4002 4047 *end = '\0';
4003 4048
4004 4049 /*
4005 4050 * We want to have a name for the minor. In order to do this,
4006 4051 * we need to walk the minor list from the devinfo. We want
4007 4052 * to be sure that we don't infinitely walk a circular list,
4008 4053 * so we check for circularity by sending a scout pointer
4009 4054 * ahead two elements for every element that we iterate over;
4010 4055 * if the list is circular, these will ultimately point to the
4011 4056 * same element. You may recognize this little trick as the
4012 4057 * answer to a stupid interview question -- one that always
4013 4058 * seems to be asked by those who had to have it laboriously
4014 4059 * explained to them, and who can't even concisely describe
4015 4060 * the conditions under which one would be forced to resort to
4016 4061 * this technique. Needless to say, those conditions are
4017 4062 * found here -- and probably only here. Is this the only use
4018 4063 * of this infamous trick in shipping, production code? If it
4019 4064 * isn't, it probably should be...
4020 4065 */
4021 4066 if (minor != -1) {
4022 4067 uintptr_t maddr = dtrace_loadptr(daddr +
4023 4068 offsetof(struct dev_info, devi_minor));
4024 4069
4025 4070 uintptr_t next = offsetof(struct ddi_minor_data, next);
4026 4071 uintptr_t name = offsetof(struct ddi_minor_data,
4027 4072 d_minor) + offsetof(struct ddi_minor, name);
4028 4073 uintptr_t dev = offsetof(struct ddi_minor_data,
4029 4074 d_minor) + offsetof(struct ddi_minor, dev);
4030 4075 uintptr_t scout;
4031 4076
4032 4077 if (maddr != NULL)
4033 4078 scout = dtrace_loadptr(maddr + next);
4034 4079
4035 4080 while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
4036 4081 uint64_t m;
4037 4082 #ifdef _LP64
4038 4083 m = dtrace_load64(maddr + dev) & MAXMIN64;
4039 4084 #else
4040 4085 m = dtrace_load32(maddr + dev) & MAXMIN;
4041 4086 #endif
4042 4087 if (m != minor) {
4043 4088 maddr = dtrace_loadptr(maddr + next);
4044 4089
4045 4090 if (scout == NULL)
4046 4091 continue;
4047 4092
4048 4093 scout = dtrace_loadptr(scout + next);
4049 4094
4050 4095 if (scout == NULL)
4051 4096 continue;
4052 4097
4053 4098 scout = dtrace_loadptr(scout + next);
4054 4099
4055 4100 if (scout == NULL)
4056 4101 continue;
4057 4102
4058 4103 if (scout == maddr) {
4059 4104 *flags |= CPU_DTRACE_ILLOP;
4060 4105 break;
4061 4106 }
4062 4107
4063 4108 continue;
4064 4109 }
4065 4110
4066 4111 /*
4067 4112 * We have the minor data. Now we need to
4068 4113 * copy the minor's name into the end of the
4069 4114 * pathname.
4070 4115 */
4071 4116 s = (char *)dtrace_loadptr(maddr + name);
4072 4117 len = dtrace_strlen(s, size);
4073 4118
4074 4119 if (*flags & CPU_DTRACE_FAULT)
4075 4120 break;
4076 4121
4077 4122 if (len != 0) {
4078 4123 if ((end -= (len + 1)) < start)
4079 4124 break;
4080 4125
4081 4126 *end = ':';
4082 4127 }
4083 4128
4084 4129 for (i = 1; i <= len; i++)
4085 4130 end[i] = dtrace_load8((uintptr_t)s++);
4086 4131 break;
4087 4132 }
4088 4133 }
4089 4134
4090 4135 while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
4091 4136 ddi_node_state_t devi_state;
4092 4137
4093 4138 devi_state = dtrace_load32(daddr +
4094 4139 offsetof(struct dev_info, devi_node_state));
4095 4140
4096 4141 if (*flags & CPU_DTRACE_FAULT)
4097 4142 break;
4098 4143
4099 4144 if (devi_state >= DS_INITIALIZED) {
4100 4145 s = (char *)dtrace_loadptr(daddr +
4101 4146 offsetof(struct dev_info, devi_addr));
4102 4147 len = dtrace_strlen(s, size);
4103 4148
4104 4149 if (*flags & CPU_DTRACE_FAULT)
4105 4150 break;
4106 4151
4107 4152 if (len != 0) {
4108 4153 if ((end -= (len + 1)) < start)
4109 4154 break;
4110 4155
4111 4156 *end = '@';
4112 4157 }
4113 4158
4114 4159 for (i = 1; i <= len; i++)
4115 4160 end[i] = dtrace_load8((uintptr_t)s++);
4116 4161 }
4117 4162
4118 4163 /*
4119 4164 * Now for the node name...
4120 4165 */
4121 4166 s = (char *)dtrace_loadptr(daddr +
4122 4167 offsetof(struct dev_info, devi_node_name));
4123 4168
4124 4169 daddr = dtrace_loadptr(daddr +
4125 4170 offsetof(struct dev_info, devi_parent));
4126 4171
4127 4172 /*
4128 4173 * If our parent is NULL (that is, if we're the root
4129 4174 * node), we're going to use the special path
4130 4175 * "devices".
4131 4176 */
4132 4177 if (daddr == NULL)
4133 4178 s = "devices";
4134 4179
4135 4180 len = dtrace_strlen(s, size);
4136 4181 if (*flags & CPU_DTRACE_FAULT)
4137 4182 break;
4138 4183
4139 4184 if ((end -= (len + 1)) < start)
4140 4185 break;
4141 4186
4142 4187 for (i = 1; i <= len; i++)
4143 4188 end[i] = dtrace_load8((uintptr_t)s++);
4144 4189 *end = '/';
4145 4190
4146 4191 if (depth++ > dtrace_devdepth_max) {
4147 4192 *flags |= CPU_DTRACE_ILLOP;
4148 4193 break;
4149 4194 }
4150 4195 }
4151 4196
4152 4197 if (end < start)
4153 4198 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4154 4199
4155 4200 if (daddr == NULL) {
4156 4201 regs[rd] = (uintptr_t)end;
4157 4202 mstate->dtms_scratch_ptr += size;
4158 4203 }
4159 4204
4160 4205 break;
4161 4206 }
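/*
 * [Editorial sketch -- not part of this change.]  The scout is the
 * classic two-speed cycle check: for every step the walker takes, the
 * scout takes two, so on a circular list the scout must eventually
 * land on the walker.  Reduced to its essentials over a hypothetical
 * node type:
 */
#include <stddef.h>

struct node {
	struct node *next;
};

static int
has_cycle(struct node *walk)
{
	struct node *scout = (walk != NULL) ? walk->next : NULL;

	while (walk != NULL) {
		if (scout != NULL)
			scout = scout->next;
		if (scout != NULL)
			scout = scout->next;
		if (scout != NULL && scout == walk)
			return (1);	/* the scout lapped the walker */
		walk = walk->next;
	}

	return (0);			/* fell off the end: acyclic */
}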
4162 4207
4163 4208 case DIF_SUBR_STRJOIN: {
4164 4209 char *d = (char *)mstate->dtms_scratch_ptr;
4165 4210 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4166 4211 uintptr_t s1 = tupregs[0].dttk_value;
4167 4212 uintptr_t s2 = tupregs[1].dttk_value;
4168 4213 int i = 0;
4169 4214
4170 4215 if (!dtrace_strcanload(s1, size, mstate, vstate) ||
4171 4216 !dtrace_strcanload(s2, size, mstate, vstate)) {
4172 4217 regs[rd] = NULL;
4173 4218 break;
4174 4219 }
4175 4220
4176 4221 if (!DTRACE_INSCRATCH(mstate, size)) {
4177 4222 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4178 4223 regs[rd] = NULL;
4179 4224 break;
4180 4225 }
4181 4226
4182 4227 for (;;) {
4183 4228 if (i >= size) {
4184 4229 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4185 4230 regs[rd] = NULL;
4186 4231 break;
4187 4232 }
4188 4233
4189 4234 if ((d[i++] = dtrace_load8(s1++)) == '\0') {
4190 4235 i--;
4191 4236 break;
4192 4237 }
4193 4238 }
4194 4239
4195 4240 for (;;) {
4196 4241 if (i >= size) {
4197 4242 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4198 4243 regs[rd] = NULL;
4199 4244 break;
4200 4245 }
4201 4246
4202 4247 if ((d[i++] = dtrace_load8(s2++)) == '\0')
4203 4248 break;
4204 4249 }
4205 4250
4206 4251 if (i < size) {
4207 4252 mstate->dtms_scratch_ptr += i;
4208 4253 regs[rd] = (uintptr_t)d;
4209 4254 }
4210 4255
4211 4256 break;
4212 4257 }
4213 4258
4214 4259 case DIF_SUBR_LLTOSTR: {
4215 4260 int64_t i = (int64_t)tupregs[0].dttk_value;
4216 4261 uint64_t val, digit;
4217 4262 uint64_t size = 65; /* enough room for 2^64 in binary */
4218 4263 char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
4219 4264 int base = 10;
4220 4265
4221 4266 if (nargs > 1) {
4222 4267 if ((base = tupregs[1].dttk_value) <= 1 ||
4223 4268 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
4224 4269 *flags |= CPU_DTRACE_ILLOP;
4225 4270 break;
4226 4271 }
4227 4272 }
4228 4273
4229 4274 val = (base == 10 && i < 0) ? i * -1 : i;
4230 4275
4231 4276 if (!DTRACE_INSCRATCH(mstate, size)) {
4232 4277 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4233 4278 regs[rd] = NULL;
4234 4279 break;
4235 4280 }
4236 4281
4237 4282 for (*end-- = '\0'; val; val /= base) {
4238 4283 if ((digit = val % base) <= '9' - '0') {
4239 4284 *end-- = '0' + digit;
4240 4285 } else {
4241 4286 *end-- = 'a' + (digit - ('9' - '0') - 1);
4242 4287 }
4243 4288 }
4244 4289
4245 4290 if (i == 0 && base == 16)
4246 4291 *end-- = '0';
4247 4292
4248 4293 if (base == 16)
4249 4294 *end-- = 'x';
4250 4295
4251 4296 if (i == 0 || base == 8 || base == 16)
4252 4297 *end-- = '0';
4253 4298
4254 4299 if (i < 0 && base == 10)
4255 4300 *end-- = '-';
4256 4301
4257 4302 regs[rd] = (uintptr_t)end + 1;
4258 4303 mstate->dtms_scratch_ptr += size;
4259 4304 break;
4260 4305 }
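/*
 * [Editorial note -- not part of this change.]  Digits are emitted from
 * right to left with the radix prefix prepended last, so for example:
 *
 *	lltostr(-10)		=> "-10"
 *	lltostr(255, 16)	=> "0xff"
 *	lltostr(8, 8)		=> "010"
 *	lltostr(0, 16)		=> "0x0"
 *
 * Only base 10 renders negative values with a leading sign; any other
 * base formats the raw two's-complement bit pattern.
 */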
4261 4306
4262 4307 case DIF_SUBR_HTONS:
4263 4308 case DIF_SUBR_NTOHS:
4264 4309 #ifdef _BIG_ENDIAN
4265 4310 regs[rd] = (uint16_t)tupregs[0].dttk_value;
4266 4311 #else
4267 4312 regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
4268 4313 #endif
4269 4314 break;
4270 4315
4271 4316
4272 4317 case DIF_SUBR_HTONL:
4273 4318 case DIF_SUBR_NTOHL:
4274 4319 #ifdef _BIG_ENDIAN
4275 4320 regs[rd] = (uint32_t)tupregs[0].dttk_value;
4276 4321 #else
4277 4322 regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
4278 4323 #endif
4279 4324 break;
4280 4325
4281 4326
4282 4327 case DIF_SUBR_HTONLL:
4283 4328 case DIF_SUBR_NTOHLL:
4284 4329 #ifdef _BIG_ENDIAN
4285 4330 regs[rd] = (uint64_t)tupregs[0].dttk_value;
4286 4331 #else
4287 4332 regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
4288 4333 #endif
4289 4334 break;
4290 4335
4291 4336
4292 4337 case DIF_SUBR_DIRNAME:
4293 4338 case DIF_SUBR_BASENAME: {
4294 4339 char *dest = (char *)mstate->dtms_scratch_ptr;
4295 4340 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4296 4341 uintptr_t src = tupregs[0].dttk_value;
4297 4342 int i, j, len = dtrace_strlen((char *)src, size);
4298 4343 int lastbase = -1, firstbase = -1, lastdir = -1;
4299 4344 int start, end;
4300 4345
4301 4346 if (!dtrace_canload(src, len + 1, mstate, vstate)) {
4302 4347 regs[rd] = NULL;
4303 4348 break;
4304 4349 }
4305 4350
4306 4351 if (!DTRACE_INSCRATCH(mstate, size)) {
4307 4352 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4308 4353 regs[rd] = NULL;
4309 4354 break;
4310 4355 }
4311 4356
4312 4357 /*
4313 4358 		 * The basename and dirname for a zero-length string are
4314 4359 * defined to be "."
4315 4360 */
4316 4361 if (len == 0) {
4317 4362 len = 1;
4318 4363 src = (uintptr_t)".";
4319 4364 }
4320 4365
4321 4366 /*
4322 4367 * Start from the back of the string, moving back toward the
4323 4368 * front until we see a character that isn't a slash. That
4324 4369 * character is the last character in the basename.
4325 4370 */
4326 4371 for (i = len - 1; i >= 0; i--) {
4327 4372 if (dtrace_load8(src + i) != '/')
4328 4373 break;
4329 4374 }
4330 4375
4331 4376 if (i >= 0)
4332 4377 lastbase = i;
4333 4378
4334 4379 /*
4335 4380 * Starting from the last character in the basename, move
4336 4381 * towards the front until we find a slash. The character
4337 4382 * that we processed immediately before that is the first
4338 4383 * character in the basename.
4339 4384 */
4340 4385 for (; i >= 0; i--) {
4341 4386 if (dtrace_load8(src + i) == '/')
4342 4387 break;
4343 4388 }
4344 4389
4345 4390 if (i >= 0)
4346 4391 firstbase = i + 1;
4347 4392
4348 4393 /*
4349 4394 * Now keep going until we find a non-slash character. That
4350 4395 * character is the last character in the dirname.
4351 4396 */
4352 4397 for (; i >= 0; i--) {
4353 4398 if (dtrace_load8(src + i) != '/')
4354 4399 break;
4355 4400 }
4356 4401
4357 4402 if (i >= 0)
4358 4403 lastdir = i;
4359 4404
4360 4405 ASSERT(!(lastbase == -1 && firstbase != -1));
4361 4406 ASSERT(!(firstbase == -1 && lastdir != -1));
4362 4407
4363 4408 if (lastbase == -1) {
4364 4409 /*
4365 4410 * We didn't find a non-slash character. We know that
4366 4411 * the length is non-zero, so the whole string must be
4367 4412 * slashes. In either the dirname or the basename
4368 4413 * case, we return '/'.
4369 4414 */
4370 4415 ASSERT(firstbase == -1);
4371 4416 firstbase = lastbase = lastdir = 0;
4372 4417 }
4373 4418
4374 4419 if (firstbase == -1) {
4375 4420 /*
4376 4421 * The entire string consists only of a basename
4377 4422 * component. If we're looking for dirname, we need
4378 4423 * to change our string to be just "."; if we're
4379 4424 * looking for a basename, we'll just set the first
4380 4425 * character of the basename to be 0.
4381 4426 */
4382 4427 if (subr == DIF_SUBR_DIRNAME) {
4383 4428 ASSERT(lastdir == -1);
4384 4429 src = (uintptr_t)".";
4385 4430 lastdir = 0;
4386 4431 } else {
4387 4432 firstbase = 0;
4388 4433 }
4389 4434 }
4390 4435
4391 4436 if (subr == DIF_SUBR_DIRNAME) {
4392 4437 if (lastdir == -1) {
4393 4438 /*
4394 4439 * We know that we have a slash in the name --
4395 4440 * or lastdir would be set to 0, above. And
4396 4441 * because lastdir is -1, we know that this
4397 4442 * slash must be the first character. (That
4398 4443 * is, the full string must be of the form
4399 4444 * "/basename".) In this case, the last
4400 4445 * character of the directory name is 0.
4401 4446 */
4402 4447 lastdir = 0;
4403 4448 }
4404 4449
4405 4450 start = 0;
4406 4451 end = lastdir;
4407 4452 } else {
4408 4453 ASSERT(subr == DIF_SUBR_BASENAME);
4409 4454 ASSERT(firstbase != -1 && lastbase != -1);
4410 4455 start = firstbase;
4411 4456 end = lastbase;
4412 4457 }
4413 4458
4414 4459 for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
4415 4460 dest[j] = dtrace_load8(src + i);
4416 4461
4417 4462 dest[j] = '\0';
4418 4463 regs[rd] = (uintptr_t)dest;
4419 4464 mstate->dtms_scratch_ptr += size;
4420 4465 break;
4421 4466 }
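/*
 * [Editorial note -- not part of this change.]  The three backward scans
 * above reproduce the usual basename(1)/dirname(1) edge cases:
 *
 *	input		basename()	dirname()
 *	"/usr/lib/"	"lib"		"/usr"
 *	"/usr/"		"usr"		"/"
 *	"usr"		"usr"		"."
 *	"/"		"/"		"/"
 *	""		"."		"."
 */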
4422 4467
4423 4468 case DIF_SUBR_CLEANPATH: {
4424 4469 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4425 4470 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4426 4471 uintptr_t src = tupregs[0].dttk_value;
4427 4472 int i = 0, j = 0;
4428 4473
4429 4474 if (!dtrace_strcanload(src, size, mstate, vstate)) {
4430 4475 regs[rd] = NULL;
4431 4476 break;
4432 4477 }
4433 4478
4434 4479 if (!DTRACE_INSCRATCH(mstate, size)) {
4435 4480 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4436 4481 regs[rd] = NULL;
4437 4482 break;
4438 4483 }
4439 4484
4440 4485 /*
4441 4486 * Move forward, loading each character.
4442 4487 */
4443 4488 do {
4444 4489 c = dtrace_load8(src + i++);
4445 4490 next:
4446 4491 if (j + 5 >= size) /* 5 = strlen("/..c\0") */
4447 4492 break;
4448 4493
4449 4494 if (c != '/') {
4450 4495 dest[j++] = c;
4451 4496 continue;
4452 4497 }
4453 4498
4454 4499 c = dtrace_load8(src + i++);
4455 4500
4456 4501 if (c == '/') {
4457 4502 /*
4458 4503 * We have two slashes -- we can just advance
4459 4504 * to the next character.
4460 4505 */
4461 4506 goto next;
4462 4507 }
4463 4508
4464 4509 if (c != '.') {
4465 4510 /*
4466 4511 * This is not "." and it's not ".." -- we can
4467 4512 * just store the "/" and this character and
4468 4513 * drive on.
4469 4514 */
4470 4515 dest[j++] = '/';
4471 4516 dest[j++] = c;
4472 4517 continue;
4473 4518 }
4474 4519
4475 4520 c = dtrace_load8(src + i++);
4476 4521
4477 4522 if (c == '/') {
4478 4523 /*
4479 4524 * This is a "/./" component. We're not going
4480 4525 * to store anything in the destination buffer;
4481 4526 * we're just going to go to the next component.
4482 4527 */
4483 4528 goto next;
4484 4529 }
4485 4530
4486 4531 if (c != '.') {
4487 4532 /*
4488 4533 * This is not ".." -- we can just store the
4489 4534 * "/." and this character and continue
4490 4535 * processing.
4491 4536 */
4492 4537 dest[j++] = '/';
4493 4538 dest[j++] = '.';
4494 4539 dest[j++] = c;
4495 4540 continue;
4496 4541 }
4497 4542
4498 4543 c = dtrace_load8(src + i++);
4499 4544
4500 4545 if (c != '/' && c != '\0') {
4501 4546 /*
4502 4547 * This is not ".." -- it's "..[mumble]".
4503 4548 * We'll store the "/.." and this character
4504 4549 * and continue processing.
4505 4550 */
4506 4551 dest[j++] = '/';
4507 4552 dest[j++] = '.';
4508 4553 dest[j++] = '.';
4509 4554 dest[j++] = c;
4510 4555 continue;
4511 4556 }
4512 4557
4513 4558 /*
4514 4559 * This is "/../" or "/..\0". We need to back up
4515 4560 * our destination pointer until we find a "/".
4516 4561 */
4517 4562 i--;
4518 4563 while (j != 0 && dest[--j] != '/')
4519 4564 continue;
4520 4565
4521 4566 if (c == '\0')
4522 4567 dest[++j] = '/';
4523 4568 } while (c != '\0');
4524 4569
4525 4570 dest[j] = '\0';
4526 4571 regs[rd] = (uintptr_t)dest;
4527 4572 mstate->dtms_scratch_ptr += size;
4528 4573 break;
4529 4574 }
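/*
 * [Editorial note -- not part of this change.]  The pass above collapses
 * components purely lexically -- it never consults the file system, so
 * symbolic links are not resolved:
 *
 *	cleanpath("/usr//lib")		=> "/usr/lib"
 *	cleanpath("/usr/./lib")		=> "/usr/lib"
 *	cleanpath("/usr/lib/../bin")	=> "/usr/bin"
 *	cleanpath("/usr/lib/..")	=> "/usr/"
 */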
4530 4575
4531 4576 case DIF_SUBR_INET_NTOA:
4532 4577 case DIF_SUBR_INET_NTOA6:
4533 4578 case DIF_SUBR_INET_NTOP: {
4534 4579 size_t size;
4535 4580 int af, argi, i;
4536 4581 char *base, *end;
4537 4582
4538 4583 if (subr == DIF_SUBR_INET_NTOP) {
4539 4584 af = (int)tupregs[0].dttk_value;
4540 4585 argi = 1;
4541 4586 } else {
4542 4587 af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
4543 4588 argi = 0;
4544 4589 }
4545 4590
4546 4591 if (af == AF_INET) {
4547 4592 ipaddr_t ip4;
4548 4593 uint8_t *ptr8, val;
4549 4594
4550 4595 /*
4551 4596 * Safely load the IPv4 address.
4552 4597 */
4553 4598 ip4 = dtrace_load32(tupregs[argi].dttk_value);
4554 4599
4555 4600 /*
4556 4601 			 * Check that an IPv4 string will fit in scratch.
4557 4602 */
4558 4603 size = INET_ADDRSTRLEN;
4559 4604 if (!DTRACE_INSCRATCH(mstate, size)) {
4560 4605 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4561 4606 regs[rd] = NULL;
4562 4607 break;
4563 4608 }
4564 4609 base = (char *)mstate->dtms_scratch_ptr;
4565 4610 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4566 4611
4567 4612 /*
4568 4613 * Stringify as a dotted decimal quad.
4569 4614 */
4570 4615 *end-- = '\0';
4571 4616 ptr8 = (uint8_t *)&ip4;
4572 4617 for (i = 3; i >= 0; i--) {
4573 4618 val = ptr8[i];
4574 4619
4575 4620 if (val == 0) {
4576 4621 *end-- = '0';
4577 4622 } else {
4578 4623 for (; val; val /= 10) {
4579 4624 *end-- = '0' + (val % 10);
4580 4625 }
4581 4626 }
4582 4627
4583 4628 if (i > 0)
4584 4629 *end-- = '.';
4585 4630 }
4586 4631 ASSERT(end + 1 >= base);
4587 4632
4588 4633 } else if (af == AF_INET6) {
4589 4634 struct in6_addr ip6;
4590 4635 int firstzero, tryzero, numzero, v6end;
4591 4636 uint16_t val;
4592 4637 const char digits[] = "0123456789abcdef";
4593 4638
4594 4639 /*
4595 4640 * Stringify using RFC 1884 convention 2 - 16 bit
4596 4641 * hexadecimal values with a zero-run compression.
4597 4642 * Lower case hexadecimal digits are used.
4598 4643 			 * e.g., fe80::214:4fff:fe0b:76c8.
4599 4644 * The IPv4 embedded form is returned for inet_ntop,
4600 4645 * just the IPv4 string is returned for inet_ntoa6.
4601 4646 */
4602 4647
4603 4648 /*
4604 4649 * Safely load the IPv6 address.
4605 4650 */
4606 4651 dtrace_bcopy(
4607 4652 (void *)(uintptr_t)tupregs[argi].dttk_value,
4608 4653 (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));
4609 4654
4610 4655 /*
4611 4656 			 * Check that an IPv6 string will fit in scratch.
4612 4657 */
4613 4658 size = INET6_ADDRSTRLEN;
4614 4659 if (!DTRACE_INSCRATCH(mstate, size)) {
4615 4660 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4616 4661 regs[rd] = NULL;
4617 4662 break;
4618 4663 }
4619 4664 base = (char *)mstate->dtms_scratch_ptr;
4620 4665 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4621 4666 *end-- = '\0';
4622 4667
4623 4668 /*
4624 4669 * Find the longest run of 16 bit zero values
4625 4670 * for the single allowed zero compression - "::".
4626 4671 */
4627 4672 firstzero = -1;
4628 4673 tryzero = -1;
4629 4674 numzero = 1;
4630 4675 for (i = 0; i < sizeof (struct in6_addr); i++) {
4631 4676 if (ip6._S6_un._S6_u8[i] == 0 &&
4632 4677 tryzero == -1 && i % 2 == 0) {
4633 4678 tryzero = i;
4634 4679 continue;
4635 4680 }
4636 4681
4637 4682 if (tryzero != -1 &&
4638 4683 (ip6._S6_un._S6_u8[i] != 0 ||
4639 4684 i == sizeof (struct in6_addr) - 1)) {
4640 4685
4641 4686 if (i - tryzero <= numzero) {
4642 4687 tryzero = -1;
4643 4688 continue;
4644 4689 }
4645 4690
4646 4691 firstzero = tryzero;
4647 4692 numzero = i - i % 2 - tryzero;
4648 4693 tryzero = -1;
4649 4694
4650 4695 if (ip6._S6_un._S6_u8[i] == 0 &&
4651 4696 i == sizeof (struct in6_addr) - 1)
4652 4697 numzero += 2;
4653 4698 }
4654 4699 }
4655 4700 ASSERT(firstzero + numzero <= sizeof (struct in6_addr));
4656 4701
4657 4702 /*
4658 4703 * Check for an IPv4 embedded address.
4659 4704 */
4660 4705 v6end = sizeof (struct in6_addr) - 2;
4661 4706 if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
4662 4707 IN6_IS_ADDR_V4COMPAT(&ip6)) {
4663 4708 for (i = sizeof (struct in6_addr) - 1;
4664 4709 i >= DTRACE_V4MAPPED_OFFSET; i--) {
4665 4710 ASSERT(end >= base);
4666 4711
4667 4712 val = ip6._S6_un._S6_u8[i];
4668 4713
4669 4714 if (val == 0) {
4670 4715 *end-- = '0';
4671 4716 } else {
4672 4717 for (; val; val /= 10) {
4673 4718 *end-- = '0' + val % 10;
4674 4719 }
4675 4720 }
4676 4721
4677 4722 if (i > DTRACE_V4MAPPED_OFFSET)
4678 4723 *end-- = '.';
4679 4724 }
4680 4725
4681 4726 if (subr == DIF_SUBR_INET_NTOA6)
4682 4727 goto inetout;
4683 4728
4684 4729 /*
4685 4730 * Set v6end to skip the IPv4 address that
4686 4731 * we have already stringified.
4687 4732 */
4688 4733 v6end = 10;
4689 4734 }
4690 4735
4691 4736 /*
4692 4737 * Build the IPv6 string by working through the
4693 4738 * address in reverse.
4694 4739 */
4695 4740 for (i = v6end; i >= 0; i -= 2) {
4696 4741 ASSERT(end >= base);
4697 4742
4698 4743 if (i == firstzero + numzero - 2) {
4699 4744 *end-- = ':';
4700 4745 *end-- = ':';
4701 4746 i -= numzero - 2;
4702 4747 continue;
4703 4748 }
4704 4749
4705 4750 if (i < 14 && i != firstzero - 2)
4706 4751 *end-- = ':';
4707 4752
4708 4753 val = (ip6._S6_un._S6_u8[i] << 8) +
4709 4754 ip6._S6_un._S6_u8[i + 1];
4710 4755
4711 4756 if (val == 0) {
4712 4757 *end-- = '0';
4713 4758 } else {
4714 4759 for (; val; val /= 16) {
4715 4760 *end-- = digits[val % 16];
4716 4761 }
4717 4762 }
4718 4763 }
4719 4764 ASSERT(end + 1 >= base);
4720 4765
4721 4766 } else {
4722 4767 /*
4723 4768 			 * The user didn't use AF_INET or AF_INET6.
4724 4769 */
4725 4770 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
4726 4771 regs[rd] = NULL;
4727 4772 break;
4728 4773 }
4729 4774
4730 4775 inetout: regs[rd] = (uintptr_t)end + 1;
4731 4776 mstate->dtms_scratch_ptr += size;
4732 4777 break;
4733 4778 }
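/*
 * [Editorial note -- not part of this change.]  The zero-run scan above
 * compresses only the longest run of zero-valued 16-bit groups (the
 * first such run on a tie), per RFC 1884:
 *
 *	0:0:0:0:0:0:0:1			=> "::1"
 *	fe80:0:0:0:214:4fff:fe0b:76c8	=> "fe80::214:4fff:fe0b:76c8"
 *	1:0:2:0:0:3:4:5			=> "1:0:2::3:4:5"
 *
 * For a v4-mapped address such as ::ffff:192.168.1.5, inet_ntop()
 * renders the embedded dotted quad at the tail while inet_ntoa6()
 * returns just the IPv4 string, as noted above.
 */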
4734 4779
4735 4780 }
4736 4781 }
4737 4782
4738 4783 /*
4739 4784 * Emulate the execution of DTrace IR instructions specified by the given
4740 4785 * DIF object. This function is deliberately void of assertions as all of
4741 4786 * the necessary checks are handled by a call to dtrace_difo_validate().
4742 4787 */
4743 4788 static uint64_t
4744 4789 dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
4745 4790 dtrace_vstate_t *vstate, dtrace_state_t *state)
4746 4791 {
4747 4792 const dif_instr_t *text = difo->dtdo_buf;
4748 4793 const uint_t textlen = difo->dtdo_len;
4749 4794 const char *strtab = difo->dtdo_strtab;
4750 4795 const uint64_t *inttab = difo->dtdo_inttab;
4751 4796
4752 4797 uint64_t rval = 0;
4753 4798 dtrace_statvar_t *svar;
4754 4799 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
4755 4800 dtrace_difv_t *v;
4756 4801 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
4757 4802 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
4758 4803
4759 4804 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
4760 4805 uint64_t regs[DIF_DIR_NREGS];
4761 4806 uint64_t *tmp;
4762 4807
4763 4808 uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
4764 4809 int64_t cc_r;
4765 4810 uint_t pc = 0, id, opc;
4766 4811 uint8_t ttop = 0;
4767 4812 dif_instr_t instr;
4768 4813 uint_t r1, r2, rd;
4769 4814
4770 4815 /*
4771 4816 * We stash the current DIF object into the machine state: we need it
4772 4817 * for subsequent access checking.
4773 4818 */
4774 4819 mstate->dtms_difo = difo;
4775 4820
4776 4821 regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */
4777 4822
4778 4823 while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
4779 4824 opc = pc;
4780 4825
4781 4826 instr = text[pc++];
4782 4827 r1 = DIF_INSTR_R1(instr);
4783 4828 r2 = DIF_INSTR_R2(instr);
4784 4829 rd = DIF_INSTR_RD(instr);
4785 4830
4786 4831 switch (DIF_INSTR_OP(instr)) {
4787 4832 case DIF_OP_OR:
4788 4833 regs[rd] = regs[r1] | regs[r2];
4789 4834 break;
4790 4835 case DIF_OP_XOR:
4791 4836 regs[rd] = regs[r1] ^ regs[r2];
4792 4837 break;
4793 4838 case DIF_OP_AND:
4794 4839 regs[rd] = regs[r1] & regs[r2];
4795 4840 break;
4796 4841 case DIF_OP_SLL:
4797 4842 regs[rd] = regs[r1] << regs[r2];
4798 4843 break;
4799 4844 case DIF_OP_SRL:
4800 4845 regs[rd] = regs[r1] >> regs[r2];
4801 4846 break;
4802 4847 case DIF_OP_SUB:
4803 4848 regs[rd] = regs[r1] - regs[r2];
4804 4849 break;
4805 4850 case DIF_OP_ADD:
4806 4851 regs[rd] = regs[r1] + regs[r2];
4807 4852 break;
4808 4853 case DIF_OP_MUL:
4809 4854 regs[rd] = regs[r1] * regs[r2];
4810 4855 break;
4811 4856 case DIF_OP_SDIV:
4812 4857 if (regs[r2] == 0) {
4813 4858 regs[rd] = 0;
4814 4859 *flags |= CPU_DTRACE_DIVZERO;
4815 4860 } else {
4816 4861 regs[rd] = (int64_t)regs[r1] /
4817 4862 (int64_t)regs[r2];
4818 4863 }
4819 4864 break;
4820 4865
4821 4866 case DIF_OP_UDIV:
4822 4867 if (regs[r2] == 0) {
4823 4868 regs[rd] = 0;
4824 4869 *flags |= CPU_DTRACE_DIVZERO;
4825 4870 } else {
4826 4871 regs[rd] = regs[r1] / regs[r2];
4827 4872 }
4828 4873 break;
4829 4874
4830 4875 case DIF_OP_SREM:
4831 4876 if (regs[r2] == 0) {
4832 4877 regs[rd] = 0;
4833 4878 *flags |= CPU_DTRACE_DIVZERO;
4834 4879 } else {
4835 4880 regs[rd] = (int64_t)regs[r1] %
4836 4881 (int64_t)regs[r2];
4837 4882 }
4838 4883 break;
4839 4884
4840 4885 case DIF_OP_UREM:
4841 4886 if (regs[r2] == 0) {
4842 4887 regs[rd] = 0;
4843 4888 *flags |= CPU_DTRACE_DIVZERO;
4844 4889 } else {
4845 4890 regs[rd] = regs[r1] % regs[r2];
4846 4891 }
4847 4892 break;
4848 4893
4849 4894 case DIF_OP_NOT:
4850 4895 regs[rd] = ~regs[r1];
4851 4896 break;
4852 4897 case DIF_OP_MOV:
4853 4898 regs[rd] = regs[r1];
4854 4899 break;
4855 4900 case DIF_OP_CMP:
4856 4901 cc_r = regs[r1] - regs[r2];
4857 4902 cc_n = cc_r < 0;
4858 4903 cc_z = cc_r == 0;
4859 4904 cc_v = 0;
4860 4905 cc_c = regs[r1] < regs[r2];
4861 4906 break;
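/*
 * [Editorial sketch -- not part of this change.]  The four condition
 * codes follow the usual CPU convention: N (negative), Z (zero), V
 * (signed overflow, always 0 here) and C (unsigned borrow).  The signed
 * branches below test N ^ V, the unsigned ones test C; for instance a
 * model of DIF_OP_BL, "branch if signed less-than":
 */
#include <stdint.h>

static int
dif_bl_taken(uint64_t r1, uint64_t r2)
{
	int64_t cc_r = (int64_t)(r1 - r2);
	int cc_n = cc_r < 0;
	int cc_v = 0;		/* the emulator never sets overflow */

	return (cc_n ^ cc_v);	/* DIF_OP_BL's branch condition */
}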
4862 4907 case DIF_OP_TST:
4863 4908 cc_n = cc_v = cc_c = 0;
4864 4909 cc_z = regs[r1] == 0;
4865 4910 break;
4866 4911 case DIF_OP_BA:
4867 4912 pc = DIF_INSTR_LABEL(instr);
4868 4913 break;
4869 4914 case DIF_OP_BE:
4870 4915 if (cc_z)
4871 4916 pc = DIF_INSTR_LABEL(instr);
4872 4917 break;
4873 4918 case DIF_OP_BNE:
4874 4919 if (cc_z == 0)
4875 4920 pc = DIF_INSTR_LABEL(instr);
4876 4921 break;
4877 4922 case DIF_OP_BG:
4878 4923 if ((cc_z | (cc_n ^ cc_v)) == 0)
4879 4924 pc = DIF_INSTR_LABEL(instr);
4880 4925 break;
4881 4926 case DIF_OP_BGU:
4882 4927 if ((cc_c | cc_z) == 0)
4883 4928 pc = DIF_INSTR_LABEL(instr);
4884 4929 break;
4885 4930 case DIF_OP_BGE:
4886 4931 if ((cc_n ^ cc_v) == 0)
4887 4932 pc = DIF_INSTR_LABEL(instr);
4888 4933 break;
4889 4934 case DIF_OP_BGEU:
4890 4935 if (cc_c == 0)
4891 4936 pc = DIF_INSTR_LABEL(instr);
4892 4937 break;
4893 4938 case DIF_OP_BL:
4894 4939 if (cc_n ^ cc_v)
4895 4940 pc = DIF_INSTR_LABEL(instr);
4896 4941 break;
4897 4942 case DIF_OP_BLU:
4898 4943 if (cc_c)
4899 4944 pc = DIF_INSTR_LABEL(instr);
4900 4945 break;
4901 4946 case DIF_OP_BLE:
4902 4947 if (cc_z | (cc_n ^ cc_v))
4903 4948 pc = DIF_INSTR_LABEL(instr);
4904 4949 break;
4905 4950 case DIF_OP_BLEU:
4906 4951 if (cc_c | cc_z)
4907 4952 pc = DIF_INSTR_LABEL(instr);
4908 4953 break;
4909 4954 case DIF_OP_RLDSB:
4910 - if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
4911 - *flags |= CPU_DTRACE_KPRIV;
4912 - *illval = regs[r1];
4955 + if (!dtrace_canload(regs[r1], 1, mstate, vstate))
4913 4956 break;
4914 - }
4915 4957 /*FALLTHROUGH*/
4916 4958 case DIF_OP_LDSB:
4917 4959 regs[rd] = (int8_t)dtrace_load8(regs[r1]);
4918 4960 break;
4919 4961 case DIF_OP_RLDSH:
4920 - if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
4921 - *flags |= CPU_DTRACE_KPRIV;
4922 - *illval = regs[r1];
4962 + if (!dtrace_canload(regs[r1], 2, mstate, vstate))
4923 4963 break;
4924 - }
4925 4964 /*FALLTHROUGH*/
4926 4965 case DIF_OP_LDSH:
4927 4966 regs[rd] = (int16_t)dtrace_load16(regs[r1]);
4928 4967 break;
4929 4968 case DIF_OP_RLDSW:
4930 - if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
4931 - *flags |= CPU_DTRACE_KPRIV;
4932 - *illval = regs[r1];
4969 + if (!dtrace_canload(regs[r1], 4, mstate, vstate))
4933 4970 break;
4934 - }
4935 4971 /*FALLTHROUGH*/
4936 4972 case DIF_OP_LDSW:
4937 4973 regs[rd] = (int32_t)dtrace_load32(regs[r1]);
4938 4974 break;
4939 4975 case DIF_OP_RLDUB:
4940 - if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
4941 - *flags |= CPU_DTRACE_KPRIV;
4942 - *illval = regs[r1];
4976 + if (!dtrace_canload(regs[r1], 1, mstate, vstate))
4943 4977 break;
4944 - }
4945 4978 /*FALLTHROUGH*/
4946 4979 case DIF_OP_LDUB:
4947 4980 regs[rd] = dtrace_load8(regs[r1]);
4948 4981 break;
4949 4982 case DIF_OP_RLDUH:
4950 - if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
4951 - *flags |= CPU_DTRACE_KPRIV;
4952 - *illval = regs[r1];
4983 + if (!dtrace_canload(regs[r1], 2, mstate, vstate))
4953 4984 break;
4954 - }
4955 4985 /*FALLTHROUGH*/
4956 4986 case DIF_OP_LDUH:
4957 4987 regs[rd] = dtrace_load16(regs[r1]);
4958 4988 break;
4959 4989 case DIF_OP_RLDUW:
4960 - if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
4961 - *flags |= CPU_DTRACE_KPRIV;
4962 - *illval = regs[r1];
4990 + if (!dtrace_canload(regs[r1], 4, mstate, vstate))
4963 4991 break;
4964 - }
4965 4992 /*FALLTHROUGH*/
4966 4993 case DIF_OP_LDUW:
4967 4994 regs[rd] = dtrace_load32(regs[r1]);
4968 4995 break;
4969 4996 case DIF_OP_RLDX:
4970 - if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
4971 - *flags |= CPU_DTRACE_KPRIV;
4972 - *illval = regs[r1];
4997 + if (!dtrace_canload(regs[r1], 8, mstate, vstate))
4973 4998 break;
4974 - }
4975 4999 /*FALLTHROUGH*/
4976 5000 case DIF_OP_LDX:
4977 5001 regs[rd] = dtrace_load64(regs[r1]);
4978 5002 break;
4979 5003 case DIF_OP_ULDSB:
4980 5004 regs[rd] = (int8_t)
4981 5005 dtrace_fuword8((void *)(uintptr_t)regs[r1]);
4982 5006 break;
4983 5007 case DIF_OP_ULDSH:
4984 5008 regs[rd] = (int16_t)
4985 5009 dtrace_fuword16((void *)(uintptr_t)regs[r1]);
4986 5010 break;
4987 5011 case DIF_OP_ULDSW:
4988 5012 regs[rd] = (int32_t)
4989 5013 dtrace_fuword32((void *)(uintptr_t)regs[r1]);
4990 5014 break;
4991 5015 case DIF_OP_ULDUB:
4992 5016 regs[rd] =
4993 5017 dtrace_fuword8((void *)(uintptr_t)regs[r1]);
4994 5018 break;
4995 5019 case DIF_OP_ULDUH:
4996 5020 regs[rd] =
4997 5021 dtrace_fuword16((void *)(uintptr_t)regs[r1]);
4998 5022 break;
4999 5023 case DIF_OP_ULDUW:
5000 5024 regs[rd] =
5001 5025 dtrace_fuword32((void *)(uintptr_t)regs[r1]);
5002 5026 break;
5003 5027 case DIF_OP_ULDX:
5004 5028 regs[rd] =
5005 5029 dtrace_fuword64((void *)(uintptr_t)regs[r1]);
5006 5030 break;
5007 5031 case DIF_OP_RET:
5008 5032 rval = regs[rd];
5009 5033 pc = textlen;
5010 5034 break;
5011 5035 case DIF_OP_NOP:
5012 5036 break;
5013 5037 case DIF_OP_SETX:
5014 5038 regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
5015 5039 break;
5016 5040 case DIF_OP_SETS:
5017 5041 regs[rd] = (uint64_t)(uintptr_t)
5018 5042 (strtab + DIF_INSTR_STRING(instr));
5019 5043 break;
5020 5044 case DIF_OP_SCMP: {
5021 5045 size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
5022 5046 uintptr_t s1 = regs[r1];
5023 5047 uintptr_t s2 = regs[r2];
5024 5048
5025 5049 if (s1 != NULL &&
5026 5050 !dtrace_strcanload(s1, sz, mstate, vstate))
5027 5051 break;
5028 5052 if (s2 != NULL &&
5029 5053 !dtrace_strcanload(s2, sz, mstate, vstate))
5030 5054 break;
5031 5055
5032 5056 cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz);
5033 5057
5034 5058 cc_n = cc_r < 0;
5035 5059 cc_z = cc_r == 0;
5036 5060 cc_v = cc_c = 0;
5037 5061 break;
5038 5062 }
5039 5063 case DIF_OP_LDGA:
5040 5064 regs[rd] = dtrace_dif_variable(mstate, state,
5041 5065 r1, regs[r2]);
5042 5066 break;
5043 5067 case DIF_OP_LDGS:
5044 5068 id = DIF_INSTR_VAR(instr);
5045 5069
5046 5070 if (id >= DIF_VAR_OTHER_UBASE) {
5047 5071 uintptr_t a;
5048 5072
5049 5073 id -= DIF_VAR_OTHER_UBASE;
5050 5074 svar = vstate->dtvs_globals[id];
5051 5075 ASSERT(svar != NULL);
5052 5076 v = &svar->dtsv_var;
5053 5077
5054 5078 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
5055 5079 regs[rd] = svar->dtsv_data;
5056 5080 break;
5057 5081 }
5058 5082
5059 5083 a = (uintptr_t)svar->dtsv_data;
5060 5084
5061 5085 if (*(uint8_t *)a == UINT8_MAX) {
5062 5086 /*
5063 5087 * If the 0th byte is set to UINT8_MAX
5064 5088 * then this is to be treated as a
5065 5089 * reference to a NULL variable.
5066 5090 */
5067 5091 regs[rd] = NULL;
5068 5092 } else {
5069 5093 regs[rd] = a + sizeof (uint64_t);
5070 5094 }
5071 5095
5072 5096 break;
5073 5097 }
5074 5098
5075 5099 regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
5076 5100 break;
5077 5101
5078 5102 case DIF_OP_STGS:
5079 5103 id = DIF_INSTR_VAR(instr);
5080 5104
5081 5105 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5082 5106 id -= DIF_VAR_OTHER_UBASE;
5083 5107
5084 5108 svar = vstate->dtvs_globals[id];
5085 5109 ASSERT(svar != NULL);
5086 5110 v = &svar->dtsv_var;
5087 5111
5088 5112 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5089 5113 uintptr_t a = (uintptr_t)svar->dtsv_data;
5090 5114
5091 5115 ASSERT(a != NULL);
5092 5116 ASSERT(svar->dtsv_size != 0);
5093 5117
5094 5118 if (regs[rd] == NULL) {
5095 5119 *(uint8_t *)a = UINT8_MAX;
5096 5120 break;
5097 5121 } else {
5098 5122 *(uint8_t *)a = 0;
5099 5123 a += sizeof (uint64_t);
5100 5124 }
5101 5125 if (!dtrace_vcanload(
5102 5126 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
5103 5127 mstate, vstate))
5104 5128 break;
5105 5129
5106 5130 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5107 5131 (void *)a, &v->dtdv_type);
5108 5132 break;
5109 5133 }
5110 5134
5111 5135 svar->dtsv_data = regs[rd];
5112 5136 break;
5113 5137
5114 5138 case DIF_OP_LDTA:
5115 5139 /*
5116 5140 * There are no DTrace built-in thread-local arrays at
5117 5141 * present. This opcode is saved for future work.
5118 5142 */
5119 5143 *flags |= CPU_DTRACE_ILLOP;
5120 5144 regs[rd] = 0;
5121 5145 break;
5122 5146
5123 5147 case DIF_OP_LDLS:
5124 5148 id = DIF_INSTR_VAR(instr);
5125 5149
5126 5150 if (id < DIF_VAR_OTHER_UBASE) {
5127 5151 /*
5128 5152 * For now, this has no meaning.
5129 5153 */
5130 5154 regs[rd] = 0;
5131 5155 break;
5132 5156 }
5133 5157
5134 5158 id -= DIF_VAR_OTHER_UBASE;
5135 5159
5136 5160 ASSERT(id < vstate->dtvs_nlocals);
5137 5161 ASSERT(vstate->dtvs_locals != NULL);
5138 5162
5139 5163 svar = vstate->dtvs_locals[id];
5140 5164 ASSERT(svar != NULL);
5141 5165 v = &svar->dtsv_var;
5142 5166
5143 5167 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5144 5168 uintptr_t a = (uintptr_t)svar->dtsv_data;
5145 5169 size_t sz = v->dtdv_type.dtdt_size;
5146 5170
5147 5171 sz += sizeof (uint64_t);
5148 5172 ASSERT(svar->dtsv_size == NCPU * sz);
5149 5173 a += CPU->cpu_id * sz;
5150 5174
5151 5175 if (*(uint8_t *)a == UINT8_MAX) {
5152 5176 /*
5153 5177 * If the 0th byte is set to UINT8_MAX
5154 5178 * then this is to be treated as a
5155 5179 * reference to a NULL variable.
5156 5180 */
5157 5181 regs[rd] = NULL;
5158 5182 } else {
5159 5183 regs[rd] = a + sizeof (uint64_t);
5160 5184 }
5161 5185
5162 5186 break;
5163 5187 }
5164 5188
5165 5189 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
5166 5190 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
5167 5191 regs[rd] = tmp[CPU->cpu_id];
5168 5192 break;
5169 5193
5170 5194 case DIF_OP_STLS:
5171 5195 id = DIF_INSTR_VAR(instr);
5172 5196
5173 5197 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5174 5198 id -= DIF_VAR_OTHER_UBASE;
5175 5199 ASSERT(id < vstate->dtvs_nlocals);
5176 5200
5177 5201 ASSERT(vstate->dtvs_locals != NULL);
5178 5202 svar = vstate->dtvs_locals[id];
5179 5203 ASSERT(svar != NULL);
5180 5204 v = &svar->dtsv_var;
5181 5205
5182 5206 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5183 5207 uintptr_t a = (uintptr_t)svar->dtsv_data;
5184 5208 size_t sz = v->dtdv_type.dtdt_size;
5185 5209
5186 5210 sz += sizeof (uint64_t);
5187 5211 ASSERT(svar->dtsv_size == NCPU * sz);
5188 5212 a += CPU->cpu_id * sz;
5189 5213
5190 5214 if (regs[rd] == NULL) {
5191 5215 *(uint8_t *)a = UINT8_MAX;
5192 5216 break;
5193 5217 } else {
5194 5218 *(uint8_t *)a = 0;
5195 5219 a += sizeof (uint64_t);
5196 5220 }
5197 5221
5198 5222 if (!dtrace_vcanload(
5199 5223 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
5200 5224 mstate, vstate))
5201 5225 break;
5202 5226
5203 5227 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5204 5228 (void *)a, &v->dtdv_type);
5205 5229 break;
5206 5230 }
5207 5231
5208 5232 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
5209 5233 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
5210 5234 tmp[CPU->cpu_id] = regs[rd];
5211 5235 break;
5212 5236
5213 5237 case DIF_OP_LDTS: {
5214 5238 dtrace_dynvar_t *dvar;
5215 5239 dtrace_key_t *key;
5216 5240
5217 5241 id = DIF_INSTR_VAR(instr);
5218 5242 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5219 5243 id -= DIF_VAR_OTHER_UBASE;
5220 5244 v = &vstate->dtvs_tlocals[id];
5221 5245
5222 5246 key = &tupregs[DIF_DTR_NREGS];
5223 5247 key[0].dttk_value = (uint64_t)id;
5224 5248 key[0].dttk_size = 0;
5225 5249 DTRACE_TLS_THRKEY(key[1].dttk_value);
5226 5250 key[1].dttk_size = 0;
5227 5251
5228 5252 dvar = dtrace_dynvar(dstate, 2, key,
5229 5253 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
5230 5254 mstate, vstate);
5231 5255
5232 5256 if (dvar == NULL) {
5233 5257 regs[rd] = 0;
5234 5258 break;
5235 5259 }
5236 5260
5237 5261 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5238 5262 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
5239 5263 } else {
5240 5264 regs[rd] = *((uint64_t *)dvar->dtdv_data);
5241 5265 }
5242 5266
5243 5267 break;
5244 5268 }
5245 5269
5246 5270 case DIF_OP_STTS: {
5247 5271 dtrace_dynvar_t *dvar;
5248 5272 dtrace_key_t *key;
5249 5273
5250 5274 id = DIF_INSTR_VAR(instr);
5251 5275 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5252 5276 id -= DIF_VAR_OTHER_UBASE;
5253 5277
5254 5278 key = &tupregs[DIF_DTR_NREGS];
5255 5279 key[0].dttk_value = (uint64_t)id;
5256 5280 key[0].dttk_size = 0;
5257 5281 DTRACE_TLS_THRKEY(key[1].dttk_value);
5258 5282 key[1].dttk_size = 0;
5259 5283 v = &vstate->dtvs_tlocals[id];
5260 5284
5261 5285 dvar = dtrace_dynvar(dstate, 2, key,
5262 5286 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5263 5287 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5264 5288 regs[rd] ? DTRACE_DYNVAR_ALLOC :
5265 5289 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
5266 5290
5267 5291 /*
5268 5292 * Given that we're storing to thread-local data,
5269 5293 * we need to flush our predicate cache.
5270 5294 */
5271 5295 curthread->t_predcache = NULL;
5272 5296
5273 5297 if (dvar == NULL)
5274 5298 break;
5275 5299
5276 5300 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5277 5301 if (!dtrace_vcanload(
5278 5302 (void *)(uintptr_t)regs[rd],
5279 5303 &v->dtdv_type, mstate, vstate))
5280 5304 break;
5281 5305
5282 5306 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5283 5307 dvar->dtdv_data, &v->dtdv_type);
5284 5308 } else {
5285 5309 *((uint64_t *)dvar->dtdv_data) = regs[rd];
5286 5310 }
5287 5311
5288 5312 break;
5289 5313 }
5290 5314
5291 5315 case DIF_OP_SRA:
5292 5316 regs[rd] = (int64_t)regs[r1] >> regs[r2];
5293 5317 break;
5294 5318
5295 5319 case DIF_OP_CALL:
5296 5320 dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
5297 5321 regs, tupregs, ttop, mstate, state);
5298 5322 break;
5299 5323
5300 5324 case DIF_OP_PUSHTR:
5301 5325 if (ttop == DIF_DTR_NREGS) {
5302 5326 *flags |= CPU_DTRACE_TUPOFLOW;
5303 5327 break;
5304 5328 }
5305 5329
5306 5330 if (r1 == DIF_TYPE_STRING) {
5307 5331 /*
5308 5332 * If this is a string type and the size is 0,
5309 5333 * we'll use the system-wide default string
5310 5334 * size. Note that we are _not_ looking at
5311 5335 * the value of the DTRACEOPT_STRSIZE option;
5312 5336 * had this been set, we would expect to have
5313 5337 * a non-zero size value in the "pushtr".
5314 5338 */
5315 5339 tupregs[ttop].dttk_size =
5316 5340 dtrace_strlen((char *)(uintptr_t)regs[rd],
5317 5341 regs[r2] ? regs[r2] :
5318 5342 dtrace_strsize_default) + 1;
5319 5343 } else {
5320 5344 tupregs[ttop].dttk_size = regs[r2];
5321 5345 }
5322 5346
5323 5347 tupregs[ttop++].dttk_value = regs[rd];
5324 5348 break;
5325 5349
5326 5350 case DIF_OP_PUSHTV:
5327 5351 if (ttop == DIF_DTR_NREGS) {
5328 5352 *flags |= CPU_DTRACE_TUPOFLOW;
5329 5353 break;
5330 5354 }
5331 5355
5332 5356 tupregs[ttop].dttk_value = regs[rd];
5333 5357 tupregs[ttop++].dttk_size = 0;
5334 5358 break;
5335 5359
5336 5360 case DIF_OP_POPTS:
5337 5361 if (ttop != 0)
5338 5362 ttop--;
5339 5363 break;
5340 5364
5341 5365 case DIF_OP_FLUSHTS:
5342 5366 ttop = 0;
5343 5367 break;
5344 5368
5345 5369 case DIF_OP_LDGAA:
5346 5370 case DIF_OP_LDTAA: {
5347 5371 dtrace_dynvar_t *dvar;
5348 5372 dtrace_key_t *key = tupregs;
5349 5373 uint_t nkeys = ttop;
5350 5374
5351 5375 id = DIF_INSTR_VAR(instr);
5352 5376 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5353 5377 id -= DIF_VAR_OTHER_UBASE;
5354 5378
5355 5379 key[nkeys].dttk_value = (uint64_t)id;
5356 5380 key[nkeys++].dttk_size = 0;
5357 5381
5358 5382 if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
5359 5383 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
5360 5384 key[nkeys++].dttk_size = 0;
5361 5385 v = &vstate->dtvs_tlocals[id];
5362 5386 } else {
5363 5387 v = &vstate->dtvs_globals[id]->dtsv_var;
5364 5388 }
5365 5389
5366 5390 dvar = dtrace_dynvar(dstate, nkeys, key,
5367 5391 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5368 5392 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5369 5393 DTRACE_DYNVAR_NOALLOC, mstate, vstate);
5370 5394
5371 5395 if (dvar == NULL) {
5372 5396 regs[rd] = 0;
5373 5397 break;
5374 5398 }
5375 5399
5376 5400 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5377 5401 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
5378 5402 } else {
5379 5403 regs[rd] = *((uint64_t *)dvar->dtdv_data);
5380 5404 }
5381 5405
5382 5406 break;
5383 5407 }
5384 5408
5385 5409 case DIF_OP_STGAA:
5386 5410 case DIF_OP_STTAA: {
5387 5411 dtrace_dynvar_t *dvar;
5388 5412 dtrace_key_t *key = tupregs;
5389 5413 uint_t nkeys = ttop;
5390 5414
5391 5415 id = DIF_INSTR_VAR(instr);
5392 5416 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5393 5417 id -= DIF_VAR_OTHER_UBASE;
5394 5418
5395 5419 key[nkeys].dttk_value = (uint64_t)id;
5396 5420 key[nkeys++].dttk_size = 0;
5397 5421
5398 5422 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
5399 5423 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
5400 5424 key[nkeys++].dttk_size = 0;
5401 5425 v = &vstate->dtvs_tlocals[id];
5402 5426 } else {
5403 5427 v = &vstate->dtvs_globals[id]->dtsv_var;
5404 5428 }
5405 5429
5406 5430 dvar = dtrace_dynvar(dstate, nkeys, key,
5407 5431 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5408 5432 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5409 5433 regs[rd] ? DTRACE_DYNVAR_ALLOC :
5410 5434 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
5411 5435
5412 5436 if (dvar == NULL)
5413 5437 break;
5414 5438
5415 5439 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5416 5440 if (!dtrace_vcanload(
5417 5441 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
5418 5442 mstate, vstate))
5419 5443 break;
5420 5444
5421 5445 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5422 5446 dvar->dtdv_data, &v->dtdv_type);
5423 5447 } else {
5424 5448 *((uint64_t *)dvar->dtdv_data) = regs[rd];
5425 5449 }
5426 5450
5427 5451 break;
5428 5452 }
5429 5453
5430 5454 case DIF_OP_ALLOCS: {
5431 5455 uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
5432 5456 size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];
5433 5457
5434 5458 /*
5435 5459 * Rounding up the user allocation size could have
5436 5460 * overflowed large, bogus allocations (like -1ULL) to
5437 5461 * 0.
5438 5462 */
5439 5463 if (size < regs[r1] ||
5440 5464 !DTRACE_INSCRATCH(mstate, size)) {
5441 5465 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5442 5466 regs[rd] = NULL;
5443 5467 break;
5444 5468 }
5445 5469
5446 5470 dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
5447 5471 mstate->dtms_scratch_ptr += size;
5448 5472 regs[rd] = ptr;
5449 5473 break;
5450 5474 }
5451 5475
5452 5476 case DIF_OP_COPYS:
5453 5477 if (!dtrace_canstore(regs[rd], regs[r2],
5454 5478 mstate, vstate)) {
5455 5479 *flags |= CPU_DTRACE_BADADDR;
5456 5480 *illval = regs[rd];
5457 5481 break;
5458 5482 }
5459 5483
5460 5484 if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
5461 5485 break;
5462 5486
5463 5487 dtrace_bcopy((void *)(uintptr_t)regs[r1],
5464 5488 (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
5465 5489 break;
5466 5490
5467 5491 case DIF_OP_STB:
5468 5492 if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
5469 5493 *flags |= CPU_DTRACE_BADADDR;
5470 5494 *illval = regs[rd];
5471 5495 break;
5472 5496 }
5473 5497 *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
5474 5498 break;
5475 5499
5476 5500 case DIF_OP_STH:
5477 5501 if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
5478 5502 *flags |= CPU_DTRACE_BADADDR;
5479 5503 *illval = regs[rd];
5480 5504 break;
5481 5505 }
5482 5506 if (regs[rd] & 1) {
5483 5507 *flags |= CPU_DTRACE_BADALIGN;
5484 5508 *illval = regs[rd];
5485 5509 break;
5486 5510 }
5487 5511 *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
5488 5512 break;
5489 5513
5490 5514 case DIF_OP_STW:
5491 5515 if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
5492 5516 *flags |= CPU_DTRACE_BADADDR;
5493 5517 *illval = regs[rd];
5494 5518 break;
5495 5519 }
5496 5520 if (regs[rd] & 3) {
5497 5521 *flags |= CPU_DTRACE_BADALIGN;
5498 5522 *illval = regs[rd];
5499 5523 break;
5500 5524 }
5501 5525 *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
5502 5526 break;
5503 5527
5504 5528 case DIF_OP_STX:
5505 5529 if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
5506 5530 *flags |= CPU_DTRACE_BADADDR;
5507 5531 *illval = regs[rd];
5508 5532 break;
5509 5533 }
5510 5534 if (regs[rd] & 7) {
5511 5535 *flags |= CPU_DTRACE_BADALIGN;
5512 5536 *illval = regs[rd];
5513 5537 break;
5514 5538 }
5515 5539 *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
5516 5540 break;
5517 5541 }
5518 5542 }
5519 5543
5520 5544 if (!(*flags & CPU_DTRACE_FAULT))
5521 5545 return (rval);
5522 5546
5523 5547 mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
5524 5548 mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;
5525 5549
5526 5550 return (0);
5527 5551 }
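
[Editor's note] The branch opcodes above derive both signed and unsigned
orderings from the four condition codes that DIF_OP_CMP computes, in the
style of a hardware flags register; because cc_v is pinned to zero, the
signed tests assume the subtraction in the compare did not overflow. A
minimal stand-alone illustration (the variable names mirror the emulator;
the program itself is the editor's sketch, not DTrace code):

	#include <stdint.h>
	#include <assert.h>

	int
	main(void)
	{
		uint64_t r1 = 2, r2 = (uint64_t)-1;	/* -1: huge unsigned */
		int64_t cc_r = (int64_t)(r1 - r2);
		int cc_n = cc_r < 0;		/* negative */
		int cc_z = cc_r == 0;		/* zero */
		int cc_v = 0;			/* overflow, pinned as above */
		int cc_c = r1 < r2;		/* carry (unsigned borrow) */

		/* BG (signed >) is taken: 2 > -1. */
		assert((cc_z | (cc_n ^ cc_v)) == 0);
		/* BGU (unsigned >) is not taken: 2 < 0xffff...ffff. */
		assert((cc_c | cc_z) != 0);
		return (0);
	}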
5528 5552
5529 5553 static void
5530 5554 dtrace_action_breakpoint(dtrace_ecb_t *ecb)
5531 5555 {
5532 5556 dtrace_probe_t *probe = ecb->dte_probe;
5533 5557 dtrace_provider_t *prov = probe->dtpr_provider;
5534 5558 char c[DTRACE_FULLNAMELEN + 80], *str;
5535 5559 char *msg = "dtrace: breakpoint action at probe ";
5536 5560 char *ecbmsg = " (ecb ";
5537 5561 uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
5538 5562 uintptr_t val = (uintptr_t)ecb;
5539 5563 int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;
5540 5564
5541 5565 if (dtrace_destructive_disallow)
5542 5566 return;
5543 5567
5544 5568 /*
5545 5569 * It's impossible to be taking action on the NULL probe.
5546 5570 */
5547 5571 ASSERT(probe != NULL);
5548 5572
5549 5573 /*
5550 5574 * This is a poor man's (destitute man's?) sprintf(): we want to
5551 5575 * print the provider name, module name, function name and name of
5552 5576 * the probe, along with the hex address of the ECB with the breakpoint
5553 5577 * action -- all of which we must place in the character buffer by
5554 5578 * hand.
5555 5579 */
5556 5580 while (*msg != '\0')
5557 5581 c[i++] = *msg++;
5558 5582
5559 5583 for (str = prov->dtpv_name; *str != '\0'; str++)
5560 5584 c[i++] = *str;
5561 5585 c[i++] = ':';
5562 5586
5563 5587 for (str = probe->dtpr_mod; *str != '\0'; str++)
5564 5588 c[i++] = *str;
5565 5589 c[i++] = ':';
5566 5590
5567 5591 for (str = probe->dtpr_func; *str != '\0'; str++)
5568 5592 c[i++] = *str;
5569 5593 c[i++] = ':';
5570 5594
5571 5595 for (str = probe->dtpr_name; *str != '\0'; str++)
5572 5596 c[i++] = *str;
5573 5597
5574 5598 while (*ecbmsg != '\0')
5575 5599 c[i++] = *ecbmsg++;
5576 5600
5577 5601 while (shift >= 0) {
5578 5602 mask = (uintptr_t)0xf << shift;
5579 5603
5580 5604 if (val >= ((uintptr_t)1 << shift))
5581 5605 c[i++] = "0123456789abcdef"[(val & mask) >> shift];
5582 5606 shift -= 4;
5583 5607 }
5584 5608
5585 5609 c[i++] = ')';
5586 5610 c[i] = '\0';
5587 5611
5588 5612 debug_enter(c);
5589 5613 }
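
[Editor's note] The formatting loop above emits the ECB address one
nibble at a time, most significant first, suppressing leading zeroes with
the "val >= (1 << shift)" test. A stand-alone sketch of the same idiom
(hexappend() is a hypothetical name, editor's own):

	#include <stdio.h>
	#include <stdint.h>

	static void
	hexappend(char *c, int *ip, uintptr_t val)
	{
		int shift = (sizeof (uintptr_t) * 8) - 4;
		int i = *ip;

		while (shift >= 0) {
			uintptr_t mask = (uintptr_t)0xf << shift;

			if (val >= ((uintptr_t)1 << shift))
				c[i++] = "0123456789abcdef"[(val & mask) >> shift];
			shift -= 4;
		}
		*ip = i;
	}

	int
	main(void)
	{
		char buf[32];
		int i = 0;

		hexappend(buf, &i, (uintptr_t)0xdeadbeef);
		buf[i] = '\0';
		(void) printf("%s\n", buf);	/* prints "deadbeef" */
		return (0);
	}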
5590 5614
5591 5615 static void
5592 5616 dtrace_action_panic(dtrace_ecb_t *ecb)
5593 5617 {
5594 5618 dtrace_probe_t *probe = ecb->dte_probe;
5595 5619
5596 5620 /*
5597 5621 * It's impossible to be taking action on the NULL probe.
5598 5622 */
5599 5623 ASSERT(probe != NULL);
5600 5624
5601 5625 if (dtrace_destructive_disallow)
5602 5626 return;
5603 5627
5604 5628 if (dtrace_panicked != NULL)
5605 5629 return;
5606 5630
5607 5631 if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
5608 5632 return;
5609 5633
5610 5634 /*
5611 5635 * We won the right to panic. (We want to be sure that only one
5612 5636 * thread calls panic() from dtrace_probe(), and that panic() is
5613 5637 * called exactly once.)
5614 5638 */
5615 5639 dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
5616 5640 probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
5617 5641 probe->dtpr_func, probe->dtpr_name, (void *)ecb);
5618 5642 }
5619 5643
5620 5644 static void
5621 5645 dtrace_action_raise(uint64_t sig)
5622 5646 {
5623 5647 if (dtrace_destructive_disallow)
5624 5648 return;
5625 5649
5626 5650 if (sig >= NSIG) {
5627 5651 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
5628 5652 return;
5629 5653 }
5630 5654
5631 5655 /*
5632 5656 * raise() has a queue depth of 1 -- we ignore all subsequent
5633 5657 * invocations of the raise() action.
5634 5658 */
5635 5659 if (curthread->t_dtrace_sig == 0)
5636 5660 curthread->t_dtrace_sig = (uint8_t)sig;
5637 5661
5638 5662 curthread->t_sig_check = 1;
5639 5663 aston(curthread);
5640 5664 }
5641 5665
5642 5666 static void
5643 5667 dtrace_action_stop(void)
5644 5668 {
5645 5669 if (dtrace_destructive_disallow)
5646 5670 return;
5647 5671
5648 5672 if (!curthread->t_dtrace_stop) {
5649 5673 curthread->t_dtrace_stop = 1;
5650 5674 curthread->t_sig_check = 1;
5651 5675 aston(curthread);
5652 5676 }
5653 5677 }
5654 5678
5655 5679 static void
5656 5680 dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
5657 5681 {
5658 5682 hrtime_t now;
5659 5683 volatile uint16_t *flags;
5660 5684 cpu_t *cpu = CPU;
5661 5685
5662 5686 if (dtrace_destructive_disallow)
5663 5687 return;
5664 5688
5665 5689 flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;
5666 5690
5667 5691 now = dtrace_gethrtime();
5668 5692
5669 5693 if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
5670 5694 /*
5671 5695 * We need to advance the mark to the current time.
5672 5696 */
5673 5697 cpu->cpu_dtrace_chillmark = now;
5674 5698 cpu->cpu_dtrace_chilled = 0;
5675 5699 }
5676 5700
5677 5701 /*
5678 5702 * Now check to see if the requested chill time would take us over
5679 5703 * the maximum amount of time allowed in the chill interval. (Or
5680 5704 * worse, if the calculation itself induces overflow.)
5681 5705 */
5682 5706 if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
5683 5707 cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
5684 5708 *flags |= CPU_DTRACE_ILLOP;
5685 5709 return;
5686 5710 }
5687 5711
5688 5712 while (dtrace_gethrtime() - now < val)
5689 5713 continue;
5690 5714
5691 5715 /*
5692 5716 * Normally, we assure that the value of the variable "timestamp" does
5693 5717 * not change within an ECB. The presence of chill() represents an
5694 5718 * exception to this rule, however.
5695 5719 */
5696 5720 mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
5697 5721 cpu->cpu_dtrace_chilled += val;
5698 5722 }
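
[Editor's note] The guard above rejects a chill() not only when it would
exceed the per-interval budget (dtrace_chill_max) but also when the
addition itself would wrap the unsigned accumulator. That overflow test
generalizes; a hedged sketch with hypothetical names:

	#include <stdint.h>
	#include <assert.h>

	static int
	chill_rejected(uint64_t chilled, uint64_t val, uint64_t max)
	{
		return (chilled + val > max || chilled + val < chilled);
	}

	int
	main(void)
	{
		assert(chill_rejected(90, 20, 100));		/* over budget */
		assert(chill_rejected(1, UINT64_MAX, 100));	/* wraps */
		assert(!chill_rejected(50, 20, 100));		/* allowed */
		return (0);
	}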
5699 5723
5700 5724 static void
5701 5725 dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
5702 5726 uint64_t *buf, uint64_t arg)
5703 5727 {
5704 5728 int nframes = DTRACE_USTACK_NFRAMES(arg);
5705 5729 int strsize = DTRACE_USTACK_STRSIZE(arg);
5706 5730 uint64_t *pcs = &buf[1], *fps;
5707 5731 char *str = (char *)&pcs[nframes];
5708 5732 int size, offs = 0, i, j;
5709 5733 uintptr_t old = mstate->dtms_scratch_ptr, saved;
5710 5734 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
5711 5735 char *sym;
5712 5736
5713 5737 /*
5714 5738 * Should be taking a faster path if string space has not been
5715 5739 * allocated.
5716 5740 */
5717 5741 ASSERT(strsize != 0);
5718 5742
5719 5743 /*
5720 5744 * We will first allocate some temporary space for the frame pointers.
5721 5745 */
5722 5746 fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
5723 5747 size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
5724 5748 (nframes * sizeof (uint64_t));
5725 5749
5726 5750 if (!DTRACE_INSCRATCH(mstate, size)) {
5727 5751 /*
5728 5752 * Not enough room for our frame pointers -- need to indicate
5729 5753 * that we ran out of scratch space.
5730 5754 */
5731 5755 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5732 5756 return;
5733 5757 }
5734 5758
5735 5759 mstate->dtms_scratch_ptr += size;
5736 5760 saved = mstate->dtms_scratch_ptr;
5737 5761
5738 5762 /*
5739 5763 * Now get a stack with both program counters and frame pointers.
5740 5764 */
5741 5765 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5742 5766 dtrace_getufpstack(buf, fps, nframes + 1);
5743 5767 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5744 5768
5745 5769 /*
5746 5770 * If that faulted, we're cooked.
5747 5771 */
5748 5772 if (*flags & CPU_DTRACE_FAULT)
5749 5773 goto out;
5750 5774
5751 5775 /*
5752 5776 * Now we want to walk up the stack, calling the USTACK helper. For
5753 5777 * each iteration, we restore the scratch pointer.
5754 5778 */
5755 5779 for (i = 0; i < nframes; i++) {
5756 5780 mstate->dtms_scratch_ptr = saved;
5757 5781
5758 5782 if (offs >= strsize)
5759 5783 break;
5760 5784
5761 5785 sym = (char *)(uintptr_t)dtrace_helper(
5762 5786 DTRACE_HELPER_ACTION_USTACK,
5763 5787 mstate, state, pcs[i], fps[i]);
5764 5788
5765 5789 /*
5766 5790 * If we faulted while running the helper, we're going to
5767 5791 * clear the fault and null out the corresponding string.
5768 5792 */
5769 5793 if (*flags & CPU_DTRACE_FAULT) {
5770 5794 *flags &= ~CPU_DTRACE_FAULT;
5771 5795 str[offs++] = '\0';
5772 5796 continue;
5773 5797 }
5774 5798
5775 5799 if (sym == NULL) {
5776 5800 str[offs++] = '\0';
5777 5801 continue;
5778 5802 }
5779 5803
5780 5804 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5781 5805
5782 5806 /*
5783 5807 * Now copy in the string that the helper returned to us.
5784 5808 */
5785 5809 for (j = 0; offs + j < strsize; j++) {
5786 5810 if ((str[offs + j] = sym[j]) == '\0')
5787 5811 break;
5788 5812 }
5789 5813
5790 5814 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5791 5815
5792 5816 offs += j + 1;
5793 5817 }
5794 5818
5795 5819 if (offs >= strsize) {
5796 5820 /*
5797 5821 * If we didn't have room for all of the strings, we don't
5798 5822 * abort processing -- this needn't be a fatal error -- but we
5799 5823 * still want to increment a counter (dts_stkstroverflows) to
5800 5824 * allow this condition to be warned about. (If this is from
5801 5825 * a jstack() action, it is easily tuned via jstackstrsize.)
5802 5826 */
5803 5827 dtrace_error(&state->dts_stkstroverflows);
5804 5828 }
5805 5829
5806 5830 while (offs < strsize)
5807 5831 str[offs++] = '\0';
5808 5832
5809 5833 out:
5810 5834 mstate->dtms_scratch_ptr = old;
5811 5835 }
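
[Editor's note] Both the frame-pointer allocation above and DIF_OP_ALLOCS
carve space from per-CPU scratch by rounding the cursor up to an 8-byte
boundary and then checking that the padded size neither wrapped nor
exceeds what remains. A user-space model of that bump allocator, under
the editor's assumptions (P2ROUNDUP is copied from the illumos macro;
scratch_t and scratch_alloc() are hypothetical):

	#include <stdint.h>
	#include <stddef.h>
	#include <assert.h>

	#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))

	typedef struct scratch {
		uintptr_t base;		/* start of scratch region */
		uintptr_t ptr;		/* allocation cursor */
		size_t size;		/* total region size */
	} scratch_t;

	static void *
	scratch_alloc(scratch_t *s, size_t len)
	{
		uintptr_t ptr = P2ROUNDUP(s->ptr, 8);
		size_t need = ptr - s->ptr + len;

		/* Fail if 'need' wrapped or exceeds the remaining space. */
		if (need < len || s->base + s->size - s->ptr < need)
			return (NULL);
		s->ptr += need;
		return ((void *)ptr);
	}

	int
	main(void)
	{
		static char region[64];
		scratch_t s = { (uintptr_t)region, (uintptr_t)region,
		    sizeof (region) };

		assert(scratch_alloc(&s, 16) != NULL);
		assert(scratch_alloc(&s, (size_t)-1) == NULL);
		return (0);
	}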
5812 5836
5813 5837 /*
5814 5838 * If you're looking for the epicenter of DTrace, you just found it. This
5815 5839 * is the function called by the provider to fire a probe -- from which all
5816 5840 * subsequent probe-context DTrace activity emanates.
5817 5841 */
5818 5842 void
5819 5843 dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
5820 5844 uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
5821 5845 {
5822 5846 processorid_t cpuid;
5823 5847 dtrace_icookie_t cookie;
5824 5848 dtrace_probe_t *probe;
5825 5849 dtrace_mstate_t mstate;
5826 5850 dtrace_ecb_t *ecb;
5827 5851 dtrace_action_t *act;
5828 5852 intptr_t offs;
5829 5853 size_t size;
5830 5854 int vtime, onintr;
5831 5855 volatile uint16_t *flags;
5832 5856 hrtime_t now;
5833 5857
5834 5858 /*
5835 5859 * Kick out immediately if this CPU is still being born (in which case
5836 5860 * curthread will be set to -1) or the current thread can't allow
5837 5861 * probes in its current context.
5838 5862 */
5839 5863 if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE))
5840 5864 return;
5841 5865
5842 5866 cookie = dtrace_interrupt_disable();
5843 5867 probe = dtrace_probes[id - 1];
5844 5868 cpuid = CPU->cpu_id;
5845 5869 onintr = CPU_ON_INTR(CPU);
5846 5870
5847 5871 if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
5848 5872 probe->dtpr_predcache == curthread->t_predcache) {
5849 5873 /*
5850 5874 * We have hit in the predicate cache; we know that
5851 5875 * this predicate would evaluate to be false.
5852 5876 */
5853 5877 dtrace_interrupt_enable(cookie);
5854 5878 return;
5855 5879 }
5856 5880
5857 5881 if (panic_quiesce) {
5858 5882 /*
5859 5883 * We don't trace anything if we're panicking.
5860 5884 */
5861 5885 dtrace_interrupt_enable(cookie);
5862 5886 return;
5863 5887 }
5864 5888
5865 5889 now = dtrace_gethrtime();
5866 5890 vtime = dtrace_vtime_references != 0;
5867 5891
5868 5892 if (vtime && curthread->t_dtrace_start)
5869 5893 curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
5870 5894
5871 5895 mstate.dtms_difo = NULL;
5872 5896 mstate.dtms_probe = probe;
5873 5897 mstate.dtms_strtok = NULL;
5874 5898 mstate.dtms_arg[0] = arg0;
5875 5899 mstate.dtms_arg[1] = arg1;
5876 5900 mstate.dtms_arg[2] = arg2;
5877 5901 mstate.dtms_arg[3] = arg3;
5878 5902 mstate.dtms_arg[4] = arg4;
5879 5903
5880 5904 flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;
5881 5905
5882 5906 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
5883 5907 dtrace_predicate_t *pred = ecb->dte_predicate;
5884 5908 dtrace_state_t *state = ecb->dte_state;
5885 5909 dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
5886 5910 dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
5887 5911 dtrace_vstate_t *vstate = &state->dts_vstate;
5888 5912 dtrace_provider_t *prov = probe->dtpr_provider;
5889 5913 uint64_t tracememsize = 0;
5890 5914 int committed = 0;
5891 5915 caddr_t tomax;
5892 5916
5893 5917 /*
5894 5918 * A little subtlety with the following (seemingly innocuous)
5895 5919 * declaration of the automatic 'val': by looking at the
5896 5920 * code, you might think that it could be declared in the
5897 5921 * action processing loop, below. (That is, it's only used in
5898 5922 * the action processing loop.) However, it must be declared
5899 5923 * out of that scope because in the case of DIF expression
5900 5924 * arguments to aggregating actions, one iteration of the
5901 5925 * action loop will use the last iteration's value.
5902 5926 */
5903 5927 #ifdef lint
5904 5928 uint64_t val = 0;
5905 5929 #else
5906 5930 uint64_t val;
5907 5931 #endif
5908 5932
5909 5933 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
5910 5934 mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
5911 5935 *flags &= ~CPU_DTRACE_ERROR;
5912 5936
5913 5937 if (prov == dtrace_provider) {
5914 5938 /*
5915 5939 * If dtrace itself is the provider of this probe,
5916 5940 * we're only going to continue processing the ECB if
5917 5941 * arg0 (the dtrace_state_t) is equal to the ECB's
5918 5942 * creating state. (This prevents disjoint consumers
5919 5943 * from seeing one another's metaprobes.)
5920 5944 */
5921 5945 if (arg0 != (uint64_t)(uintptr_t)state)
5922 5946 continue;
5923 5947 }
5924 5948
5925 5949 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
5926 5950 /*
5927 5951 * We're not currently active. If our provider isn't
5928 5952 * the dtrace pseudo provider, we're not interested.
5929 5953 */
5930 5954 if (prov != dtrace_provider)
5931 5955 continue;
5932 5956
5933 5957 /*
5934 5958 * Now we must further check if we are in the BEGIN
5935 5959 * probe. If we are, we will only continue processing
5936 5960 * if we're still in WARMUP -- if one BEGIN enabling
5937 5961 * has invoked the exit() action, we don't want to
5938 5962 * evaluate subsequent BEGIN enablings.
5939 5963 */
5940 5964 if (probe->dtpr_id == dtrace_probeid_begin &&
5941 5965 state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
5942 5966 ASSERT(state->dts_activity ==
5943 5967 DTRACE_ACTIVITY_DRAINING);
5944 5968 continue;
5945 5969 }
5946 5970 }
5947 5971
5948 5972 if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb))
5949 5973 continue;
5950 5974
5951 5975 if (now - state->dts_alive > dtrace_deadman_timeout) {
5952 5976 /*
5953 5977 			 * We seem to be dead. Unless we (a) have kernel
5954 5978 			 * destructive permissions, (b) have explicitly enabled
5955 5979 			 * destructive actions, and (c) destructive actions have
5956 5980 * not been disabled, we're going to transition into
5957 5981 * the KILLED state, from which no further processing
5958 5982 * on this state will be performed.
5959 5983 */
5960 5984 if (!dtrace_priv_kernel_destructive(state) ||
5961 5985 !state->dts_cred.dcr_destructive ||
5962 5986 dtrace_destructive_disallow) {
5963 5987 void *activity = &state->dts_activity;
5964 5988 dtrace_activity_t current;
5965 5989
5966 5990 do {
5967 5991 current = state->dts_activity;
5968 5992 } while (dtrace_cas32(activity, current,
5969 5993 DTRACE_ACTIVITY_KILLED) != current);
5970 5994
5971 5995 continue;
5972 5996 }
5973 5997 }
5974 5998
5975 5999 if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
5976 6000 ecb->dte_alignment, state, &mstate)) < 0)
5977 6001 continue;
5978 6002
5979 6003 tomax = buf->dtb_tomax;
5980 6004 ASSERT(tomax != NULL);
5981 6005
5982 6006 if (ecb->dte_size != 0)
5983 6007 DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid);
5984 6008
5985 6009 mstate.dtms_epid = ecb->dte_epid;
5986 6010 mstate.dtms_present |= DTRACE_MSTATE_EPID;
5987 6011
5988 6012 if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
5989 6013 mstate.dtms_access |= DTRACE_ACCESS_KERNEL;
5990 6014
5991 6015 if (pred != NULL) {
5992 6016 dtrace_difo_t *dp = pred->dtp_difo;
5993 6017 int rval;
5994 6018
5995 6019 rval = dtrace_dif_emulate(dp, &mstate, vstate, state);
5996 6020
5997 6021 if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
5998 6022 dtrace_cacheid_t cid = probe->dtpr_predcache;
5999 6023
6000 6024 if (cid != DTRACE_CACHEIDNONE && !onintr) {
6001 6025 /*
6002 6026 * Update the predicate cache...
6003 6027 */
6004 6028 ASSERT(cid == pred->dtp_cacheid);
6005 6029 curthread->t_predcache = cid;
6006 6030 }
6007 6031
6008 6032 continue;
6009 6033 }
6010 6034 }
6011 6035
6012 6036 for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
6013 6037 act != NULL; act = act->dta_next) {
6014 6038 size_t valoffs;
6015 6039 dtrace_difo_t *dp;
6016 6040 dtrace_recdesc_t *rec = &act->dta_rec;
6017 6041
6018 6042 size = rec->dtrd_size;
6019 6043 valoffs = offs + rec->dtrd_offset;
6020 6044
6021 6045 if (DTRACEACT_ISAGG(act->dta_kind)) {
6022 6046 uint64_t v = 0xbad;
6023 6047 dtrace_aggregation_t *agg;
6024 6048
6025 6049 agg = (dtrace_aggregation_t *)act;
6026 6050
6027 6051 if ((dp = act->dta_difo) != NULL)
6028 6052 v = dtrace_dif_emulate(dp,
6029 6053 &mstate, vstate, state);
6030 6054
6031 6055 if (*flags & CPU_DTRACE_ERROR)
6032 6056 continue;
6033 6057
6034 6058 /*
6035 6059 * Note that we always pass the expression
6036 6060 * value from the previous iteration of the
6037 6061 * action loop. This value will only be used
6038 6062 * if there is an expression argument to the
6039 6063 * aggregating action, denoted by the
6040 6064 * dtag_hasarg field.
6041 6065 */
6042 6066 dtrace_aggregate(agg, buf,
6043 6067 offs, aggbuf, v, val);
6044 6068 continue;
6045 6069 }
6046 6070
6047 6071 switch (act->dta_kind) {
6048 6072 case DTRACEACT_STOP:
6049 6073 if (dtrace_priv_proc_destructive(state,
6050 6074 &mstate))
6051 6075 dtrace_action_stop();
6052 6076 continue;
6053 6077
6054 6078 case DTRACEACT_BREAKPOINT:
6055 6079 if (dtrace_priv_kernel_destructive(state))
6056 6080 dtrace_action_breakpoint(ecb);
6057 6081 continue;
6058 6082
6059 6083 case DTRACEACT_PANIC:
6060 6084 if (dtrace_priv_kernel_destructive(state))
6061 6085 dtrace_action_panic(ecb);
6062 6086 continue;
6063 6087
6064 6088 case DTRACEACT_STACK:
6065 6089 if (!dtrace_priv_kernel(state))
6066 6090 continue;
6067 6091
6068 6092 dtrace_getpcstack((pc_t *)(tomax + valoffs),
6069 6093 size / sizeof (pc_t), probe->dtpr_aframes,
6070 6094 DTRACE_ANCHORED(probe) ? NULL :
6071 6095 (uint32_t *)arg0);
6072 6096
6073 6097 continue;
6074 6098
6075 6099 case DTRACEACT_JSTACK:
6076 6100 case DTRACEACT_USTACK:
6077 6101 if (!dtrace_priv_proc(state, &mstate))
6078 6102 continue;
6079 6103
6080 6104 /*
6081 6105 * See comment in DIF_VAR_PID.
6082 6106 */
6083 6107 if (DTRACE_ANCHORED(mstate.dtms_probe) &&
6084 6108 CPU_ON_INTR(CPU)) {
6085 6109 int depth = DTRACE_USTACK_NFRAMES(
6086 6110 rec->dtrd_arg) + 1;
6087 6111
6088 6112 dtrace_bzero((void *)(tomax + valoffs),
6089 6113 DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
6090 6114 + depth * sizeof (uint64_t));
6091 6115
6092 6116 continue;
6093 6117 }
6094 6118
6095 6119 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
6096 6120 curproc->p_dtrace_helpers != NULL) {
6097 6121 /*
6098 6122 * This is the slow path -- we have
6099 6123 * allocated string space, and we're
6100 6124 * getting the stack of a process that
6101 6125 * has helpers. Call into a separate
6102 6126 * routine to perform this processing.
6103 6127 */
6104 6128 dtrace_action_ustack(&mstate, state,
6105 6129 (uint64_t *)(tomax + valoffs),
6106 6130 rec->dtrd_arg);
6107 6131 continue;
6108 6132 }
6109 6133
6110 6134 /*
6111 6135 * Clear the string space, since there's no
6112 6136 * helper to do it for us.
6113 6137 */
6114 6138 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) {
6115 6139 int depth = DTRACE_USTACK_NFRAMES(
6116 6140 rec->dtrd_arg);
6117 6141 size_t strsize = DTRACE_USTACK_STRSIZE(
6118 6142 rec->dtrd_arg);
6119 6143 uint64_t *buf = (uint64_t *)(tomax +
6120 6144 valoffs);
6121 6145 void *strspace = &buf[depth + 1];
6122 6146
6123 6147 dtrace_bzero(strspace,
6124 6148 MIN(depth, strsize));
6125 6149 }
6126 6150
6127 6151 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6128 6152 dtrace_getupcstack((uint64_t *)
6129 6153 (tomax + valoffs),
6130 6154 DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
6131 6155 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6132 6156 continue;
6133 6157
6134 6158 default:
6135 6159 break;
6136 6160 }
6137 6161
6138 6162 dp = act->dta_difo;
6139 6163 ASSERT(dp != NULL);
6140 6164
6141 6165 val = dtrace_dif_emulate(dp, &mstate, vstate, state);
6142 6166
6143 6167 if (*flags & CPU_DTRACE_ERROR)
6144 6168 continue;
6145 6169
6146 6170 switch (act->dta_kind) {
6147 6171 case DTRACEACT_SPECULATE:
6148 6172 ASSERT(buf == &state->dts_buffer[cpuid]);
6149 6173 buf = dtrace_speculation_buffer(state,
6150 6174 cpuid, val);
6151 6175
6152 6176 if (buf == NULL) {
6153 6177 *flags |= CPU_DTRACE_DROP;
6154 6178 continue;
6155 6179 }
6156 6180
6157 6181 offs = dtrace_buffer_reserve(buf,
6158 6182 ecb->dte_needed, ecb->dte_alignment,
6159 6183 state, NULL);
6160 6184
6161 6185 if (offs < 0) {
6162 6186 *flags |= CPU_DTRACE_DROP;
6163 6187 continue;
6164 6188 }
6165 6189
6166 6190 tomax = buf->dtb_tomax;
6167 6191 ASSERT(tomax != NULL);
6168 6192
6169 6193 if (ecb->dte_size != 0)
6170 6194 DTRACE_STORE(uint32_t, tomax, offs,
6171 6195 ecb->dte_epid);
6172 6196 continue;
6173 6197
6174 6198 case DTRACEACT_CHILL:
6175 6199 if (dtrace_priv_kernel_destructive(state))
6176 6200 dtrace_action_chill(&mstate, val);
6177 6201 continue;
6178 6202
6179 6203 case DTRACEACT_RAISE:
6180 6204 if (dtrace_priv_proc_destructive(state,
6181 6205 &mstate))
6182 6206 dtrace_action_raise(val);
6183 6207 continue;
6184 6208
6185 6209 case DTRACEACT_COMMIT:
6186 6210 ASSERT(!committed);
6187 6211
6188 6212 /*
6189 6213 * We need to commit our buffer state.
6190 6214 */
6191 6215 if (ecb->dte_size)
6192 6216 buf->dtb_offset = offs + ecb->dte_size;
6193 6217 buf = &state->dts_buffer[cpuid];
6194 6218 dtrace_speculation_commit(state, cpuid, val);
6195 6219 committed = 1;
6196 6220 continue;
6197 6221
6198 6222 case DTRACEACT_DISCARD:
6199 6223 dtrace_speculation_discard(state, cpuid, val);
6200 6224 continue;
6201 6225
6202 6226 case DTRACEACT_DIFEXPR:
6203 6227 case DTRACEACT_LIBACT:
6204 6228 case DTRACEACT_PRINTF:
6205 6229 case DTRACEACT_PRINTA:
6206 6230 case DTRACEACT_SYSTEM:
6207 6231 case DTRACEACT_FREOPEN:
6208 6232 case DTRACEACT_TRACEMEM:
6209 6233 break;
6210 6234
6211 6235 case DTRACEACT_TRACEMEM_DYNSIZE:
6212 6236 tracememsize = val;
6213 6237 break;
6214 6238
6215 6239 case DTRACEACT_SYM:
6216 6240 case DTRACEACT_MOD:
6217 6241 if (!dtrace_priv_kernel(state))
6218 6242 continue;
6219 6243 break;
6220 6244
6221 6245 case DTRACEACT_USYM:
6222 6246 case DTRACEACT_UMOD:
6223 6247 case DTRACEACT_UADDR: {
6224 6248 struct pid *pid = curthread->t_procp->p_pidp;
6225 6249
6226 6250 if (!dtrace_priv_proc(state, &mstate))
6227 6251 continue;
6228 6252
6229 6253 DTRACE_STORE(uint64_t, tomax,
6230 6254 valoffs, (uint64_t)pid->pid_id);
6231 6255 DTRACE_STORE(uint64_t, tomax,
6232 6256 valoffs + sizeof (uint64_t), val);
6233 6257
6234 6258 continue;
6235 6259 }
6236 6260
6237 6261 case DTRACEACT_EXIT: {
6238 6262 /*
6239 6263 * For the exit action, we are going to attempt
6240 6264 * to atomically set our activity to be
6241 6265 * draining. If this fails (either because
6242 6266 * another CPU has beat us to the exit action,
6243 6267 * or because our current activity is something
6244 6268 * other than ACTIVE or WARMUP), we will
6245 6269 * continue. This assures that the exit action
6246 6270 * can be successfully recorded at most once
6247 6271 * when we're in the ACTIVE state. If we're
6248 6272 * encountering the exit() action while in
6249 6273 * COOLDOWN, however, we want to honor the new
6250 6274 * status code. (We know that we're the only
6251 6275 * thread in COOLDOWN, so there is no race.)
6252 6276 */
6253 6277 void *activity = &state->dts_activity;
6254 6278 dtrace_activity_t current = state->dts_activity;
6255 6279
6256 6280 if (current == DTRACE_ACTIVITY_COOLDOWN)
6257 6281 break;
6258 6282
6259 6283 if (current != DTRACE_ACTIVITY_WARMUP)
6260 6284 current = DTRACE_ACTIVITY_ACTIVE;
6261 6285
6262 6286 if (dtrace_cas32(activity, current,
6263 6287 DTRACE_ACTIVITY_DRAINING) != current) {
6264 6288 *flags |= CPU_DTRACE_DROP;
6265 6289 continue;
6266 6290 }
6267 6291
6268 6292 break;
6269 6293 }
6270 6294
6271 6295 default:
6272 6296 ASSERT(0);
6273 6297 }
6274 6298
6275 6299 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
6276 6300 uintptr_t end = valoffs + size;
6277 6301
6278 6302 if (tracememsize != 0 &&
6279 6303 valoffs + tracememsize < end) {
6280 6304 end = valoffs + tracememsize;
6281 6305 tracememsize = 0;
6282 6306 }
6283 6307
6284 6308 if (!dtrace_vcanload((void *)(uintptr_t)val,
6285 6309 &dp->dtdo_rtype, &mstate, vstate))
6286 6310 continue;
6287 6311
6288 6312 /*
6289 6313 * If this is a string, we're going to only
6290 6314 * load until we find the zero byte -- after
6291 6315 * which we'll store zero bytes.
6292 6316 */
6293 6317 if (dp->dtdo_rtype.dtdt_kind ==
6294 6318 DIF_TYPE_STRING) {
6295 6319 char c = '\0' + 1;
6296 6320 int intuple = act->dta_intuple;
6297 6321 size_t s;
6298 6322
6299 6323 for (s = 0; s < size; s++) {
6300 6324 if (c != '\0')
6301 6325 c = dtrace_load8(val++);
6302 6326
6303 6327 DTRACE_STORE(uint8_t, tomax,
6304 6328 valoffs++, c);
6305 6329
6306 6330 if (c == '\0' && intuple)
6307 6331 break;
6308 6332 }
6309 6333
6310 6334 continue;
6311 6335 }
6312 6336
6313 6337 while (valoffs < end) {
6314 6338 DTRACE_STORE(uint8_t, tomax, valoffs++,
6315 6339 dtrace_load8(val++));
6316 6340 }
6317 6341
6318 6342 continue;
6319 6343 }
6320 6344
6321 6345 switch (size) {
6322 6346 case 0:
6323 6347 break;
6324 6348
6325 6349 case sizeof (uint8_t):
6326 6350 DTRACE_STORE(uint8_t, tomax, valoffs, val);
6327 6351 break;
6328 6352 case sizeof (uint16_t):
6329 6353 DTRACE_STORE(uint16_t, tomax, valoffs, val);
6330 6354 break;
6331 6355 case sizeof (uint32_t):
6332 6356 DTRACE_STORE(uint32_t, tomax, valoffs, val);
6333 6357 break;
6334 6358 case sizeof (uint64_t):
6335 6359 DTRACE_STORE(uint64_t, tomax, valoffs, val);
6336 6360 break;
6337 6361 default:
6338 6362 /*
6339 6363 * Any other size should have been returned by
6340 6364 * reference, not by value.
6341 6365 */
6342 6366 ASSERT(0);
6343 6367 break;
6344 6368 }
6345 6369 }
6346 6370
6347 6371 if (*flags & CPU_DTRACE_DROP)
6348 6372 continue;
6349 6373
6350 6374 if (*flags & CPU_DTRACE_FAULT) {
6351 6375 int ndx;
6352 6376 dtrace_action_t *err;
6353 6377
6354 6378 buf->dtb_errors++;
6355 6379
6356 6380 if (probe->dtpr_id == dtrace_probeid_error) {
6357 6381 /*
6358 6382 * There's nothing we can do -- we had an
6359 6383 * error on the error probe. We bump an
6360 6384 * error counter to at least indicate that
6361 6385 * this condition happened.
6362 6386 */
6363 6387 dtrace_error(&state->dts_dblerrors);
6364 6388 continue;
6365 6389 }
6366 6390
6367 6391 if (vtime) {
6368 6392 /*
6369 6393 * Before recursing on dtrace_probe(), we
6370 6394 * need to explicitly clear out our start
6371 6395 * time to prevent it from being accumulated
6372 6396 * into t_dtrace_vtime.
6373 6397 */
6374 6398 curthread->t_dtrace_start = 0;
6375 6399 }
6376 6400
6377 6401 /*
6378 6402 * Iterate over the actions to figure out which action
6379 6403 * we were processing when we experienced the error.
6380 6404 * Note that act points _past_ the faulting action; if
6381 6405 * act is ecb->dte_action, the fault was in the
6382 6406 * predicate, if it's ecb->dte_action->dta_next it's
6383 6407 * in action #1, and so on.
6384 6408 */
6385 6409 for (err = ecb->dte_action, ndx = 0;
6386 6410 err != act; err = err->dta_next, ndx++)
6387 6411 continue;
6388 6412
6389 6413 dtrace_probe_error(state, ecb->dte_epid, ndx,
6390 6414 (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
6391 6415 mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
6392 6416 cpu_core[cpuid].cpuc_dtrace_illval);
6393 6417
6394 6418 continue;
6395 6419 }
6396 6420
6397 6421 if (!committed)
6398 6422 buf->dtb_offset = offs + ecb->dte_size;
6399 6423 }
6400 6424
6401 6425 if (vtime)
6402 6426 curthread->t_dtrace_start = dtrace_gethrtime();
6403 6427
6404 6428 dtrace_interrupt_enable(cookie);
6405 6429 }
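
[Editor's note] Two spots above (the deadman transition to KILLED and the
exit() action's transition to DRAINING) move the consumer's activity
state with a compare-and-swap so that exactly one CPU wins the
transition. A user-space analogue of the exit() logic using C11 atomics
rather than the kernel's dtrace_cas32() (state names abbreviated;
try_exit() is the editor's):

	#include <stdatomic.h>
	#include <stdio.h>

	enum { ACTIVE, WARMUP, DRAINING, COOLDOWN };

	static _Atomic int activity = ACTIVE;

	static int
	try_exit(void)
	{
		int current = atomic_load(&activity);

		if (current == COOLDOWN)
			return (1);	/* sole thread; honor new status */
		if (current != WARMUP)
			current = ACTIVE;

		/* At most one caller per ACTIVE/WARMUP period succeeds. */
		return (atomic_compare_exchange_strong(&activity,
		    &current, DRAINING));
	}

	int
	main(void)
	{
		(void) printf("%d %d\n", try_exit(), try_exit());
		return (0);	/* prints "1 0" */
	}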
6406 6430
6407 6431 /*
6408 6432 * DTrace Probe Hashing Functions
6409 6433 *
6410 6434 * The functions in this section (and indeed, the functions in remaining
6411 6435 * sections) are not _called_ from probe context. (Any exceptions to this are
6412 6436 * marked with a "Note:".) Rather, they are called from elsewhere in the
6413 6437  * DTrace framework to look up probes in, add probes to, and remove probes from
6414 6438 * the DTrace probe hashes. (Each probe is hashed by each element of the
6415 6439 * probe tuple -- allowing for fast lookups, regardless of what was
6416 6440 * specified.)
6417 6441 */
6418 6442 static uint_t
6419 6443 dtrace_hash_str(char *p)
6420 6444 {
6421 6445 unsigned int g;
6422 6446 uint_t hval = 0;
6423 6447
6424 6448 while (*p) {
6425 6449 hval = (hval << 4) + *p++;
6426 6450 if ((g = (hval & 0xf0000000)) != 0)
6427 6451 hval ^= g >> 24;
6428 6452 hval &= ~g;
6429 6453 }
6430 6454 return (hval);
6431 6455 }
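
[Editor's note] dtrace_hash_str() is the classic PJW/ELF string hash:
shift in four bits per character and fold any bits that reach the top
nibble back down, so long strings keep mixing. A stand-alone copy with a
usage sketch (the probe name and table size are the editor's examples):

	#include <stdio.h>

	static unsigned int
	hash_str(const char *p)
	{
		unsigned int g, hval = 0;

		while (*p) {
			hval = (hval << 4) + *p++;
			if ((g = (hval & 0xf0000000)) != 0)
				hval ^= g >> 24;
			hval &= ~g;
		}
		return (hval);
	}

	int
	main(void)
	{
		/* Bucket index is hash & mask for a power-of-two table. */
		(void) printf("%u\n",
		    hash_str("fbt:genunix:kmem_alloc:entry") & 1023);
		return (0);
	}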
6432 6456
6433 6457 static dtrace_hash_t *
6434 6458 dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
6435 6459 {
6436 6460 dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);
6437 6461
6438 6462 hash->dth_stroffs = stroffs;
6439 6463 hash->dth_nextoffs = nextoffs;
6440 6464 hash->dth_prevoffs = prevoffs;
6441 6465
6442 6466 hash->dth_size = 1;
6443 6467 hash->dth_mask = hash->dth_size - 1;
6444 6468
6445 6469 hash->dth_tab = kmem_zalloc(hash->dth_size *
6446 6470 sizeof (dtrace_hashbucket_t *), KM_SLEEP);
6447 6471
6448 6472 return (hash);
6449 6473 }
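
[Editor's note] The probe hashes are intrusive and generic over which
tuple element they key on: dtrace_hash_create() records the byte offsets
of the keyed string and of the next/prev links inside dtrace_probe_t, and
the DTRACE_HASHSTR/HASHNEXT/HASHPREV macros (defined elsewhere in the
DTrace headers) recover them by pointer arithmetic. A minimal model of
that offset trick (probe_t and HASHSTR here are the editor's):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef struct probe {
		char *p_func;
		char *p_name;
		struct probe *p_nextbyname;
	} probe_t;

	/* Fetch the keyed string from an object given a recorded offset. */
	#define	HASHSTR(offs, obj)	(*(char **)((uintptr_t)(obj) + (offs)))

	int
	main(void)
	{
		probe_t p = { "kmem_alloc", "entry", NULL };

		/* A by-name hash would record offsetof(probe_t, p_name). */
		(void) printf("%s\n", HASHSTR(offsetof(probe_t, p_name), &p));
		return (0);
	}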
6450 6474
6451 6475 static void
6452 6476 dtrace_hash_destroy(dtrace_hash_t *hash)
6453 6477 {
6454 6478 #ifdef DEBUG
6455 6479 int i;
6456 6480
6457 6481 for (i = 0; i < hash->dth_size; i++)
6458 6482 ASSERT(hash->dth_tab[i] == NULL);
6459 6483 #endif
6460 6484
6461 6485 kmem_free(hash->dth_tab,
6462 6486 hash->dth_size * sizeof (dtrace_hashbucket_t *));
6463 6487 kmem_free(hash, sizeof (dtrace_hash_t));
6464 6488 }
6465 6489
6466 6490 static void
6467 6491 dtrace_hash_resize(dtrace_hash_t *hash)
6468 6492 {
6469 6493 int size = hash->dth_size, i, ndx;
6470 6494 int new_size = hash->dth_size << 1;
6471 6495 int new_mask = new_size - 1;
6472 6496 dtrace_hashbucket_t **new_tab, *bucket, *next;
6473 6497
6474 6498 ASSERT((new_size & new_mask) == 0);
6475 6499
6476 6500 new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);
6477 6501
6478 6502 for (i = 0; i < size; i++) {
6479 6503 for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
6480 6504 dtrace_probe_t *probe = bucket->dthb_chain;
6481 6505
6482 6506 ASSERT(probe != NULL);
6483 6507 ndx = DTRACE_HASHSTR(hash, probe) & new_mask;
6484 6508
6485 6509 next = bucket->dthb_next;
6486 6510 bucket->dthb_next = new_tab[ndx];
6487 6511 new_tab[ndx] = bucket;
6488 6512 }
6489 6513 }
6490 6514
6491 6515 kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
6492 6516 hash->dth_tab = new_tab;
6493 6517 hash->dth_size = new_size;
6494 6518 hash->dth_mask = new_mask;
6495 6519 }
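
[Editor's note] Because the table size is always a power of two, resizing
just doubles it and re-derives each bucket's index with the new mask; the
mask replaces a modulo. The invariant, as a two-line check (editor's
sketch):

	#include <assert.h>

	int
	main(void)
	{
		unsigned int h = 0xdeadbeefu;
		unsigned int new_size = 8 << 1;		/* doubled table */
		unsigned int new_mask = new_size - 1;

		assert((new_size & new_mask) == 0);	/* power of two */
		assert((h & new_mask) == (h % new_size));
		return (0);
	}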
6496 6520
6497 6521 static void
6498 6522 dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
6499 6523 {
6500 6524 int hashval = DTRACE_HASHSTR(hash, new);
6501 6525 int ndx = hashval & hash->dth_mask;
6502 6526 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6503 6527 dtrace_probe_t **nextp, **prevp;
6504 6528
6505 6529 for (; bucket != NULL; bucket = bucket->dthb_next) {
6506 6530 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
6507 6531 goto add;
6508 6532 }
6509 6533
6510 6534 if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
6511 6535 dtrace_hash_resize(hash);
6512 6536 dtrace_hash_add(hash, new);
6513 6537 return;
6514 6538 }
6515 6539
6516 6540 bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
6517 6541 bucket->dthb_next = hash->dth_tab[ndx];
6518 6542 hash->dth_tab[ndx] = bucket;
6519 6543 hash->dth_nbuckets++;
6520 6544
6521 6545 add:
6522 6546 nextp = DTRACE_HASHNEXT(hash, new);
6523 6547 ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
6524 6548 *nextp = bucket->dthb_chain;
6525 6549
6526 6550 if (bucket->dthb_chain != NULL) {
6527 6551 prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
6528 6552 ASSERT(*prevp == NULL);
6529 6553 *prevp = new;
6530 6554 }
6531 6555
6532 6556 bucket->dthb_chain = new;
6533 6557 bucket->dthb_len++;
6534 6558 }
6535 6559
6536 6560 static dtrace_probe_t *
6537 6561 dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
6538 6562 {
6539 6563 int hashval = DTRACE_HASHSTR(hash, template);
6540 6564 int ndx = hashval & hash->dth_mask;
6541 6565 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6542 6566
6543 6567 for (; bucket != NULL; bucket = bucket->dthb_next) {
6544 6568 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
6545 6569 return (bucket->dthb_chain);
6546 6570 }
6547 6571
6548 6572 return (NULL);
6549 6573 }
6550 6574
6551 6575 static int
6552 6576 dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
6553 6577 {
6554 6578 int hashval = DTRACE_HASHSTR(hash, template);
6555 6579 int ndx = hashval & hash->dth_mask;
6556 6580 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6557 6581
6558 6582 for (; bucket != NULL; bucket = bucket->dthb_next) {
6559 6583 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
6560 6584 return (bucket->dthb_len);
6561 6585 }
6562 6586
6563 6587 	return (0);
6564 6588 }
6565 6589
6566 6590 static void
6567 6591 dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
6568 6592 {
6569 6593 int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
6570 6594 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6571 6595
6572 6596 dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
6573 6597 dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);
6574 6598
6575 6599 /*
6576 6600 * Find the bucket that we're removing this probe from.
6577 6601 */
6578 6602 for (; bucket != NULL; bucket = bucket->dthb_next) {
6579 6603 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
6580 6604 break;
6581 6605 }
6582 6606
6583 6607 ASSERT(bucket != NULL);
6584 6608
6585 6609 if (*prevp == NULL) {
6586 6610 if (*nextp == NULL) {
6587 6611 /*
6588 6612 * The removed probe was the only probe on this
6589 6613 * bucket; we need to remove the bucket.
6590 6614 */
6591 6615 dtrace_hashbucket_t *b = hash->dth_tab[ndx];
6592 6616
6593 6617 ASSERT(bucket->dthb_chain == probe);
6594 6618 ASSERT(b != NULL);
6595 6619
6596 6620 if (b == bucket) {
6597 6621 hash->dth_tab[ndx] = bucket->dthb_next;
6598 6622 } else {
6599 6623 while (b->dthb_next != bucket)
6600 6624 b = b->dthb_next;
6601 6625 b->dthb_next = bucket->dthb_next;
6602 6626 }
6603 6627
6604 6628 ASSERT(hash->dth_nbuckets > 0);
6605 6629 hash->dth_nbuckets--;
6606 6630 kmem_free(bucket, sizeof (dtrace_hashbucket_t));
6607 6631 return;
6608 6632 }
6609 6633
6610 6634 bucket->dthb_chain = *nextp;
6611 6635 } else {
6612 6636 *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
6613 6637 }
6614 6638
6615 6639 if (*nextp != NULL)
6616 6640 *(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
6617 6641 }
6618 6642
6619 6643 /*
6620 6644 * DTrace Utility Functions
6621 6645 *
6622 6646 * These are random utility functions that are _not_ called from probe context.
6623 6647 */
6624 6648 static int
6625 6649 dtrace_badattr(const dtrace_attribute_t *a)
6626 6650 {
6627 6651 return (a->dtat_name > DTRACE_STABILITY_MAX ||
6628 6652 a->dtat_data > DTRACE_STABILITY_MAX ||
6629 6653 a->dtat_class > DTRACE_CLASS_MAX);
6630 6654 }
6631 6655
6632 6656 /*
6633 6657  * Return a copy of a string. If the specified string is NULL,
6634 6658 * this function returns a zero-length string.
6635 6659 */
6636 6660 static char *
6637 6661 dtrace_strdup(const char *str)
6638 6662 {
6639 6663 char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);
6640 6664
6641 6665 if (str != NULL)
6642 6666 (void) strcpy(new, str);
6643 6667
6644 6668 return (new);
6645 6669 }
6646 6670
6647 6671 #define DTRACE_ISALPHA(c) \
6648 6672 (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
6649 6673
6650 6674 static int
6651 6675 dtrace_badname(const char *s)
6652 6676 {
6653 6677 char c;
6654 6678
6655 6679 if (s == NULL || (c = *s++) == '\0')
6656 6680 return (0);
6657 6681
6658 6682 if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
6659 6683 return (1);
6660 6684
6661 6685 while ((c = *s++) != '\0') {
6662 6686 if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
6663 6687 c != '-' && c != '_' && c != '.' && c != '`')
6664 6688 return (1);
6665 6689 }
6666 6690
6667 6691 return (0);
6668 6692 }
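
[Editor's note] Concretely, under the rule above the first character must
be alphabetic or one of '-', '_', '.', and subsequent characters may also
be digits or '`' (the module-scoping character in D). Hypothetical inputs,
editor's own:

	dtrace_badname("genunix`kmem_alloc")	/* 0: well-formed */
	dtrace_badname("_private.name-1")	/* 0: well-formed */
	dtrace_badname("1badstart")		/* 1: leading digit */
	dtrace_badname("bad name")		/* 1: embedded space */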
6669 6693
6670 6694 static void
6671 6695 dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
6672 6696 {
6673 6697 uint32_t priv;
6674 6698
6675 6699 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
6676 6700 /*
6677 6701 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
6678 6702 */
6679 6703 priv = DTRACE_PRIV_ALL;
6680 6704 } else {
6681 6705 *uidp = crgetuid(cr);
6682 6706 *zoneidp = crgetzoneid(cr);
6683 6707
6684 6708 priv = 0;
6685 6709 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
6686 6710 priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
6687 6711 else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
6688 6712 priv |= DTRACE_PRIV_USER;
6689 6713 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
6690 6714 priv |= DTRACE_PRIV_PROC;
6691 6715 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
6692 6716 priv |= DTRACE_PRIV_OWNER;
6693 6717 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
6694 6718 priv |= DTRACE_PRIV_ZONEOWNER;
6695 6719 }
6696 6720
6697 6721 *privp = priv;
6698 6722 }
6699 6723
6700 6724 #ifdef DTRACE_ERRDEBUG
6701 6725 static void
6702 6726 dtrace_errdebug(const char *str)
6703 6727 {
6704 6728 int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
6705 6729 int occupied = 0;
6706 6730
6707 6731 mutex_enter(&dtrace_errlock);
6708 6732 dtrace_errlast = str;
6709 6733 dtrace_errthread = curthread;
6710 6734
6711 6735 while (occupied++ < DTRACE_ERRHASHSZ) {
6712 6736 if (dtrace_errhash[hval].dter_msg == str) {
6713 6737 dtrace_errhash[hval].dter_count++;
6714 6738 goto out;
6715 6739 }
6716 6740
6717 6741 if (dtrace_errhash[hval].dter_msg != NULL) {
6718 6742 hval = (hval + 1) % DTRACE_ERRHASHSZ;
6719 6743 continue;
6720 6744 }
6721 6745
6722 6746 dtrace_errhash[hval].dter_msg = str;
6723 6747 dtrace_errhash[hval].dter_count = 1;
6724 6748 goto out;
6725 6749 }
6726 6750
6727 6751 panic("dtrace: undersized error hash");
6728 6752 out:
6729 6753 mutex_exit(&dtrace_errlock);
6730 6754 }
6731 6755 #endif
6732 6756
6733 6757 /*
6734 6758 * DTrace Matching Functions
6735 6759 *
6736 6760 * These functions are used to match groups of probes, given some elements of
6737 6761 * a probe tuple, or some globbed expressions for elements of a probe tuple.
6738 6762 */
6739 6763 static int
6740 6764 dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
6741 6765 zoneid_t zoneid)
6742 6766 {
6743 6767 if (priv != DTRACE_PRIV_ALL) {
6744 6768 uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
6745 6769 uint32_t match = priv & ppriv;
6746 6770
6747 6771 /*
6748 6772 * No PRIV_DTRACE_* privileges...
6749 6773 */
6750 6774 if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
6751 6775 DTRACE_PRIV_KERNEL)) == 0)
6752 6776 return (0);
6753 6777
6754 6778 /*
6755 6779 * No matching bits, but there were bits to match...
6756 6780 */
6757 6781 if (match == 0 && ppriv != 0)
6758 6782 return (0);
6759 6783
6760 6784 /*
6761 6785 * Need to have permissions to the process, but don't...
6762 6786 */
6763 6787 if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
6764 6788 uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
6765 6789 return (0);
6766 6790 }
6767 6791
6768 6792 /*
6769 6793 * Need to be in the same zone unless we possess the
6770 6794 * privilege to examine all zones.
6771 6795 */
6772 6796 if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
6773 6797 zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
6774 6798 return (0);
6775 6799 }
6776 6800 }
6777 6801
6778 6802 return (1);
6779 6803 }
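/*
 * For example (hypothetical): a consumer holding only DTRACE_PRIV_PROC
 * shares no bits with a provider that demands DTRACE_PRIV_KERNEL, so the
 * "no matching bits" test above rejects the probe; similarly, a consumer
 * lacking DTRACE_PRIV_OWNER matches an owner-restricted probe only when
 * its uid equals the provider's dtpp_uid.
 */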
6780 6804
6781 6805 /*
6782 6806 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
6783 6807 * consists of input pattern strings and an ops-vector to evaluate them.
6784 6808 * This function returns >0 for match, 0 for no match, and <0 for error.
6785 6809 */
6786 6810 static int
6787 6811 dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
6788 6812 uint32_t priv, uid_t uid, zoneid_t zoneid)
6789 6813 {
6790 6814 dtrace_provider_t *pvp = prp->dtpr_provider;
6791 6815 int rv;
6792 6816
6793 6817 if (pvp->dtpv_defunct)
6794 6818 return (0);
6795 6819
6796 6820 if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
6797 6821 return (rv);
6798 6822
6799 6823 if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
6800 6824 return (rv);
6801 6825
6802 6826 if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
6803 6827 return (rv);
6804 6828
6805 6829 if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
6806 6830 return (rv);
6807 6831
6808 6832 if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
6809 6833 return (0);
6810 6834
6811 6835 return (rv);
6812 6836 }
6813 6837
6814 6838 /*
6815 6839 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
6816 6840 * interface for matching a glob pattern 'p' to an input string 's'. Unlike
6817 6841 * libc's version, the kernel version only applies to 8-bit ASCII strings.
6818 6842 * In addition, all of the recursion cases except for '*' matching have been
6819 6843 * unwound. For '*', we still implement recursive evaluation, but a depth
6820 6844 * counter is maintained and matching is aborted if we recurse too deep.
6821 6845 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
6822 6846 */
6823 6847 static int
6824 6848 dtrace_match_glob(const char *s, const char *p, int depth)
6825 6849 {
6826 6850 const char *olds;
6827 6851 char s1, c;
6828 6852 int gs;
6829 6853
6830 6854 if (depth > DTRACE_PROBEKEY_MAXDEPTH)
6831 6855 return (-1);
6832 6856
6833 6857 if (s == NULL)
6834 6858 s = ""; /* treat NULL as empty string */
6835 6859
6836 6860 top:
6837 6861 olds = s;
6838 6862 s1 = *s++;
6839 6863
6840 6864 if (p == NULL)
6841 6865 return (0);
6842 6866
6843 6867 if ((c = *p++) == '\0')
6844 6868 return (s1 == '\0');
6845 6869
6846 6870 switch (c) {
6847 6871 case '[': {
6848 6872 int ok = 0, notflag = 0;
6849 6873 char lc = '\0';
6850 6874
6851 6875 if (s1 == '\0')
6852 6876 return (0);
6853 6877
6854 6878 if (*p == '!') {
6855 6879 notflag = 1;
6856 6880 p++;
6857 6881 }
6858 6882
6859 6883 if ((c = *p++) == '\0')
6860 6884 return (0);
6861 6885
6862 6886 do {
6863 6887 if (c == '-' && lc != '\0' && *p != ']') {
6864 6888 if ((c = *p++) == '\0')
6865 6889 return (0);
6866 6890 if (c == '\\' && (c = *p++) == '\0')
6867 6891 return (0);
6868 6892
6869 6893 if (notflag) {
6870 6894 if (s1 < lc || s1 > c)
6871 6895 ok++;
6872 6896 else
6873 6897 return (0);
6874 6898 } else if (lc <= s1 && s1 <= c)
6875 6899 ok++;
6876 6900
6877 6901 } else if (c == '\\' && (c = *p++) == '\0')
6878 6902 return (0);
6879 6903
6880 6904 lc = c; /* save left-hand 'c' for next iteration */
6881 6905
6882 6906 if (notflag) {
6883 6907 if (s1 != c)
6884 6908 ok++;
6885 6909 else
6886 6910 return (0);
6887 6911 } else if (s1 == c)
6888 6912 ok++;
6889 6913
6890 6914 if ((c = *p++) == '\0')
6891 6915 return (0);
6892 6916
6893 6917 } while (c != ']');
6894 6918
6895 6919 if (ok)
6896 6920 goto top;
6897 6921
6898 6922 return (0);
6899 6923 }
6900 6924
6901 6925 case '\\':
6902 6926 if ((c = *p++) == '\0')
6903 6927 return (0);
6904 6928 /*FALLTHRU*/
6905 6929
6906 6930 default:
6907 6931 if (c != s1)
6908 6932 return (0);
6909 6933 /*FALLTHRU*/
6910 6934
6911 6935 case '?':
6912 6936 if (s1 != '\0')
6913 6937 goto top;
6914 6938 return (0);
6915 6939
6916 6940 case '*':
6917 6941 while (*p == '*')
6918 6942 p++; /* consecutive *'s are identical to a single one */
6919 6943
6920 6944 if (*p == '\0')
6921 6945 return (1);
6922 6946
6923 6947 for (s = olds; *s != '\0'; s++) {
6924 6948 if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
6925 6949 return (gs);
6926 6950 }
6927 6951
6928 6952 return (0);
6929 6953 }
6930 6954 }
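#ifdef	DTRACE_GLOB_EXAMPLE
/*
 * A sketch (hypothetical) of the return-value contract above; each
 * assertion follows from the semantics that dtrace_match_glob()
 * implements.
 */
static void
dtrace_match_glob_example(void)
{
	ASSERT(dtrace_match_glob("syscall", "sys*", 0) > 0);
	ASSERT(dtrace_match_glob("read", "r??d", 0) > 0);
	ASSERT(dtrace_match_glob("entry", "[ae-f]ntry", 0) > 0);
	ASSERT(dtrace_match_glob("foo", "bar", 0) == 0);
	ASSERT(dtrace_match_glob(NULL, "", 0) > 0);	/* NULL is "" */
}
#endif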
6931 6955
6932 6956 /*ARGSUSED*/
6933 6957 static int
6934 6958 dtrace_match_string(const char *s, const char *p, int depth)
6935 6959 {
6936 6960 return (s != NULL && strcmp(s, p) == 0);
6937 6961 }
6938 6962
6939 6963 /*ARGSUSED*/
6940 6964 static int
6941 6965 dtrace_match_nul(const char *s, const char *p, int depth)
6942 6966 {
6943 6967 return (1); /* always match the empty pattern */
6944 6968 }
6945 6969
6946 6970 /*ARGSUSED*/
6947 6971 static int
6948 6972 dtrace_match_nonzero(const char *s, const char *p, int depth)
6949 6973 {
6950 6974 return (s != NULL && s[0] != '\0');
6951 6975 }
6952 6976
6953 6977 static int
6954 6978 dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
6955 6979 zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
6956 6980 {
6957 6981 dtrace_probe_t template, *probe;
6958 6982 dtrace_hash_t *hash = NULL;
6959 6983 int len, rc, best = INT_MAX, nmatched = 0;
6960 6984 dtrace_id_t i;
6961 6985
6962 6986 ASSERT(MUTEX_HELD(&dtrace_lock));
6963 6987
6964 6988 /*
6965 6989 * If the probe ID is specified in the key, just lookup by ID and
6966 6990 * invoke the match callback once if a matching probe is found.
6967 6991 */
6968 6992 if (pkp->dtpk_id != DTRACE_IDNONE) {
6969 6993 if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
6970 6994 dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
6971 6995 if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
6972 6996 return (DTRACE_MATCH_FAIL);
6973 6997 nmatched++;
6974 6998 }
6975 6999 return (nmatched);
6976 7000 }
6977 7001
6978 7002 template.dtpr_mod = (char *)pkp->dtpk_mod;
6979 7003 template.dtpr_func = (char *)pkp->dtpk_func;
6980 7004 template.dtpr_name = (char *)pkp->dtpk_name;
6981 7005
6982 7006 /*
6983 7007 * We want to find the most distinct of the module name, function
6984 7008 * name, and name. So for each one that is not a glob pattern or
6985 7009 * empty string, we perform a lookup in the corresponding hash and
6986 7010 * use the hash table with the fewest collisions to do our search.
6987 7011 */
6988 7012 if (pkp->dtpk_mmatch == &dtrace_match_string &&
6989 7013 (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
6990 7014 best = len;
6991 7015 hash = dtrace_bymod;
6992 7016 }
6993 7017
6994 7018 if (pkp->dtpk_fmatch == &dtrace_match_string &&
6995 7019 (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
6996 7020 best = len;
6997 7021 hash = dtrace_byfunc;
6998 7022 }
6999 7023
7000 7024 if (pkp->dtpk_nmatch == &dtrace_match_string &&
7001 7025 (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
7002 7026 best = len;
7003 7027 hash = dtrace_byname;
7004 7028 }
7005 7029
7006 7030 /*
7007 7031 * If we did not select a hash table, iterate over every probe and
7008 7032 * invoke our callback for each one that matches our input probe key.
7009 7033 */
7010 7034 if (hash == NULL) {
7011 7035 for (i = 0; i < dtrace_nprobes; i++) {
7012 7036 if ((probe = dtrace_probes[i]) == NULL ||
7013 7037 dtrace_match_probe(probe, pkp, priv, uid,
7014 7038 zoneid) <= 0)
7015 7039 continue;
7016 7040
7017 7041 nmatched++;
7018 7042
7019 7043 if ((rc = (*matched)(probe, arg)) !=
7020 7044 DTRACE_MATCH_NEXT) {
7021 7045 if (rc == DTRACE_MATCH_FAIL)
7022 7046 return (DTRACE_MATCH_FAIL);
7023 7047 break;
7024 7048 }
7025 7049 }
7026 7050
7027 7051 return (nmatched);
7028 7052 }
7029 7053
7030 7054 /*
7031 7055 * If we selected a hash table, iterate over each probe of the same key
7032 7056 * name and invoke the callback for every probe that matches the other
7033 7057 * attributes of our input probe key.
7034 7058 */
7035 7059 for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
7036 7060 probe = *(DTRACE_HASHNEXT(hash, probe))) {
7037 7061
7038 7062 if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
7039 7063 continue;
7040 7064
7041 7065 nmatched++;
7042 7066
7043 7067 if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
7044 7068 if (rc == DTRACE_MATCH_FAIL)
7045 7069 return (DTRACE_MATCH_FAIL);
7046 7070 break;
7047 7071 }
7048 7072 }
7049 7073
7050 7074 return (nmatched);
7051 7075 }
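/*
 * Example (hypothetical): for a key naming module "libc.so.1", function
 * "malloc" and name "entry" -- all exact strings -- the three lookups
 * above might find chain lengths of 900, 12 and 40000 respectively, in
 * which case dtrace_byfunc is searched and dtrace_match_probe() filters
 * the dozen candidates against the remaining fields of the key.
 */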
7052 7076
7053 7077 /*
7054 7078  * Return the function pointer dtrace_match_probe() should use to compare the
7055 7079 * specified pattern with a string. For NULL or empty patterns, we select
7056 7080 * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob().
7057 7081 * For non-empty non-glob strings, we use dtrace_match_string().
7058 7082 */
7059 7083 static dtrace_probekey_f *
7060 7084 dtrace_probekey_func(const char *p)
7061 7085 {
7062 7086 char c;
7063 7087
7064 7088 if (p == NULL || *p == '\0')
7065 7089 return (&dtrace_match_nul);
7066 7090
7067 7091 while ((c = *p++) != '\0') {
7068 7092 if (c == '[' || c == '?' || c == '*' || c == '\\')
7069 7093 return (&dtrace_match_glob);
7070 7094 }
7071 7095
7072 7096 return (&dtrace_match_string);
7073 7097 }
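#ifdef	DTRACE_PROBEKEY_FUNC_EXAMPLE
/*
 * A sketch (hypothetical) of the selection rules above: NULL and empty
 * patterns match anything, glob metacharacters select the glob matcher,
 * and everything else is compared literally.
 */
static void
dtrace_probekey_func_example(void)
{
	ASSERT(dtrace_probekey_func(NULL) == &dtrace_match_nul);
	ASSERT(dtrace_probekey_func("") == &dtrace_match_nul);
	ASSERT(dtrace_probekey_func("read*") == &dtrace_match_glob);
	ASSERT(dtrace_probekey_func("re?d") == &dtrace_match_glob);
	ASSERT(dtrace_probekey_func("read") == &dtrace_match_string);
}
#endif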
7074 7098
7075 7099 /*
7076 7100 * Build a probe comparison key for use with dtrace_match_probe() from the
7077 7101 * given probe description. By convention, a null key only matches anchored
7078 7102 * probes: if each field is the empty string, reset dtpk_fmatch to
7079 7103 * dtrace_match_nonzero().
7080 7104 */
7081 7105 static void
7082 7106 dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
7083 7107 {
7084 7108 pkp->dtpk_prov = pdp->dtpd_provider;
7085 7109 pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);
7086 7110
7087 7111 pkp->dtpk_mod = pdp->dtpd_mod;
7088 7112 pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);
7089 7113
7090 7114 pkp->dtpk_func = pdp->dtpd_func;
7091 7115 pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);
7092 7116
7093 7117 pkp->dtpk_name = pdp->dtpd_name;
7094 7118 pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);
7095 7119
7096 7120 pkp->dtpk_id = pdp->dtpd_id;
7097 7121
7098 7122 if (pkp->dtpk_id == DTRACE_IDNONE &&
7099 7123 pkp->dtpk_pmatch == &dtrace_match_nul &&
7100 7124 pkp->dtpk_mmatch == &dtrace_match_nul &&
7101 7125 pkp->dtpk_fmatch == &dtrace_match_nul &&
7102 7126 pkp->dtpk_nmatch == &dtrace_match_nul)
7103 7127 pkp->dtpk_fmatch = &dtrace_match_nonzero;
7104 7128 }
7105 7129
7106 7130 /*
7107 7131 * DTrace Provider-to-Framework API Functions
7108 7132 *
7109 7133 * These functions implement much of the Provider-to-Framework API, as
7110 7134 * described in <sys/dtrace.h>. The parts of the API not in this section are
7111 7135 * the functions in the API for probe management (found below), and
7112 7136 * dtrace_probe() itself (found above).
7113 7137 */
7114 7138
7115 7139 /*
7116 7140 * Register the calling provider with the DTrace framework. This should
7117 7141 * generally be called by DTrace providers in their attach(9E) entry point.
7118 7142 */
7119 7143 int
7120 7144 dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
7121 7145 cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
7122 7146 {
7123 7147 dtrace_provider_t *provider;
7124 7148
7125 7149 if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
7126 7150 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7127 7151 "arguments", name ? name : "<NULL>");
7128 7152 return (EINVAL);
7129 7153 }
7130 7154
7131 7155 if (name[0] == '\0' || dtrace_badname(name)) {
7132 7156 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7133 7157 "provider name", name);
7134 7158 return (EINVAL);
7135 7159 }
7136 7160
7137 7161 if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
7138 7162 pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
7139 7163 pops->dtps_destroy == NULL ||
7140 7164 ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
7141 7165 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7142 7166 "provider ops", name);
7143 7167 return (EINVAL);
7144 7168 }
7145 7169
7146 7170 if (dtrace_badattr(&pap->dtpa_provider) ||
7147 7171 dtrace_badattr(&pap->dtpa_mod) ||
7148 7172 dtrace_badattr(&pap->dtpa_func) ||
7149 7173 dtrace_badattr(&pap->dtpa_name) ||
7150 7174 dtrace_badattr(&pap->dtpa_args)) {
7151 7175 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7152 7176 "provider attributes", name);
7153 7177 return (EINVAL);
7154 7178 }
7155 7179
7156 7180 if (priv & ~DTRACE_PRIV_ALL) {
7157 7181 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7158 7182 "privilege attributes", name);
7159 7183 return (EINVAL);
7160 7184 }
7161 7185
7162 7186 if ((priv & DTRACE_PRIV_KERNEL) &&
7163 7187 (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
7164 7188 pops->dtps_mode == NULL) {
7165 7189 cmn_err(CE_WARN, "failed to register provider '%s': need "
7166 7190 "dtps_mode() op for given privilege attributes", name);
7167 7191 return (EINVAL);
7168 7192 }
7169 7193
7170 7194 provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
7171 7195 provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
7172 7196 (void) strcpy(provider->dtpv_name, name);
7173 7197
7174 7198 provider->dtpv_attr = *pap;
7175 7199 provider->dtpv_priv.dtpp_flags = priv;
7176 7200 if (cr != NULL) {
7177 7201 provider->dtpv_priv.dtpp_uid = crgetuid(cr);
7178 7202 provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
7179 7203 }
7180 7204 provider->dtpv_pops = *pops;
7181 7205
7182 7206 if (pops->dtps_provide == NULL) {
7183 7207 ASSERT(pops->dtps_provide_module != NULL);
7184 7208 provider->dtpv_pops.dtps_provide =
7185 7209 (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
7186 7210 }
7187 7211
7188 7212 if (pops->dtps_provide_module == NULL) {
7189 7213 ASSERT(pops->dtps_provide != NULL);
7190 7214 provider->dtpv_pops.dtps_provide_module =
7191 7215 (void (*)(void *, struct modctl *))dtrace_nullop;
7192 7216 }
7193 7217
7194 7218 if (pops->dtps_suspend == NULL) {
7195 7219 ASSERT(pops->dtps_resume == NULL);
7196 7220 provider->dtpv_pops.dtps_suspend =
7197 7221 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
7198 7222 provider->dtpv_pops.dtps_resume =
7199 7223 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
7200 7224 }
7201 7225
7202 7226 provider->dtpv_arg = arg;
7203 7227 *idp = (dtrace_provider_id_t)provider;
7204 7228
7205 7229 if (pops == &dtrace_provider_ops) {
7206 7230 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
7207 7231 ASSERT(MUTEX_HELD(&dtrace_lock));
7208 7232 ASSERT(dtrace_anon.dta_enabling == NULL);
7209 7233
7210 7234 /*
7211 7235 * We make sure that the DTrace provider is at the head of
7212 7236 * the provider chain.
7213 7237 */
7214 7238 provider->dtpv_next = dtrace_provider;
7215 7239 dtrace_provider = provider;
7216 7240 return (0);
7217 7241 }
7218 7242
7219 7243 mutex_enter(&dtrace_provider_lock);
7220 7244 mutex_enter(&dtrace_lock);
7221 7245
7222 7246 /*
7223 7247 * If there is at least one provider registered, we'll add this
7224 7248 * provider after the first provider.
7225 7249 */
7226 7250 if (dtrace_provider != NULL) {
7227 7251 provider->dtpv_next = dtrace_provider->dtpv_next;
7228 7252 dtrace_provider->dtpv_next = provider;
7229 7253 } else {
7230 7254 dtrace_provider = provider;
7231 7255 }
7232 7256
7233 7257 if (dtrace_retained != NULL) {
7234 7258 dtrace_enabling_provide(provider);
7235 7259
7236 7260 /*
7237 7261 * Now we need to call dtrace_enabling_matchall() -- which
7238 7262 * will acquire cpu_lock and dtrace_lock. We therefore need
7239 7263 * to drop all of our locks before calling into it...
7240 7264 */
7241 7265 mutex_exit(&dtrace_lock);
7242 7266 mutex_exit(&dtrace_provider_lock);
7243 7267 dtrace_enabling_matchall();
7244 7268
7245 7269 return (0);
7246 7270 }
7247 7271
7248 7272 mutex_exit(&dtrace_lock);
7249 7273 mutex_exit(&dtrace_provider_lock);
7250 7274
7251 7275 return (0);
7252 7276 }
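#ifdef	DTRACE_REGISTER_EXAMPLE
/*
 * A minimal sketch of a hypothetical "toy" provider registering itself,
 * much as a real provider would from its attach(9E) entry point. The ops
 * vector satisfies the validation above: dtps_provide, dtps_enable,
 * dtps_disable and dtps_destroy are all supplied, and dtps_suspend and
 * dtps_resume are omitted as a pair. None of these names exist in the
 * framework; they are for illustration only.
 */
static dtrace_provider_id_t toy_id;

/*ARGSUSED*/
static void
toy_provide(void *arg, const dtrace_probedesc_t *desc)
{
	if (dtrace_probe_lookup(toy_id, NULL, "toy", "tick") == DTRACE_IDNONE)
		(void) dtrace_probe_create(toy_id, NULL, "toy", "tick",
		    0, NULL);
}

/*ARGSUSED*/
static int
toy_enable(void *arg, dtrace_id_t id, void *parg)
{
	return (0);
}

/*ARGSUSED*/
static void
toy_disable(void *arg, dtrace_id_t id, void *parg)
{}

/*ARGSUSED*/
static void
toy_destroy(void *arg, dtrace_id_t id, void *parg)
{}

static dtrace_pattr_t toy_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};

static dtrace_pops_t toy_pops = {
	toy_provide,
	NULL,		/* dtps_provide_module */
	toy_enable,
	toy_disable,
	NULL,		/* dtps_suspend */
	NULL,		/* dtps_resume */
	NULL,		/* dtps_getargdesc */
	NULL,		/* dtps_getargval */
	NULL,		/* dtps_mode */
	toy_destroy
};

static int
toy_attach(void)
{
	return (dtrace_register("toy", &toy_attr, DTRACE_PRIV_KERNEL,
	    NULL, &toy_pops, NULL, &toy_id));
}
#endif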
7253 7277
7254 7278 /*
7255 7279 * Unregister the specified provider from the DTrace framework. This should
7256 7280 * generally be called by DTrace providers in their detach(9E) entry point.
7257 7281 */
7258 7282 int
7259 7283 dtrace_unregister(dtrace_provider_id_t id)
7260 7284 {
7261 7285 dtrace_provider_t *old = (dtrace_provider_t *)id;
7262 7286 dtrace_provider_t *prev = NULL;
7263 7287 int i, self = 0, noreap = 0;
7264 7288 dtrace_probe_t *probe, *first = NULL;
7265 7289
7266 7290 if (old->dtpv_pops.dtps_enable ==
7267 7291 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
7268 7292 /*
7269 7293 * If DTrace itself is the provider, we're called with locks
7270 7294 * already held.
7271 7295 */
7272 7296 ASSERT(old == dtrace_provider);
7273 7297 ASSERT(dtrace_devi != NULL);
7274 7298 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
7275 7299 ASSERT(MUTEX_HELD(&dtrace_lock));
7276 7300 self = 1;
7277 7301
7278 7302 if (dtrace_provider->dtpv_next != NULL) {
7279 7303 /*
7280 7304 * There's another provider here; return failure.
7281 7305 */
7282 7306 return (EBUSY);
7283 7307 }
7284 7308 } else {
7285 7309 mutex_enter(&dtrace_provider_lock);
7286 7310 mutex_enter(&mod_lock);
7287 7311 mutex_enter(&dtrace_lock);
7288 7312 }
7289 7313
7290 7314 /*
7291 7315 * If anyone has /dev/dtrace open, or if there are anonymous enabled
7292 7316 * probes, we refuse to let providers slither away, unless this
7293 7317 * provider has already been explicitly invalidated.
7294 7318 */
7295 7319 if (!old->dtpv_defunct &&
7296 7320 (dtrace_opens || (dtrace_anon.dta_state != NULL &&
7297 7321 dtrace_anon.dta_state->dts_necbs > 0))) {
7298 7322 if (!self) {
7299 7323 mutex_exit(&dtrace_lock);
7300 7324 mutex_exit(&mod_lock);
7301 7325 mutex_exit(&dtrace_provider_lock);
7302 7326 }
7303 7327 return (EBUSY);
7304 7328 }
7305 7329
7306 7330 /*
7307 7331 * Attempt to destroy the probes associated with this provider.
7308 7332 */
7309 7333 for (i = 0; i < dtrace_nprobes; i++) {
7310 7334 if ((probe = dtrace_probes[i]) == NULL)
7311 7335 continue;
7312 7336
7313 7337 if (probe->dtpr_provider != old)
7314 7338 continue;
7315 7339
7316 7340 if (probe->dtpr_ecb == NULL)
7317 7341 continue;
7318 7342
7319 7343 /*
7320 7344 * If we are trying to unregister a defunct provider, and the
7321 7345 * provider was made defunct within the interval dictated by
7322 7346 * dtrace_unregister_defunct_reap, we'll (asynchronously)
7323 7347 * attempt to reap our enablings. To denote that the provider
7324 7348 * should reattempt to unregister itself at some point in the
7325 7349 * future, we will return a differentiable error code (EAGAIN
7326 7350 * instead of EBUSY) in this case.
7327 7351 */
7328 7352 if (dtrace_gethrtime() - old->dtpv_defunct >
7329 7353 dtrace_unregister_defunct_reap)
7330 7354 noreap = 1;
7331 7355
7332 7356 if (!self) {
7333 7357 mutex_exit(&dtrace_lock);
7334 7358 mutex_exit(&mod_lock);
7335 7359 mutex_exit(&dtrace_provider_lock);
7336 7360 }
7337 7361
7338 7362 if (noreap)
7339 7363 return (EBUSY);
7340 7364
7341 7365 (void) taskq_dispatch(dtrace_taskq,
7342 7366 (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);
7343 7367
7344 7368 return (EAGAIN);
7345 7369 }
7346 7370
7347 7371 /*
7348 7372 * All of the probes for this provider are disabled; we can safely
7349 7373 * remove all of them from their hash chains and from the probe array.
7350 7374 */
7351 7375 for (i = 0; i < dtrace_nprobes; i++) {
7352 7376 if ((probe = dtrace_probes[i]) == NULL)
7353 7377 continue;
7354 7378
7355 7379 if (probe->dtpr_provider != old)
7356 7380 continue;
7357 7381
7358 7382 dtrace_probes[i] = NULL;
7359 7383
7360 7384 dtrace_hash_remove(dtrace_bymod, probe);
7361 7385 dtrace_hash_remove(dtrace_byfunc, probe);
7362 7386 dtrace_hash_remove(dtrace_byname, probe);
7363 7387
7364 7388 if (first == NULL) {
7365 7389 first = probe;
7366 7390 probe->dtpr_nextmod = NULL;
7367 7391 } else {
7368 7392 probe->dtpr_nextmod = first;
7369 7393 first = probe;
7370 7394 }
7371 7395 }
7372 7396
7373 7397 /*
7374 7398 * The provider's probes have been removed from the hash chains and
7375 7399 * from the probe array. Now issue a dtrace_sync() to be sure that
7376 7400 * everyone has cleared out from any probe array processing.
7377 7401 */
7378 7402 dtrace_sync();
7379 7403
7380 7404 for (probe = first; probe != NULL; probe = first) {
7381 7405 first = probe->dtpr_nextmod;
7382 7406
7383 7407 old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
7384 7408 probe->dtpr_arg);
7385 7409 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
7386 7410 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
7387 7411 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
7388 7412 vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
7389 7413 kmem_free(probe, sizeof (dtrace_probe_t));
7390 7414 }
7391 7415
7392 7416 if ((prev = dtrace_provider) == old) {
7393 7417 ASSERT(self || dtrace_devi == NULL);
7394 7418 ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
7395 7419 dtrace_provider = old->dtpv_next;
7396 7420 } else {
7397 7421 while (prev != NULL && prev->dtpv_next != old)
7398 7422 prev = prev->dtpv_next;
7399 7423
7400 7424 if (prev == NULL) {
7401 7425 panic("attempt to unregister non-existent "
7402 7426 "dtrace provider %p\n", (void *)id);
7403 7427 }
7404 7428
7405 7429 prev->dtpv_next = old->dtpv_next;
7406 7430 }
7407 7431
7408 7432 if (!self) {
7409 7433 mutex_exit(&dtrace_lock);
7410 7434 mutex_exit(&mod_lock);
7411 7435 mutex_exit(&dtrace_provider_lock);
7412 7436 }
7413 7437
7414 7438 kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
7415 7439 kmem_free(old, sizeof (dtrace_provider_t));
7416 7440
7417 7441 return (0);
7418 7442 }
7419 7443
7420 7444 /*
7421 7445 * Invalidate the specified provider. All subsequent probe lookups for the
7422 7446 * specified provider will fail, but its probes will not be removed.
7423 7447 */
7424 7448 void
7425 7449 dtrace_invalidate(dtrace_provider_id_t id)
7426 7450 {
7427 7451 dtrace_provider_t *pvp = (dtrace_provider_t *)id;
7428 7452
7429 7453 ASSERT(pvp->dtpv_pops.dtps_enable !=
7430 7454 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
7431 7455
7432 7456 mutex_enter(&dtrace_provider_lock);
7433 7457 mutex_enter(&dtrace_lock);
7434 7458
7435 7459 pvp->dtpv_defunct = dtrace_gethrtime();
7436 7460
7437 7461 mutex_exit(&dtrace_lock);
7438 7462 mutex_exit(&dtrace_provider_lock);
7439 7463 }
7440 7464
7441 7465 /*
7442 7466 * Indicate whether or not DTrace has attached.
7443 7467 */
7444 7468 int
7445 7469 dtrace_attached(void)
7446 7470 {
7447 7471 /*
7448 7472 * dtrace_provider will be non-NULL iff the DTrace driver has
7449 7473 * attached. (It's non-NULL because DTrace is always itself a
7450 7474 * provider.)
7451 7475 */
7452 7476 return (dtrace_provider != NULL);
7453 7477 }
7454 7478
7455 7479 /*
7456 7480 * Remove all the unenabled probes for the given provider. This function is
7457 7481 * not unlike dtrace_unregister(), except that it doesn't remove the provider
7458 7482 * -- just as many of its associated probes as it can.
7459 7483 */
7460 7484 int
7461 7485 dtrace_condense(dtrace_provider_id_t id)
7462 7486 {
7463 7487 dtrace_provider_t *prov = (dtrace_provider_t *)id;
7464 7488 int i;
7465 7489 dtrace_probe_t *probe;
7466 7490
7467 7491 /*
7468 7492 * Make sure this isn't the dtrace provider itself.
7469 7493 */
7470 7494 ASSERT(prov->dtpv_pops.dtps_enable !=
7471 7495 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
7472 7496
7473 7497 mutex_enter(&dtrace_provider_lock);
7474 7498 mutex_enter(&dtrace_lock);
7475 7499
7476 7500 /*
7477 7501 * Attempt to destroy the probes associated with this provider.
7478 7502 */
7479 7503 for (i = 0; i < dtrace_nprobes; i++) {
7480 7504 if ((probe = dtrace_probes[i]) == NULL)
7481 7505 continue;
7482 7506
7483 7507 if (probe->dtpr_provider != prov)
7484 7508 continue;
7485 7509
7486 7510 if (probe->dtpr_ecb != NULL)
7487 7511 continue;
7488 7512
7489 7513 dtrace_probes[i] = NULL;
7490 7514
7491 7515 dtrace_hash_remove(dtrace_bymod, probe);
7492 7516 dtrace_hash_remove(dtrace_byfunc, probe);
7493 7517 dtrace_hash_remove(dtrace_byname, probe);
7494 7518
7495 7519 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
7496 7520 probe->dtpr_arg);
7497 7521 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
7498 7522 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
7499 7523 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
7500 7524 kmem_free(probe, sizeof (dtrace_probe_t));
7501 7525 vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
7502 7526 }
7503 7527
7504 7528 mutex_exit(&dtrace_lock);
7505 7529 mutex_exit(&dtrace_provider_lock);
7506 7530
7507 7531 return (0);
7508 7532 }
7509 7533
7510 7534 /*
7511 7535 * DTrace Probe Management Functions
7512 7536 *
7513 7537 * The functions in this section perform the DTrace probe management,
7514 7538 * including functions to create probes, look-up probes, and call into the
7515 7539 * providers to request that probes be provided. Some of these functions are
7516 7540 * in the Provider-to-Framework API; these functions can be identified by the
7517 7541 * fact that they are not declared "static".
7518 7542 */
7519 7543
7520 7544 /*
7521 7545 * Create a probe with the specified module name, function name, and name.
7522 7546 */
7523 7547 dtrace_id_t
7524 7548 dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
7525 7549 const char *func, const char *name, int aframes, void *arg)
7526 7550 {
7527 7551 dtrace_probe_t *probe, **probes;
7528 7552 dtrace_provider_t *provider = (dtrace_provider_t *)prov;
7529 7553 dtrace_id_t id;
7530 7554
7531 7555 if (provider == dtrace_provider) {
7532 7556 ASSERT(MUTEX_HELD(&dtrace_lock));
7533 7557 } else {
7534 7558 mutex_enter(&dtrace_lock);
7535 7559 }
7536 7560
7537 7561 id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
7538 7562 VM_BESTFIT | VM_SLEEP);
7539 7563 probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);
7540 7564
7541 7565 probe->dtpr_id = id;
7542 7566 probe->dtpr_gen = dtrace_probegen++;
7543 7567 probe->dtpr_mod = dtrace_strdup(mod);
7544 7568 probe->dtpr_func = dtrace_strdup(func);
7545 7569 probe->dtpr_name = dtrace_strdup(name);
7546 7570 probe->dtpr_arg = arg;
7547 7571 probe->dtpr_aframes = aframes;
7548 7572 probe->dtpr_provider = provider;
7549 7573
7550 7574 dtrace_hash_add(dtrace_bymod, probe);
7551 7575 dtrace_hash_add(dtrace_byfunc, probe);
7552 7576 dtrace_hash_add(dtrace_byname, probe);
7553 7577
7554 7578 if (id - 1 >= dtrace_nprobes) {
7555 7579 size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
7556 7580 size_t nsize = osize << 1;
7557 7581
7558 7582 if (nsize == 0) {
7559 7583 ASSERT(osize == 0);
7560 7584 ASSERT(dtrace_probes == NULL);
7561 7585 nsize = sizeof (dtrace_probe_t *);
7562 7586 }
7563 7587
7564 7588 probes = kmem_zalloc(nsize, KM_SLEEP);
7565 7589
7566 7590 if (dtrace_probes == NULL) {
7567 7591 ASSERT(osize == 0);
7568 7592 dtrace_probes = probes;
7569 7593 dtrace_nprobes = 1;
7570 7594 } else {
7571 7595 dtrace_probe_t **oprobes = dtrace_probes;
7572 7596
7573 7597 bcopy(oprobes, probes, osize);
7574 7598 dtrace_membar_producer();
7575 7599 dtrace_probes = probes;
7576 7600
7577 7601 dtrace_sync();
7578 7602
7579 7603 /*
7580 7604 * All CPUs are now seeing the new probes array; we can
7581 7605 * safely free the old array.
7582 7606 */
7583 7607 kmem_free(oprobes, osize);
7584 7608 dtrace_nprobes <<= 1;
7585 7609 }
7586 7610
7587 7611 ASSERT(id - 1 < dtrace_nprobes);
7588 7612 }
7589 7613
7590 7614 ASSERT(dtrace_probes[id - 1] == NULL);
7591 7615 dtrace_probes[id - 1] = probe;
7592 7616
7593 7617 if (provider != dtrace_provider)
7594 7618 mutex_exit(&dtrace_lock);
7595 7619
7596 7620 return (id);
7597 7621 }
7598 7622
7599 7623 static dtrace_probe_t *
7600 7624 dtrace_probe_lookup_id(dtrace_id_t id)
7601 7625 {
7602 7626 ASSERT(MUTEX_HELD(&dtrace_lock));
7603 7627
7604 7628 if (id == 0 || id > dtrace_nprobes)
7605 7629 return (NULL);
7606 7630
7607 7631 return (dtrace_probes[id - 1]);
7608 7632 }
7609 7633
7610 7634 static int
7611 7635 dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
7612 7636 {
7613 7637 *((dtrace_id_t *)arg) = probe->dtpr_id;
7614 7638
7615 7639 return (DTRACE_MATCH_DONE);
7616 7640 }
7617 7641
7618 7642 /*
7619 7643 * Look up a probe based on provider and one or more of module name, function
7620 7644 * name and probe name.
7621 7645 */
7622 7646 dtrace_id_t
7623 7647 dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
7624 7648 const char *func, const char *name)
7625 7649 {
7626 7650 dtrace_probekey_t pkey;
7627 7651 dtrace_id_t id;
7628 7652 int match;
7629 7653
7630 7654 pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
7631 7655 pkey.dtpk_pmatch = &dtrace_match_string;
7632 7656 pkey.dtpk_mod = mod;
7633 7657 pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
7634 7658 pkey.dtpk_func = func;
7635 7659 pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
7636 7660 pkey.dtpk_name = name;
7637 7661 pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
7638 7662 pkey.dtpk_id = DTRACE_IDNONE;
7639 7663
7640 7664 mutex_enter(&dtrace_lock);
7641 7665 match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
7642 7666 dtrace_probe_lookup_match, &id);
7643 7667 mutex_exit(&dtrace_lock);
7644 7668
7645 7669 ASSERT(match == 1 || match == 0);
7646 7670 return (match ? id : 0);
7647 7671 }
7648 7672
7649 7673 /*
7650 7674 * Returns the probe argument associated with the specified probe.
7651 7675 */
7652 7676 void *
7653 7677 dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
7654 7678 {
7655 7679 dtrace_probe_t *probe;
7656 7680 void *rval = NULL;
7657 7681
7658 7682 mutex_enter(&dtrace_lock);
7659 7683
7660 7684 if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
7661 7685 probe->dtpr_provider == (dtrace_provider_t *)id)
7662 7686 rval = probe->dtpr_arg;
7663 7687
7664 7688 mutex_exit(&dtrace_lock);
7665 7689
7666 7690 return (rval);
7667 7691 }
7668 7692
7669 7693 /*
7670 7694 * Copy a probe into a probe description.
7671 7695 */
7672 7696 static void
7673 7697 dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
7674 7698 {
7675 7699 bzero(pdp, sizeof (dtrace_probedesc_t));
7676 7700 pdp->dtpd_id = prp->dtpr_id;
7677 7701
7678 7702 (void) strncpy(pdp->dtpd_provider,
7679 7703 prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1);
7680 7704
7681 7705 (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1);
7682 7706 (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1);
7683 7707 (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1);
7684 7708 }
7685 7709
7686 7710 /*
7687 7711 * Called to indicate that a probe -- or probes -- should be provided by a
7688 7712  * specified provider. If the specified description is NULL, the provider will
7689 7713 * be told to provide all of its probes. (This is done whenever a new
7690 7714 * consumer comes along, or whenever a retained enabling is to be matched.) If
7691 7715 * the specified description is non-NULL, the provider is given the
7692 7716 * opportunity to dynamically provide the specified probe, allowing providers
7693 7717 * to support the creation of probes on-the-fly. (So-called _autocreated_
7694 7718 * probes.) If the provider is NULL, the operations will be applied to all
7695 7719 * providers; if the provider is non-NULL the operations will only be applied
7696 7720 * to the specified provider. The dtrace_provider_lock must be held, and the
7697 7721 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
7698 7722 * will need to grab the dtrace_lock when it reenters the framework through
7699 7723 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
7700 7724 */
7701 7725 static void
7702 7726 dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
7703 7727 {
7704 7728 struct modctl *ctl;
7705 7729 int all = 0;
7706 7730
7707 7731 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
7708 7732
7709 7733 if (prv == NULL) {
7710 7734 all = 1;
7711 7735 prv = dtrace_provider;
7712 7736 }
7713 7737
7714 7738 do {
7715 7739 /*
7716 7740 * First, call the blanket provide operation.
7717 7741 */
7718 7742 prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);
7719 7743
7720 7744 /*
7721 7745 * Now call the per-module provide operation. We will grab
7722 7746 * mod_lock to prevent the list from being modified. Note
7723 7747 * that this also prevents the mod_busy bits from changing.
7724 7748 * (mod_busy can only be changed with mod_lock held.)
7725 7749 */
7726 7750 mutex_enter(&mod_lock);
7727 7751
7728 7752 ctl = &modules;
7729 7753 do {
7730 7754 if (ctl->mod_busy || ctl->mod_mp == NULL)
7731 7755 continue;
7732 7756
7733 7757 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
7734 7758
7735 7759 } while ((ctl = ctl->mod_next) != &modules);
7736 7760
7737 7761 mutex_exit(&mod_lock);
7738 7762 } while (all && (prv = prv->dtpv_next) != NULL);
7739 7763 }
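/*
 * For instance (hypothetical): dtrace_probe_provide(NULL, NULL) -- as is
 * done when a new consumer comes along -- walks every registered provider
 * and asks each for all of its probes, while a non-NULL description
 * limits the request to probes that could match that description.
 */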
7740 7764
7741 7765 /*
7742 7766 * Iterate over each probe, and call the Framework-to-Provider API function
7743 7767 * denoted by offs.
7744 7768 */
7745 7769 static void
7746 7770 dtrace_probe_foreach(uintptr_t offs)
7747 7771 {
7748 7772 dtrace_provider_t *prov;
7749 7773 void (*func)(void *, dtrace_id_t, void *);
7750 7774 dtrace_probe_t *probe;
7751 7775 dtrace_icookie_t cookie;
7752 7776 int i;
7753 7777
7754 7778 /*
7755 7779 * We disable interrupts to walk through the probe array. This is
7756 7780 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
7757 7781 * won't see stale data.
7758 7782 */
7759 7783 cookie = dtrace_interrupt_disable();
7760 7784
7761 7785 for (i = 0; i < dtrace_nprobes; i++) {
7762 7786 if ((probe = dtrace_probes[i]) == NULL)
7763 7787 continue;
7764 7788
7765 7789 if (probe->dtpr_ecb == NULL) {
7766 7790 /*
7767 7791 * This probe isn't enabled -- don't call the function.
7768 7792 */
7769 7793 continue;
7770 7794 }
7771 7795
7772 7796 prov = probe->dtpr_provider;
7773 7797 func = *((void(**)(void *, dtrace_id_t, void *))
7774 7798 ((uintptr_t)&prov->dtpv_pops + offs));
7775 7799
7776 7800 func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
7777 7801 }
7778 7802
7779 7803 dtrace_interrupt_enable(cookie);
7780 7804 }
7781 7805
7782 7806 static int
7783 7807 dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
7784 7808 {
7785 7809 dtrace_probekey_t pkey;
7786 7810 uint32_t priv;
7787 7811 uid_t uid;
7788 7812 zoneid_t zoneid;
7789 7813
7790 7814 ASSERT(MUTEX_HELD(&dtrace_lock));
7791 7815 dtrace_ecb_create_cache = NULL;
7792 7816
7793 7817 if (desc == NULL) {
7794 7818 /*
7795 7819 * If we're passed a NULL description, we're being asked to
7796 7820 * create an ECB with a NULL probe.
7797 7821 */
7798 7822 (void) dtrace_ecb_create_enable(NULL, enab);
7799 7823 return (0);
7800 7824 }
7801 7825
7802 7826 dtrace_probekey(desc, &pkey);
7803 7827 dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
7804 7828 &priv, &uid, &zoneid);
7805 7829
7806 7830 return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
7807 7831 enab));
7808 7832 }
7809 7833
7810 7834 /*
7811 7835 * DTrace Helper Provider Functions
7812 7836 */
7813 7837 static void
7814 7838 dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
7815 7839 {
7816 7840 attr->dtat_name = DOF_ATTR_NAME(dofattr);
7817 7841 attr->dtat_data = DOF_ATTR_DATA(dofattr);
7818 7842 attr->dtat_class = DOF_ATTR_CLASS(dofattr);
7819 7843 }
7820 7844
7821 7845 static void
7822 7846 dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
7823 7847 const dof_provider_t *dofprov, char *strtab)
7824 7848 {
7825 7849 hprov->dthpv_provname = strtab + dofprov->dofpv_name;
7826 7850 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
7827 7851 dofprov->dofpv_provattr);
7828 7852 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
7829 7853 dofprov->dofpv_modattr);
7830 7854 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
7831 7855 dofprov->dofpv_funcattr);
7832 7856 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
7833 7857 dofprov->dofpv_nameattr);
7834 7858 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
7835 7859 dofprov->dofpv_argsattr);
7836 7860 }
7837 7861
7838 7862 static void
7839 7863 dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
7840 7864 {
7841 7865 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
7842 7866 dof_hdr_t *dof = (dof_hdr_t *)daddr;
7843 7867 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
7844 7868 dof_provider_t *provider;
7845 7869 dof_probe_t *probe;
7846 7870 uint32_t *off, *enoff;
7847 7871 uint8_t *arg;
7848 7872 char *strtab;
7849 7873 uint_t i, nprobes;
7850 7874 dtrace_helper_provdesc_t dhpv;
7851 7875 dtrace_helper_probedesc_t dhpb;
7852 7876 dtrace_meta_t *meta = dtrace_meta_pid;
7853 7877 dtrace_mops_t *mops = &meta->dtm_mops;
7854 7878 void *parg;
7855 7879
7856 7880 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
7857 7881 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7858 7882 provider->dofpv_strtab * dof->dofh_secsize);
7859 7883 prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7860 7884 provider->dofpv_probes * dof->dofh_secsize);
7861 7885 arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7862 7886 provider->dofpv_prargs * dof->dofh_secsize);
7863 7887 off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7864 7888 provider->dofpv_proffs * dof->dofh_secsize);
7865 7889
7866 7890 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
7867 7891 off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
7868 7892 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
7869 7893 enoff = NULL;
7870 7894
7871 7895 /*
7872 7896 * See dtrace_helper_provider_validate().
7873 7897 */
7874 7898 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
7875 7899 provider->dofpv_prenoffs != DOF_SECT_NONE) {
7876 7900 enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7877 7901 provider->dofpv_prenoffs * dof->dofh_secsize);
7878 7902 enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
7879 7903 }
7880 7904
7881 7905 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
7882 7906
7883 7907 /*
7884 7908 * Create the provider.
7885 7909 */
7886 7910 dtrace_dofprov2hprov(&dhpv, provider, strtab);
7887 7911
7888 7912 if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
7889 7913 return;
7890 7914
7891 7915 meta->dtm_count++;
7892 7916
7893 7917 /*
7894 7918 * Create the probes.
7895 7919 */
7896 7920 for (i = 0; i < nprobes; i++) {
7897 7921 probe = (dof_probe_t *)(uintptr_t)(daddr +
7898 7922 prb_sec->dofs_offset + i * prb_sec->dofs_entsize);
7899 7923
7900 7924 dhpb.dthpb_mod = dhp->dofhp_mod;
7901 7925 dhpb.dthpb_func = strtab + probe->dofpr_func;
7902 7926 dhpb.dthpb_name = strtab + probe->dofpr_name;
7903 7927 dhpb.dthpb_base = probe->dofpr_addr;
7904 7928 dhpb.dthpb_offs = off + probe->dofpr_offidx;
7905 7929 dhpb.dthpb_noffs = probe->dofpr_noffs;
7906 7930 if (enoff != NULL) {
7907 7931 dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
7908 7932 dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
7909 7933 } else {
7910 7934 dhpb.dthpb_enoffs = NULL;
7911 7935 dhpb.dthpb_nenoffs = 0;
7912 7936 }
7913 7937 dhpb.dthpb_args = arg + probe->dofpr_argidx;
7914 7938 dhpb.dthpb_nargc = probe->dofpr_nargc;
7915 7939 dhpb.dthpb_xargc = probe->dofpr_xargc;
7916 7940 dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
7917 7941 dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;
7918 7942
7919 7943 mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
7920 7944 }
7921 7945 }
7922 7946
7923 7947 static void
7924 7948 dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
7925 7949 {
7926 7950 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
7927 7951 dof_hdr_t *dof = (dof_hdr_t *)daddr;
7928 7952 int i;
7929 7953
7930 7954 ASSERT(MUTEX_HELD(&dtrace_meta_lock));
7931 7955
7932 7956 for (i = 0; i < dof->dofh_secnum; i++) {
7933 7957 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
7934 7958 dof->dofh_secoff + i * dof->dofh_secsize);
7935 7959
7936 7960 if (sec->dofs_type != DOF_SECT_PROVIDER)
7937 7961 continue;
7938 7962
7939 7963 dtrace_helper_provide_one(dhp, sec, pid);
7940 7964 }
7941 7965
7942 7966 /*
7943 7967 * We may have just created probes, so we must now rematch against
7944 7968 * any retained enablings. Note that this call will acquire both
7945 7969 * cpu_lock and dtrace_lock; the fact that we are holding
7946 7970 * dtrace_meta_lock now is what defines the ordering with respect to
7947 7971 * these three locks.
7948 7972 */
7949 7973 dtrace_enabling_matchall();
7950 7974 }
7951 7975
7952 7976 static void
7953 7977 dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
7954 7978 {
7955 7979 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
7956 7980 dof_hdr_t *dof = (dof_hdr_t *)daddr;
7957 7981 dof_sec_t *str_sec;
7958 7982 dof_provider_t *provider;
7959 7983 char *strtab;
7960 7984 dtrace_helper_provdesc_t dhpv;
7961 7985 dtrace_meta_t *meta = dtrace_meta_pid;
7962 7986 dtrace_mops_t *mops = &meta->dtm_mops;
7963 7987
7964 7988 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
7965 7989 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7966 7990 provider->dofpv_strtab * dof->dofh_secsize);
7967 7991
7968 7992 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
7969 7993
7970 7994 /*
7971 7995 * Create the provider.
7972 7996 */
7973 7997 dtrace_dofprov2hprov(&dhpv, provider, strtab);
7974 7998
7975 7999 mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);
7976 8000
7977 8001 meta->dtm_count--;
7978 8002 }
7979 8003
7980 8004 static void
7981 8005 dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
7982 8006 {
7983 8007 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
7984 8008 dof_hdr_t *dof = (dof_hdr_t *)daddr;
7985 8009 int i;
7986 8010
7987 8011 ASSERT(MUTEX_HELD(&dtrace_meta_lock));
7988 8012
7989 8013 for (i = 0; i < dof->dofh_secnum; i++) {
7990 8014 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
7991 8015 dof->dofh_secoff + i * dof->dofh_secsize);
7992 8016
7993 8017 if (sec->dofs_type != DOF_SECT_PROVIDER)
7994 8018 continue;
7995 8019
7996 8020 dtrace_helper_provider_remove_one(dhp, sec, pid);
7997 8021 }
7998 8022 }
7999 8023
8000 8024 /*
8001 8025 * DTrace Meta Provider-to-Framework API Functions
8002 8026 *
8003 8027 * These functions implement the Meta Provider-to-Framework API, as described
8004 8028 * in <sys/dtrace.h>.
8005 8029 */
8006 8030 int
8007 8031 dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
8008 8032 dtrace_meta_provider_id_t *idp)
8009 8033 {
8010 8034 dtrace_meta_t *meta;
8011 8035 dtrace_helpers_t *help, *next;
8012 8036 int i;
8013 8037
8014 8038 *idp = DTRACE_METAPROVNONE;
8015 8039
8016 8040 /*
8017 8041 * We strictly don't need the name, but we hold onto it for
8018 8042 * debuggability. All hail error queues!
8019 8043 */
8020 8044 if (name == NULL) {
8021 8045 cmn_err(CE_WARN, "failed to register meta-provider: "
8022 8046 "invalid name");
8023 8047 return (EINVAL);
8024 8048 }
8025 8049
8026 8050 if (mops == NULL ||
8027 8051 mops->dtms_create_probe == NULL ||
8028 8052 mops->dtms_provide_pid == NULL ||
8029 8053 mops->dtms_remove_pid == NULL) {
8030 8054 		cmn_err(CE_WARN, "failed to register meta-provider %s: "
8031 8055 "invalid ops", name);
8032 8056 return (EINVAL);
8033 8057 }
8034 8058
8035 8059 meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
8036 8060 meta->dtm_mops = *mops;
8037 8061 meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
8038 8062 (void) strcpy(meta->dtm_name, name);
8039 8063 meta->dtm_arg = arg;
8040 8064
8041 8065 mutex_enter(&dtrace_meta_lock);
8042 8066 mutex_enter(&dtrace_lock);
8043 8067
8044 8068 if (dtrace_meta_pid != NULL) {
8045 8069 mutex_exit(&dtrace_lock);
8046 8070 mutex_exit(&dtrace_meta_lock);
8047 8071 		cmn_err(CE_WARN, "failed to register meta-provider %s: "
8048 8072 "user-land meta-provider exists", name);
8049 8073 kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
8050 8074 kmem_free(meta, sizeof (dtrace_meta_t));
8051 8075 return (EINVAL);
8052 8076 }
8053 8077
8054 8078 dtrace_meta_pid = meta;
8055 8079 *idp = (dtrace_meta_provider_id_t)meta;
8056 8080
8057 8081 /*
8058 8082 * If there are providers and probes ready to go, pass them
8059 8083 * off to the new meta provider now.
8060 8084 */
8061 8085
8062 8086 help = dtrace_deferred_pid;
8063 8087 dtrace_deferred_pid = NULL;
8064 8088
8065 8089 mutex_exit(&dtrace_lock);
8066 8090
8067 8091 while (help != NULL) {
8068 8092 for (i = 0; i < help->dthps_nprovs; i++) {
8069 8093 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
8070 8094 help->dthps_pid);
8071 8095 }
8072 8096
8073 8097 next = help->dthps_next;
8074 8098 help->dthps_next = NULL;
8075 8099 help->dthps_prev = NULL;
8076 8100 help->dthps_deferred = 0;
8077 8101 help = next;
8078 8102 }
8079 8103
8080 8104 mutex_exit(&dtrace_meta_lock);
8081 8105
8082 8106 return (0);
8083 8107 }
8084 8108
8085 8109 int
8086 8110 dtrace_meta_unregister(dtrace_meta_provider_id_t id)
8087 8111 {
8088 8112 dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;
8089 8113
8090 8114 mutex_enter(&dtrace_meta_lock);
8091 8115 mutex_enter(&dtrace_lock);
8092 8116
8093 8117 if (old == dtrace_meta_pid) {
8094 8118 pp = &dtrace_meta_pid;
8095 8119 } else {
8096 8120 panic("attempt to unregister non-existent "
8097 8121 "dtrace meta-provider %p\n", (void *)old);
8098 8122 }
8099 8123
8100 8124 if (old->dtm_count != 0) {
8101 8125 mutex_exit(&dtrace_lock);
8102 8126 mutex_exit(&dtrace_meta_lock);
8103 8127 return (EBUSY);
8104 8128 }
8105 8129
8106 8130 *pp = NULL;
8107 8131
8108 8132 mutex_exit(&dtrace_lock);
8109 8133 mutex_exit(&dtrace_meta_lock);
8110 8134
8111 8135 kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
8112 8136 kmem_free(old, sizeof (dtrace_meta_t));
8113 8137
8114 8138 return (0);
8115 8139 }
8116 8140
8117 8141
8118 8142 /*
8119 8143 * DTrace DIF Object Functions
8120 8144 */
8121 8145 static int
8122 8146 dtrace_difo_err(uint_t pc, const char *format, ...)
8123 8147 {
8124 8148 if (dtrace_err_verbose) {
8125 8149 va_list alist;
8126 8150
8127 8151 (void) uprintf("dtrace DIF object error: [%u]: ", pc);
8128 8152 va_start(alist, format);
8129 8153 (void) vuprintf(format, alist);
8130 8154 va_end(alist);
8131 8155 }
8132 8156
8133 8157 #ifdef DTRACE_ERRDEBUG
8134 8158 dtrace_errdebug(format);
8135 8159 #endif
8136 8160 return (1);
8137 8161 }
8138 8162
8139 8163 /*
8140 8164 * Validate a DTrace DIF object by checking the IR instructions. The following
8141 8165 * rules are currently enforced by dtrace_difo_validate():
8142 8166 *
8143 8167 * 1. Each instruction must have a valid opcode
8144 8168 * 2. Each register, string, variable, or subroutine reference must be valid
8145 8169 * 3. No instruction can modify register %r0 (must be zero)
8146 8170 * 4. All instruction reserved bits must be set to zero
8147 8171 * 5. The last instruction must be a "ret" instruction
8148 8172 * 6. All branch targets must reference a valid instruction _after_ the branch
8149 8173 */
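/*
 * As a concrete (hypothetical) illustration, the two-instruction object
 *
 *	setx	DT_INTEGER[0], %r1	! load an integer constant into %r1
 *	ret	%r1			! "ret" must be the final instruction
 *
 * satisfies every rule above, whereas an object that omits the trailing
 * "ret", writes to %r0, or branches backward is rejected via
 * dtrace_difo_err().
 */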
8150 8174 static int
8151 8175 dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
8152 8176 cred_t *cr)
8153 8177 {
8154 8178 int err = 0, i;
8155 8179 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
8156 8180 int kcheckload;
8157 8181 uint_t pc;
8158 8182
8159 8183 kcheckload = cr == NULL ||
8160 8184 (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0;
8161 8185
8162 8186 dp->dtdo_destructive = 0;
8163 8187
8164 8188 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
8165 8189 dif_instr_t instr = dp->dtdo_buf[pc];
8166 8190
8167 8191 uint_t r1 = DIF_INSTR_R1(instr);
8168 8192 uint_t r2 = DIF_INSTR_R2(instr);
8169 8193 uint_t rd = DIF_INSTR_RD(instr);
8170 8194 uint_t rs = DIF_INSTR_RS(instr);
8171 8195 uint_t label = DIF_INSTR_LABEL(instr);
8172 8196 uint_t v = DIF_INSTR_VAR(instr);
8173 8197 uint_t subr = DIF_INSTR_SUBR(instr);
8174 8198 uint_t type = DIF_INSTR_TYPE(instr);
8175 8199 uint_t op = DIF_INSTR_OP(instr);
8176 8200
8177 8201 switch (op) {
8178 8202 case DIF_OP_OR:
8179 8203 case DIF_OP_XOR:
8180 8204 case DIF_OP_AND:
8181 8205 case DIF_OP_SLL:
8182 8206 case DIF_OP_SRL:
8183 8207 case DIF_OP_SRA:
8184 8208 case DIF_OP_SUB:
8185 8209 case DIF_OP_ADD:
8186 8210 case DIF_OP_MUL:
8187 8211 case DIF_OP_SDIV:
8188 8212 case DIF_OP_UDIV:
8189 8213 case DIF_OP_SREM:
8190 8214 case DIF_OP_UREM:
8191 8215 case DIF_OP_COPYS:
8192 8216 if (r1 >= nregs)
8193 8217 err += efunc(pc, "invalid register %u\n", r1);
8194 8218 if (r2 >= nregs)
8195 8219 err += efunc(pc, "invalid register %u\n", r2);
8196 8220 if (rd >= nregs)
8197 8221 err += efunc(pc, "invalid register %u\n", rd);
8198 8222 if (rd == 0)
8199 8223 err += efunc(pc, "cannot write to %r0\n");
8200 8224 break;
8201 8225 case DIF_OP_NOT:
8202 8226 case DIF_OP_MOV:
8203 8227 case DIF_OP_ALLOCS:
8204 8228 if (r1 >= nregs)
8205 8229 err += efunc(pc, "invalid register %u\n", r1);
8206 8230 if (r2 != 0)
8207 8231 err += efunc(pc, "non-zero reserved bits\n");
8208 8232 if (rd >= nregs)
8209 8233 err += efunc(pc, "invalid register %u\n", rd);
8210 8234 if (rd == 0)
8211 8235 err += efunc(pc, "cannot write to %r0\n");
8212 8236 break;
8213 8237 case DIF_OP_LDSB:
8214 8238 case DIF_OP_LDSH:
8215 8239 case DIF_OP_LDSW:
8216 8240 case DIF_OP_LDUB:
8217 8241 case DIF_OP_LDUH:
8218 8242 case DIF_OP_LDUW:
8219 8243 case DIF_OP_LDX:
8220 8244 if (r1 >= nregs)
8221 8245 err += efunc(pc, "invalid register %u\n", r1);
8222 8246 if (r2 != 0)
8223 8247 err += efunc(pc, "non-zero reserved bits\n");
8224 8248 if (rd >= nregs)
8225 8249 err += efunc(pc, "invalid register %u\n", rd);
8226 8250 if (rd == 0)
8227 8251 err += efunc(pc, "cannot write to %r0\n");
8228 8252 if (kcheckload)
8229 8253 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
8230 8254 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
8231 8255 break;
8232 8256 case DIF_OP_RLDSB:
8233 8257 case DIF_OP_RLDSH:
8234 8258 case DIF_OP_RLDSW:
8235 8259 case DIF_OP_RLDUB:
8236 8260 case DIF_OP_RLDUH:
8237 8261 case DIF_OP_RLDUW:
8238 8262 case DIF_OP_RLDX:
8239 8263 if (r1 >= nregs)
8240 8264 err += efunc(pc, "invalid register %u\n", r1);
8241 8265 if (r2 != 0)
8242 8266 err += efunc(pc, "non-zero reserved bits\n");
8243 8267 if (rd >= nregs)
8244 8268 err += efunc(pc, "invalid register %u\n", rd);
8245 8269 if (rd == 0)
8246 8270 err += efunc(pc, "cannot write to %r0\n");
8247 8271 break;
8248 8272 case DIF_OP_ULDSB:
8249 8273 case DIF_OP_ULDSH:
8250 8274 case DIF_OP_ULDSW:
8251 8275 case DIF_OP_ULDUB:
8252 8276 case DIF_OP_ULDUH:
8253 8277 case DIF_OP_ULDUW:
8254 8278 case DIF_OP_ULDX:
8255 8279 if (r1 >= nregs)
8256 8280 err += efunc(pc, "invalid register %u\n", r1);
8257 8281 if (r2 != 0)
8258 8282 err += efunc(pc, "non-zero reserved bits\n");
8259 8283 if (rd >= nregs)
8260 8284 err += efunc(pc, "invalid register %u\n", rd);
8261 8285 if (rd == 0)
8262 8286 err += efunc(pc, "cannot write to %r0\n");
8263 8287 break;
8264 8288 case DIF_OP_STB:
8265 8289 case DIF_OP_STH:
8266 8290 case DIF_OP_STW:
8267 8291 case DIF_OP_STX:
8268 8292 if (r1 >= nregs)
8269 8293 err += efunc(pc, "invalid register %u\n", r1);
8270 8294 if (r2 != 0)
8271 8295 err += efunc(pc, "non-zero reserved bits\n");
8272 8296 if (rd >= nregs)
8273 8297 err += efunc(pc, "invalid register %u\n", rd);
8274 8298 if (rd == 0)
8275 8299 err += efunc(pc, "cannot write to 0 address\n");
8276 8300 break;
8277 8301 case DIF_OP_CMP:
8278 8302 case DIF_OP_SCMP:
8279 8303 if (r1 >= nregs)
8280 8304 err += efunc(pc, "invalid register %u\n", r1);
8281 8305 if (r2 >= nregs)
8282 8306 err += efunc(pc, "invalid register %u\n", r2);
8283 8307 if (rd != 0)
8284 8308 err += efunc(pc, "non-zero reserved bits\n");
8285 8309 break;
8286 8310 case DIF_OP_TST:
8287 8311 if (r1 >= nregs)
8288 8312 err += efunc(pc, "invalid register %u\n", r1);
8289 8313 if (r2 != 0 || rd != 0)
8290 8314 err += efunc(pc, "non-zero reserved bits\n");
8291 8315 break;
8292 8316 case DIF_OP_BA:
8293 8317 case DIF_OP_BE:
8294 8318 case DIF_OP_BNE:
8295 8319 case DIF_OP_BG:
8296 8320 case DIF_OP_BGU:
8297 8321 case DIF_OP_BGE:
8298 8322 case DIF_OP_BGEU:
8299 8323 case DIF_OP_BL:
8300 8324 case DIF_OP_BLU:
8301 8325 case DIF_OP_BLE:
8302 8326 case DIF_OP_BLEU:
8303 8327 if (label >= dp->dtdo_len) {
8304 8328 err += efunc(pc, "invalid branch target %u\n",
8305 8329 label);
8306 8330 }
8307 8331 if (label <= pc) {
8308 8332 err += efunc(pc, "backward branch to %u\n",
8309 8333 label);
8310 8334 }
8311 8335 break;
8312 8336 case DIF_OP_RET:
8313 8337 if (r1 != 0 || r2 != 0)
8314 8338 err += efunc(pc, "non-zero reserved bits\n");
8315 8339 if (rd >= nregs)
8316 8340 err += efunc(pc, "invalid register %u\n", rd);
8317 8341 break;
8318 8342 case DIF_OP_NOP:
8319 8343 case DIF_OP_POPTS:
8320 8344 case DIF_OP_FLUSHTS:
8321 8345 if (r1 != 0 || r2 != 0 || rd != 0)
8322 8346 err += efunc(pc, "non-zero reserved bits\n");
8323 8347 break;
8324 8348 case DIF_OP_SETX:
8325 8349 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
8326 8350 err += efunc(pc, "invalid integer ref %u\n",
8327 8351 DIF_INSTR_INTEGER(instr));
8328 8352 }
8329 8353 if (rd >= nregs)
8330 8354 err += efunc(pc, "invalid register %u\n", rd);
8331 8355 if (rd == 0)
8332 8356 err += efunc(pc, "cannot write to %r0\n");
8333 8357 break;
8334 8358 case DIF_OP_SETS:
8335 8359 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
8336 8360 err += efunc(pc, "invalid string ref %u\n",
8337 8361 DIF_INSTR_STRING(instr));
8338 8362 }
8339 8363 if (rd >= nregs)
8340 8364 err += efunc(pc, "invalid register %u\n", rd);
8341 8365 if (rd == 0)
8342 8366 err += efunc(pc, "cannot write to %r0\n");
8343 8367 break;
8344 8368 case DIF_OP_LDGA:
8345 8369 case DIF_OP_LDTA:
8346 8370 if (r1 > DIF_VAR_ARRAY_MAX)
8347 8371 err += efunc(pc, "invalid array %u\n", r1);
8348 8372 if (r2 >= nregs)
8349 8373 err += efunc(pc, "invalid register %u\n", r2);
8350 8374 if (rd >= nregs)
8351 8375 err += efunc(pc, "invalid register %u\n", rd);
8352 8376 if (rd == 0)
8353 8377 err += efunc(pc, "cannot write to %r0\n");
8354 8378 break;
8355 8379 case DIF_OP_LDGS:
8356 8380 case DIF_OP_LDTS:
8357 8381 case DIF_OP_LDLS:
8358 8382 case DIF_OP_LDGAA:
8359 8383 case DIF_OP_LDTAA:
8360 8384 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
8361 8385 err += efunc(pc, "invalid variable %u\n", v);
8362 8386 if (rd >= nregs)
8363 8387 err += efunc(pc, "invalid register %u\n", rd);
8364 8388 if (rd == 0)
8365 8389 err += efunc(pc, "cannot write to %r0\n");
8366 8390 break;
8367 8391 case DIF_OP_STGS:
8368 8392 case DIF_OP_STTS:
8369 8393 case DIF_OP_STLS:
8370 8394 case DIF_OP_STGAA:
8371 8395 case DIF_OP_STTAA:
8372 8396 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
8373 8397 err += efunc(pc, "invalid variable %u\n", v);
8374 8398 if (rs >= nregs)
8375 8399 			err += efunc(pc, "invalid register %u\n", rs);
8376 8400 break;
8377 8401 case DIF_OP_CALL:
8378 8402 if (subr > DIF_SUBR_MAX)
8379 8403 err += efunc(pc, "invalid subr %u\n", subr);
8380 8404 if (rd >= nregs)
8381 8405 err += efunc(pc, "invalid register %u\n", rd);
8382 8406 if (rd == 0)
8383 8407 err += efunc(pc, "cannot write to %r0\n");
8384 8408
8385 8409 if (subr == DIF_SUBR_COPYOUT ||
8386 8410 subr == DIF_SUBR_COPYOUTSTR) {
8387 8411 dp->dtdo_destructive = 1;
8388 8412 }
8389 8413 break;
8390 8414 case DIF_OP_PUSHTR:
8391 8415 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
8392 8416 err += efunc(pc, "invalid ref type %u\n", type);
8393 8417 if (r2 >= nregs)
8394 8418 err += efunc(pc, "invalid register %u\n", r2);
8395 8419 if (rs >= nregs)
8396 8420 err += efunc(pc, "invalid register %u\n", rs);
8397 8421 break;
8398 8422 case DIF_OP_PUSHTV:
8399 8423 if (type != DIF_TYPE_CTF)
8400 8424 err += efunc(pc, "invalid val type %u\n", type);
8401 8425 if (r2 >= nregs)
8402 8426 err += efunc(pc, "invalid register %u\n", r2);
8403 8427 if (rs >= nregs)
8404 8428 err += efunc(pc, "invalid register %u\n", rs);
8405 8429 break;
8406 8430 default:
8407 8431 err += efunc(pc, "invalid opcode %u\n",
8408 8432 DIF_INSTR_OP(instr));
8409 8433 }
8410 8434 }
8411 8435
8412 8436 if (dp->dtdo_len != 0 &&
8413 8437 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
8414 8438 err += efunc(dp->dtdo_len - 1,
8415 8439 "expected 'ret' as last DIF instruction\n");
8416 8440 }
8417 8441
8418 8442 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) {
8419 8443 /*
8420 8444 * If we're not returning by reference, the size must be either
8421 8445 * 0 or the size of one of the base types.
8422 8446 */
8423 8447 switch (dp->dtdo_rtype.dtdt_size) {
8424 8448 case 0:
8425 8449 case sizeof (uint8_t):
8426 8450 case sizeof (uint16_t):
8427 8451 case sizeof (uint32_t):
8428 8452 case sizeof (uint64_t):
8429 8453 break;
8430 8454
8431 8455 default:
8432 8456 err += efunc(dp->dtdo_len - 1, "bad return size\n");
8433 8457 }
8434 8458 }
8435 8459
8436 8460 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
8437 8461 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
8438 8462 dtrace_diftype_t *vt, *et;
8439 8463 uint_t id, ndx;
8440 8464
8441 8465 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
8442 8466 v->dtdv_scope != DIFV_SCOPE_THREAD &&
8443 8467 v->dtdv_scope != DIFV_SCOPE_LOCAL) {
8444 8468 err += efunc(i, "unrecognized variable scope %d\n",
8445 8469 v->dtdv_scope);
8446 8470 break;
8447 8471 }
8448 8472
8449 8473 if (v->dtdv_kind != DIFV_KIND_ARRAY &&
8450 8474 v->dtdv_kind != DIFV_KIND_SCALAR) {
8451 8475 err += efunc(i, "unrecognized variable type %d\n",
8452 8476 v->dtdv_kind);
8453 8477 break;
8454 8478 }
8455 8479
8456 8480 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
8457 8481 err += efunc(i, "%d exceeds variable id limit\n", id);
8458 8482 break;
8459 8483 }
8460 8484
8461 8485 if (id < DIF_VAR_OTHER_UBASE)
8462 8486 continue;
8463 8487
8464 8488 /*
8465 8489 * For user-defined variables, we need to check that this
8466 8490 * definition is identical to any previous definition that we
8467 8491 * encountered.
8468 8492 */
8469 8493 ndx = id - DIF_VAR_OTHER_UBASE;
8470 8494
8471 8495 switch (v->dtdv_scope) {
8472 8496 case DIFV_SCOPE_GLOBAL:
8473 8497 if (ndx < vstate->dtvs_nglobals) {
8474 8498 dtrace_statvar_t *svar;
8475 8499
8476 8500 if ((svar = vstate->dtvs_globals[ndx]) != NULL)
8477 8501 existing = &svar->dtsv_var;
8478 8502 }
8479 8503
8480 8504 break;
8481 8505
8482 8506 case DIFV_SCOPE_THREAD:
8483 8507 if (ndx < vstate->dtvs_ntlocals)
8484 8508 existing = &vstate->dtvs_tlocals[ndx];
8485 8509 break;
8486 8510
8487 8511 case DIFV_SCOPE_LOCAL:
8488 8512 if (ndx < vstate->dtvs_nlocals) {
8489 8513 dtrace_statvar_t *svar;
8490 8514
8491 8515 if ((svar = vstate->dtvs_locals[ndx]) != NULL)
8492 8516 existing = &svar->dtsv_var;
8493 8517 }
8494 8518
8495 8519 break;
8496 8520 }
8497 8521
8498 8522 vt = &v->dtdv_type;
8499 8523
8500 8524 if (vt->dtdt_flags & DIF_TF_BYREF) {
8501 8525 if (vt->dtdt_size == 0) {
8502 8526 err += efunc(i, "zero-sized variable\n");
8503 8527 break;
8504 8528 }
8505 8529
8506 8530 if (v->dtdv_scope == DIFV_SCOPE_GLOBAL &&
8507 8531 vt->dtdt_size > dtrace_global_maxsize) {
8508 8532 err += efunc(i, "oversized by-ref global\n");
8509 8533 break;
8510 8534 }
8511 8535 }
8512 8536
8513 8537 if (existing == NULL || existing->dtdv_id == 0)
8514 8538 continue;
8515 8539
8516 8540 ASSERT(existing->dtdv_id == v->dtdv_id);
8517 8541 ASSERT(existing->dtdv_scope == v->dtdv_scope);
8518 8542
8519 8543 if (existing->dtdv_kind != v->dtdv_kind)
8520 8544 err += efunc(i, "%d changed variable kind\n", id);
8521 8545
8522 8546 et = &existing->dtdv_type;
8523 8547
8524 8548 if (vt->dtdt_flags != et->dtdt_flags) {
8525 8549 err += efunc(i, "%d changed variable type flags\n", id);
8526 8550 break;
8527 8551 }
8528 8552
8529 8553 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
8530 8554 err += efunc(i, "%d changed variable type size\n", id);
8531 8555 break;
8532 8556 }
8533 8557 }
8534 8558
8535 8559 return (err);
8536 8560 }
8537 8561
8538 8562 /*
8539 8563 * Validate a DTrace DIF object that is to be used as a helper. Helpers
8540 8564 * are much more constrained than normal DIFOs. Specifically, they may
8541 8565 * not:
8542 8566 *
8543 8567 * 1. Make calls to subroutines other than copyin(), copyinstr() or
8544 8568 * miscellaneous string routines
8545 8569 * 2. Access DTrace variables other than the args[] array and the
8546 8570 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
8547 8571 * 3. Have thread-local variables.
8548 8572 * 4. Have dynamic variables.
8549 8573 */
8550 8574 static int
8551 8575 dtrace_difo_validate_helper(dtrace_difo_t *dp)
8552 8576 {
8553 8577 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
8554 8578 int err = 0;
8555 8579 uint_t pc;
8556 8580
8557 8581 for (pc = 0; pc < dp->dtdo_len; pc++) {
8558 8582 dif_instr_t instr = dp->dtdo_buf[pc];
8559 8583
8560 8584 uint_t v = DIF_INSTR_VAR(instr);
8561 8585 uint_t subr = DIF_INSTR_SUBR(instr);
8562 8586 uint_t op = DIF_INSTR_OP(instr);
8563 8587
8564 8588 switch (op) {
8565 8589 case DIF_OP_OR:
8566 8590 case DIF_OP_XOR:
8567 8591 case DIF_OP_AND:
8568 8592 case DIF_OP_SLL:
8569 8593 case DIF_OP_SRL:
8570 8594 case DIF_OP_SRA:
8571 8595 case DIF_OP_SUB:
8572 8596 case DIF_OP_ADD:
8573 8597 case DIF_OP_MUL:
8574 8598 case DIF_OP_SDIV:
8575 8599 case DIF_OP_UDIV:
8576 8600 case DIF_OP_SREM:
8577 8601 case DIF_OP_UREM:
8578 8602 case DIF_OP_COPYS:
8579 8603 case DIF_OP_NOT:
8580 8604 case DIF_OP_MOV:
8581 8605 case DIF_OP_RLDSB:
8582 8606 case DIF_OP_RLDSH:
8583 8607 case DIF_OP_RLDSW:
8584 8608 case DIF_OP_RLDUB:
8585 8609 case DIF_OP_RLDUH:
8586 8610 case DIF_OP_RLDUW:
8587 8611 case DIF_OP_RLDX:
8588 8612 case DIF_OP_ULDSB:
8589 8613 case DIF_OP_ULDSH:
8590 8614 case DIF_OP_ULDSW:
8591 8615 case DIF_OP_ULDUB:
8592 8616 case DIF_OP_ULDUH:
8593 8617 case DIF_OP_ULDUW:
8594 8618 case DIF_OP_ULDX:
8595 8619 case DIF_OP_STB:
8596 8620 case DIF_OP_STH:
8597 8621 case DIF_OP_STW:
8598 8622 case DIF_OP_STX:
8599 8623 case DIF_OP_ALLOCS:
8600 8624 case DIF_OP_CMP:
8601 8625 case DIF_OP_SCMP:
8602 8626 case DIF_OP_TST:
8603 8627 case DIF_OP_BA:
8604 8628 case DIF_OP_BE:
8605 8629 case DIF_OP_BNE:
8606 8630 case DIF_OP_BG:
8607 8631 case DIF_OP_BGU:
8608 8632 case DIF_OP_BGE:
8609 8633 case DIF_OP_BGEU:
8610 8634 case DIF_OP_BL:
8611 8635 case DIF_OP_BLU:
8612 8636 case DIF_OP_BLE:
8613 8637 case DIF_OP_BLEU:
8614 8638 case DIF_OP_RET:
8615 8639 case DIF_OP_NOP:
8616 8640 case DIF_OP_POPTS:
8617 8641 case DIF_OP_FLUSHTS:
8618 8642 case DIF_OP_SETX:
8619 8643 case DIF_OP_SETS:
8620 8644 case DIF_OP_LDGA:
8621 8645 case DIF_OP_LDLS:
8622 8646 case DIF_OP_STGS:
8623 8647 case DIF_OP_STLS:
8624 8648 case DIF_OP_PUSHTR:
8625 8649 case DIF_OP_PUSHTV:
8626 8650 break;
8627 8651
8628 8652 case DIF_OP_LDGS:
8629 8653 if (v >= DIF_VAR_OTHER_UBASE)
8630 8654 break;
8631 8655
8632 8656 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
8633 8657 break;
8634 8658
8635 8659 if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
8636 8660 v == DIF_VAR_PPID || v == DIF_VAR_TID ||
8637 8661 v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
8638 8662 v == DIF_VAR_UID || v == DIF_VAR_GID)
8639 8663 break;
8640 8664
8641 8665 err += efunc(pc, "illegal variable %u\n", v);
8642 8666 break;
8643 8667
8644 8668 case DIF_OP_LDTA:
8645 8669 case DIF_OP_LDTS:
8646 8670 case DIF_OP_LDGAA:
8647 8671 case DIF_OP_LDTAA:
8648 8672 err += efunc(pc, "illegal dynamic variable load\n");
8649 8673 break;
8650 8674
8651 8675 case DIF_OP_STTS:
8652 8676 case DIF_OP_STGAA:
8653 8677 case DIF_OP_STTAA:
8654 8678 err += efunc(pc, "illegal dynamic variable store\n");
8655 8679 break;
8656 8680
8657 8681 case DIF_OP_CALL:
8658 8682 if (subr == DIF_SUBR_ALLOCA ||
8659 8683 subr == DIF_SUBR_BCOPY ||
8660 8684 subr == DIF_SUBR_COPYIN ||
8661 8685 subr == DIF_SUBR_COPYINTO ||
8662 8686 subr == DIF_SUBR_COPYINSTR ||
8663 8687 subr == DIF_SUBR_INDEX ||
8664 8688 subr == DIF_SUBR_INET_NTOA ||
8665 8689 subr == DIF_SUBR_INET_NTOA6 ||
8666 8690 subr == DIF_SUBR_INET_NTOP ||
8667 8691 subr == DIF_SUBR_LLTOSTR ||
8668 8692 subr == DIF_SUBR_RINDEX ||
8669 8693 subr == DIF_SUBR_STRCHR ||
8670 8694 subr == DIF_SUBR_STRJOIN ||
8671 8695 subr == DIF_SUBR_STRRCHR ||
8672 8696 subr == DIF_SUBR_STRSTR ||
8673 8697 subr == DIF_SUBR_HTONS ||
8674 8698 subr == DIF_SUBR_HTONL ||
8675 8699 subr == DIF_SUBR_HTONLL ||
8676 8700 subr == DIF_SUBR_NTOHS ||
8677 8701 subr == DIF_SUBR_NTOHL ||
8678 8702 subr == DIF_SUBR_NTOHLL)
8679 8703 break;
8680 8704
8681 8705 err += efunc(pc, "invalid subr %u\n", subr);
8682 8706 break;
8683 8707
8684 8708 default:
8685 8709 err += efunc(pc, "invalid opcode %u\n",
8686 8710 DIF_INSTR_OP(instr));
8687 8711 }
8688 8712 }
8689 8713
8690 8714 return (err);
8691 8715 }
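
/*
 * Editor's sketch (not part of the change): the subroutine whitelist
 * above can be exercised by hand-assembling a trivial DIFO.  This
 * assumes the DIF_INSTR_CALL()/DIF_INSTR_RET() encoding macros from
 * <sys/dtrace.h>, and it would have to live in this file because
 * dtrace_difo_validate_helper() is static.  A helper calling copyin()
 * validates cleanly; substituting DIF_SUBR_COPYOUT would instead
 * accumulate an "invalid subr" error, as copyout() is destructive and
 * therefore off-limits to helpers.
 */
static int
dtrace_helper_whitelist_sketch(void)
{
	dif_instr_t buf[2];
	dtrace_difo_t dp;

	bzero(&dp, sizeof (dp));
	buf[0] = DIF_INSTR_CALL(DIF_SUBR_COPYIN, 1);	/* %r1 = copyin() */
	buf[1] = DIF_INSTR_RET(1);			/* return %r1 */
	dp.dtdo_buf = buf;
	dp.dtdo_len = 2;

	return (dtrace_difo_validate_helper(&dp));	/* expect 0 */
}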
8692 8716
8693 8717 /*
8694 8718 * Returns 1 if the expression in the DIF object can be cached on a per-thread
8695 8719 * basis; 0 if not.
8696 8720 */
8697 8721 static int
8698 8722 dtrace_difo_cacheable(dtrace_difo_t *dp)
8699 8723 {
8700 8724 int i;
8701 8725
8702 8726 if (dp == NULL)
8703 8727 return (0);
8704 8728
8705 8729 for (i = 0; i < dp->dtdo_varlen; i++) {
8706 8730 dtrace_difv_t *v = &dp->dtdo_vartab[i];
8707 8731
8708 8732 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
8709 8733 continue;
8710 8734
8711 8735 switch (v->dtdv_id) {
8712 8736 case DIF_VAR_CURTHREAD:
8713 8737 case DIF_VAR_PID:
8714 8738 case DIF_VAR_TID:
8715 8739 case DIF_VAR_EXECNAME:
8716 8740 case DIF_VAR_ZONENAME:
8717 8741 break;
8718 8742
8719 8743 default:
8720 8744 return (0);
8721 8745 }
8722 8746 }
8723 8747
8724 8748 /*
8725 8749 * This DIF object may be cacheable. Now we need to look for any
8726 8750 * array loading instructions, any memory loading instructions, or
8727 8751 * any stores to thread-local variables.
8728 8752 */
8729 8753 for (i = 0; i < dp->dtdo_len; i++) {
8730 8754 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);
8731 8755
8732 8756 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
8733 8757 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
8734 8758 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
8735 8759 op == DIF_OP_LDGA || op == DIF_OP_STTS)
8736 8760 return (0);
8737 8761 }
8738 8762
8739 8763 return (1);
8740 8764 }
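
/*
 * Editor's sketch: a DIFO is cacheable when its variable table names
 * only the cache-friendly variables enumerated above and its text
 * contains no memory loads, LDGA, or thread-local stores.  A predicate
 * like pid == 12345 qualifies; args[0] == 5 does not (it compiles to a
 * DIF_OP_LDGA).  Hand-built illustration, assuming the DIF_INSTR_LDV()
 * encoding macro from <sys/dtrace.h>:
 */
static int
dtrace_cacheable_sketch(void)
{
	dif_instr_t buf[2];
	dtrace_difv_t var;
	dtrace_difo_t dp;

	bzero(&dp, sizeof (dp));
	bzero(&var, sizeof (var));

	buf[0] = DIF_INSTR_LDV(DIF_OP_LDGS, DIF_VAR_PID, 1); /* %r1 = pid */
	buf[1] = DIF_INSTR_RET(1);

	var.dtdv_scope = DIFV_SCOPE_GLOBAL;
	var.dtdv_id = DIF_VAR_PID;

	dp.dtdo_buf = buf;
	dp.dtdo_len = 2;
	dp.dtdo_vartab = &var;
	dp.dtdo_varlen = 1;

	return (dtrace_difo_cacheable(&dp));	/* expect 1 */
}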
8741 8765
8742 8766 static void
8743 8767 dtrace_difo_hold(dtrace_difo_t *dp)
8744 8768 {
8745 8769 int i;
8746 8770
8747 8771 ASSERT(MUTEX_HELD(&dtrace_lock));
8748 8772
8749 8773 dp->dtdo_refcnt++;
8750 8774 ASSERT(dp->dtdo_refcnt != 0);
8751 8775
8752 8776 /*
8753 8777 * We need to check this DIF object for references to the variable
8754 8778 * DIF_VAR_VTIMESTAMP.
8755 8779 */
8756 8780 for (i = 0; i < dp->dtdo_varlen; i++) {
8757 8781 dtrace_difv_t *v = &dp->dtdo_vartab[i];
8758 8782
8759 8783 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
8760 8784 continue;
8761 8785
8762 8786 if (dtrace_vtime_references++ == 0)
8763 8787 dtrace_vtime_enable();
8764 8788 }
8765 8789 }
8766 8790
8767 8791 /*
8768 8792 * This routine calculates the dynamic variable chunksize for a given DIF
8769 8793 * object. The calculation is not fool-proof, and can probably be tricked by
8770 8794 * malicious DIF -- but it works for all compiler-generated DIF. Because this
8771 8795 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
8772 8796 * if a dynamic variable size exceeds the chunksize.
8773 8797 */
8774 8798 static void
8775 8799 dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
8776 8800 {
8777 8801 uint64_t sval;
8778 8802 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
8779 8803 const dif_instr_t *text = dp->dtdo_buf;
8780 8804 uint_t pc, srd = 0;
8781 8805 uint_t ttop = 0;
8782 8806 size_t size, ksize;
8783 8807 uint_t id, i;
8784 8808
8785 8809 for (pc = 0; pc < dp->dtdo_len; pc++) {
8786 8810 dif_instr_t instr = text[pc];
8787 8811 uint_t op = DIF_INSTR_OP(instr);
8788 8812 uint_t rd = DIF_INSTR_RD(instr);
8789 8813 uint_t r1 = DIF_INSTR_R1(instr);
8790 8814 uint_t nkeys = 0;
8791 8815 uchar_t scope;
8792 8816
8793 8817 dtrace_key_t *key = tupregs;
8794 8818
8795 8819 switch (op) {
8796 8820 case DIF_OP_SETX:
8797 8821 sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
8798 8822 srd = rd;
8799 8823 continue;
8800 8824
8801 8825 case DIF_OP_STTS:
8802 8826 key = &tupregs[DIF_DTR_NREGS];
8803 8827 key[0].dttk_size = 0;
8804 8828 key[1].dttk_size = 0;
8805 8829 nkeys = 2;
8806 8830 scope = DIFV_SCOPE_THREAD;
8807 8831 break;
8808 8832
8809 8833 case DIF_OP_STGAA:
8810 8834 case DIF_OP_STTAA:
8811 8835 nkeys = ttop;
8812 8836
8813 8837 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
8814 8838 key[nkeys++].dttk_size = 0;
8815 8839
8816 8840 key[nkeys++].dttk_size = 0;
8817 8841
8818 8842 if (op == DIF_OP_STTAA) {
8819 8843 scope = DIFV_SCOPE_THREAD;
8820 8844 } else {
8821 8845 scope = DIFV_SCOPE_GLOBAL;
8822 8846 }
8823 8847
8824 8848 break;
8825 8849
8826 8850 case DIF_OP_PUSHTR:
8827 8851 if (ttop == DIF_DTR_NREGS)
8828 8852 return;
8829 8853
8830 8854 if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
8831 8855 /*
8832 8856 * If the register for the size of the "pushtr"
8833 8857 * is %r0 (or the value is 0) and the type is
8834 8858 * a string, we'll use the system-wide default
8835 8859 * string size.
8836 8860 */
8837 8861 tupregs[ttop++].dttk_size =
8838 8862 dtrace_strsize_default;
8839 8863 } else {
8840 8864 if (srd == 0)
8841 8865 return;
8842 8866
8843 8867 tupregs[ttop++].dttk_size = sval;
8844 8868 }
8845 8869
8846 8870 break;
8847 8871
8848 8872 case DIF_OP_PUSHTV:
8849 8873 if (ttop == DIF_DTR_NREGS)
8850 8874 return;
8851 8875
8852 8876 tupregs[ttop++].dttk_size = 0;
8853 8877 break;
8854 8878
8855 8879 case DIF_OP_FLUSHTS:
8856 8880 ttop = 0;
8857 8881 break;
8858 8882
8859 8883 case DIF_OP_POPTS:
8860 8884 if (ttop != 0)
8861 8885 ttop--;
8862 8886 break;
8863 8887 }
8864 8888
8865 8889 sval = 0;
8866 8890 srd = 0;
8867 8891
8868 8892 if (nkeys == 0)
8869 8893 continue;
8870 8894
8871 8895 /*
8872 8896 * We have a dynamic variable allocation; calculate its size.
8873 8897 */
8874 8898 for (ksize = 0, i = 0; i < nkeys; i++)
8875 8899 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
8876 8900
8877 8901 size = sizeof (dtrace_dynvar_t);
8878 8902 size += sizeof (dtrace_key_t) * (nkeys - 1);
8879 8903 size += ksize;
8880 8904
8881 8905 /*
8882 8906 * Now we need to determine the size of the stored data.
8883 8907 */
8884 8908 id = DIF_INSTR_VAR(instr);
8885 8909
8886 8910 for (i = 0; i < dp->dtdo_varlen; i++) {
8887 8911 dtrace_difv_t *v = &dp->dtdo_vartab[i];
8888 8912
8889 8913 if (v->dtdv_id == id && v->dtdv_scope == scope) {
8890 8914 size += v->dtdv_type.dtdt_size;
8891 8915 break;
8892 8916 }
8893 8917 }
8894 8918
8895 8919 if (i == dp->dtdo_varlen)
8896 8920 return;
8897 8921
8898 8922 /*
8899 8923 * We have the size. If this is larger than the chunk size
8900 8924 * for our dynamic variable state, reset the chunk size.
8901 8925 */
8902 8926 size = P2ROUNDUP(size, sizeof (uint64_t));
8903 8927
8904 8928 if (size > vstate->dtvs_dynvars.dtds_chunksize)
8905 8929 vstate->dtvs_dynvars.dtds_chunksize = size;
8906 8930 }
8907 8931 }
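
/*
 * Editor's worked example of the sizing above, with hypothetical
 * numbers: a two-key tuple -- an 8-byte integer plus a string bounded
 * by dtrace_strsize_default (256 bytes by default) -- whose stored
 * value is 16 bytes.  dtrace_difo_chunksize() derives all of these
 * quantities from the DIF text and variable table; the arithmetic
 * itself reduces to:
 */
static size_t
dtrace_chunksize_sketch(void)
{
	size_t ksize, size;

	/* Each key rounded up to an 8-byte boundary: 8 + 256. */
	ksize = P2ROUNDUP(sizeof (uint64_t), sizeof (uint64_t)) +
	    P2ROUNDUP((size_t)256, sizeof (uint64_t));

	/* Header (includes one key slot), second key slot, key data. */
	size = sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (2 - 1);
	size += ksize + 16;			/* plus the stored value */

	return (P2ROUNDUP(size, sizeof (uint64_t)));	/* chunk candidate */
}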
8908 8932
8909 8933 static void
8910 8934 dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
8911 8935 {
8912 8936 int i, oldsvars, osz, nsz, otlocals, ntlocals;
8913 8937 uint_t id;
8914 8938
8915 8939 ASSERT(MUTEX_HELD(&dtrace_lock));
8916 8940 ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);
8917 8941
8918 8942 for (i = 0; i < dp->dtdo_varlen; i++) {
8919 8943 dtrace_difv_t *v = &dp->dtdo_vartab[i];
8920 8944 dtrace_statvar_t *svar, ***svarp;
8921 8945 size_t dsize = 0;
8922 8946 uint8_t scope = v->dtdv_scope;
8923 8947 int *np;
8924 8948
8925 8949 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
8926 8950 continue;
8927 8951
8928 8952 id -= DIF_VAR_OTHER_UBASE;
8929 8953
8930 8954 switch (scope) {
8931 8955 case DIFV_SCOPE_THREAD:
8932 8956 while (id >= (otlocals = vstate->dtvs_ntlocals)) {
8933 8957 dtrace_difv_t *tlocals;
8934 8958
8935 8959 if ((ntlocals = (otlocals << 1)) == 0)
8936 8960 ntlocals = 1;
8937 8961
8938 8962 osz = otlocals * sizeof (dtrace_difv_t);
8939 8963 nsz = ntlocals * sizeof (dtrace_difv_t);
8940 8964
8941 8965 tlocals = kmem_zalloc(nsz, KM_SLEEP);
8942 8966
8943 8967 if (osz != 0) {
8944 8968 bcopy(vstate->dtvs_tlocals,
8945 8969 tlocals, osz);
8946 8970 kmem_free(vstate->dtvs_tlocals, osz);
8947 8971 }
8948 8972
8949 8973 vstate->dtvs_tlocals = tlocals;
8950 8974 vstate->dtvs_ntlocals = ntlocals;
8951 8975 }
8952 8976
8953 8977 vstate->dtvs_tlocals[id] = *v;
8954 8978 continue;
8955 8979
8956 8980 case DIFV_SCOPE_LOCAL:
8957 8981 np = &vstate->dtvs_nlocals;
8958 8982 svarp = &vstate->dtvs_locals;
8959 8983
8960 8984 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
8961 8985 dsize = NCPU * (v->dtdv_type.dtdt_size +
8962 8986 sizeof (uint64_t));
8963 8987 else
8964 8988 dsize = NCPU * sizeof (uint64_t);
8965 8989
8966 8990 break;
8967 8991
8968 8992 case DIFV_SCOPE_GLOBAL:
8969 8993 np = &vstate->dtvs_nglobals;
8970 8994 svarp = &vstate->dtvs_globals;
8971 8995
8972 8996 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
8973 8997 dsize = v->dtdv_type.dtdt_size +
8974 8998 sizeof (uint64_t);
8975 8999
8976 9000 break;
8977 9001
8978 9002 default:
8979 9003 ASSERT(0);
8980 9004 }
8981 9005
8982 9006 while (id >= (oldsvars = *np)) {
8983 9007 dtrace_statvar_t **statics;
8984 9008 int newsvars, oldsize, newsize;
8985 9009
8986 9010 if ((newsvars = (oldsvars << 1)) == 0)
8987 9011 newsvars = 1;
8988 9012
8989 9013 oldsize = oldsvars * sizeof (dtrace_statvar_t *);
8990 9014 newsize = newsvars * sizeof (dtrace_statvar_t *);
8991 9015
8992 9016 statics = kmem_zalloc(newsize, KM_SLEEP);
8993 9017
8994 9018 if (oldsize != 0) {
8995 9019 bcopy(*svarp, statics, oldsize);
8996 9020 kmem_free(*svarp, oldsize);
8997 9021 }
8998 9022
8999 9023 *svarp = statics;
9000 9024 *np = newsvars;
9001 9025 }
9002 9026
9003 9027 if ((svar = (*svarp)[id]) == NULL) {
9004 9028 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
9005 9029 svar->dtsv_var = *v;
9006 9030
9007 9031 if ((svar->dtsv_size = dsize) != 0) {
9008 9032 svar->dtsv_data = (uint64_t)(uintptr_t)
9009 9033 kmem_zalloc(dsize, KM_SLEEP);
9010 9034 }
9011 9035
9012 9036 (*svarp)[id] = svar;
9013 9037 }
9014 9038
9015 9039 svar->dtsv_refcnt++;
9016 9040 }
9017 9041
9018 9042 dtrace_difo_chunksize(dp, vstate);
9019 9043 dtrace_difo_hold(dp);
9020 9044 }
9021 9045
9022 9046 static dtrace_difo_t *
9023 9047 dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9024 9048 {
9025 9049 dtrace_difo_t *new;
9026 9050 size_t sz;
9027 9051
9028 9052 ASSERT(dp->dtdo_buf != NULL);
9029 9053 ASSERT(dp->dtdo_refcnt != 0);
9030 9054
9031 9055 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
9032 9056
9033 9057 ASSERT(dp->dtdo_buf != NULL);
9034 9058 sz = dp->dtdo_len * sizeof (dif_instr_t);
9035 9059 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
9036 9060 bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
9037 9061 new->dtdo_len = dp->dtdo_len;
9038 9062
9039 9063 if (dp->dtdo_strtab != NULL) {
9040 9064 ASSERT(dp->dtdo_strlen != 0);
9041 9065 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
9042 9066 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
9043 9067 new->dtdo_strlen = dp->dtdo_strlen;
9044 9068 }
9045 9069
9046 9070 if (dp->dtdo_inttab != NULL) {
9047 9071 ASSERT(dp->dtdo_intlen != 0);
9048 9072 sz = dp->dtdo_intlen * sizeof (uint64_t);
9049 9073 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
9050 9074 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
9051 9075 new->dtdo_intlen = dp->dtdo_intlen;
9052 9076 }
9053 9077
9054 9078 if (dp->dtdo_vartab != NULL) {
9055 9079 ASSERT(dp->dtdo_varlen != 0);
9056 9080 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
9057 9081 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
9058 9082 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
9059 9083 new->dtdo_varlen = dp->dtdo_varlen;
9060 9084 }
9061 9085
9062 9086 dtrace_difo_init(new, vstate);
9063 9087 return (new);
9064 9088 }
9065 9089
9066 9090 static void
9067 9091 dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9068 9092 {
9069 9093 int i;
9070 9094
9071 9095 ASSERT(dp->dtdo_refcnt == 0);
9072 9096
9073 9097 for (i = 0; i < dp->dtdo_varlen; i++) {
9074 9098 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9075 9099 dtrace_statvar_t *svar, **svarp;
9076 9100 uint_t id;
9077 9101 uint8_t scope = v->dtdv_scope;
9078 9102 int *np;
9079 9103
9080 9104 switch (scope) {
9081 9105 case DIFV_SCOPE_THREAD:
9082 9106 continue;
9083 9107
9084 9108 case DIFV_SCOPE_LOCAL:
9085 9109 np = &vstate->dtvs_nlocals;
9086 9110 svarp = vstate->dtvs_locals;
9087 9111 break;
9088 9112
9089 9113 case DIFV_SCOPE_GLOBAL:
9090 9114 np = &vstate->dtvs_nglobals;
9091 9115 svarp = vstate->dtvs_globals;
9092 9116 break;
9093 9117
9094 9118 default:
9095 9119 ASSERT(0);
9096 9120 }
9097 9121
9098 9122 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
9099 9123 continue;
9100 9124
9101 9125 id -= DIF_VAR_OTHER_UBASE;
9102 9126 ASSERT(id < *np);
9103 9127
9104 9128 svar = svarp[id];
9105 9129 ASSERT(svar != NULL);
9106 9130 ASSERT(svar->dtsv_refcnt > 0);
9107 9131
9108 9132 if (--svar->dtsv_refcnt > 0)
9109 9133 continue;
9110 9134
9111 9135 if (svar->dtsv_size != 0) {
9112 9136 ASSERT(svar->dtsv_data != NULL);
9113 9137 kmem_free((void *)(uintptr_t)svar->dtsv_data,
9114 9138 svar->dtsv_size);
9115 9139 }
9116 9140
9117 9141 kmem_free(svar, sizeof (dtrace_statvar_t));
9118 9142 svarp[id] = NULL;
9119 9143 }
9120 9144
9121 9145 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
9122 9146 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
9123 9147 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
9124 9148 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
9125 9149
9126 9150 kmem_free(dp, sizeof (dtrace_difo_t));
9127 9151 }
9128 9152
9129 9153 static void
9130 9154 dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9131 9155 {
9132 9156 int i;
9133 9157
9134 9158 ASSERT(MUTEX_HELD(&dtrace_lock));
9135 9159 ASSERT(dp->dtdo_refcnt != 0);
9136 9160
9137 9161 for (i = 0; i < dp->dtdo_varlen; i++) {
9138 9162 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9139 9163
9140 9164 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
9141 9165 continue;
9142 9166
9143 9167 ASSERT(dtrace_vtime_references > 0);
9144 9168 if (--dtrace_vtime_references == 0)
9145 9169 dtrace_vtime_disable();
9146 9170 }
9147 9171
9148 9172 if (--dp->dtdo_refcnt == 0)
9149 9173 dtrace_difo_destroy(dp, vstate);
9150 9174 }
9151 9175
9152 9176 /*
9153 9177 * DTrace Format Functions
9154 9178 */
9155 9179 static uint16_t
9156 9180 dtrace_format_add(dtrace_state_t *state, char *str)
9157 9181 {
9158 9182 char *fmt, **new;
9159 9183 uint16_t ndx, len = strlen(str) + 1;
9160 9184
9161 9185 fmt = kmem_zalloc(len, KM_SLEEP);
9162 9186 bcopy(str, fmt, len);
9163 9187
9164 9188 for (ndx = 0; ndx < state->dts_nformats; ndx++) {
9165 9189 if (state->dts_formats[ndx] == NULL) {
9166 9190 state->dts_formats[ndx] = fmt;
9167 9191 return (ndx + 1);
9168 9192 }
9169 9193 }
9170 9194
9171 9195 if (state->dts_nformats == USHRT_MAX) {
9172 9196 /*
9173 9197 * This is only likely if a denial-of-service attack is being
9174 9198 * attempted. As such, it's okay to fail silently here.
9175 9199 */
9176 9200 kmem_free(fmt, len);
9177 9201 return (0);
9178 9202 }
9179 9203
9180 9204 /*
9181 9205 * For simplicity, we always resize the formats array to be exactly the
9182 9206 * number of formats.
9183 9207 */
9184 9208 ndx = state->dts_nformats++;
9185 9209 new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);
9186 9210
9187 9211 if (state->dts_formats != NULL) {
9188 9212 ASSERT(ndx != 0);
9189 9213 bcopy(state->dts_formats, new, ndx * sizeof (char *));
9190 9214 kmem_free(state->dts_formats, ndx * sizeof (char *));
9191 9215 }
9192 9216
9193 9217 state->dts_formats = new;
9194 9218 state->dts_formats[ndx] = fmt;
9195 9219
9196 9220 return (ndx + 1);
9197 9221 }
9198 9222
9199 9223 static void
9200 9224 dtrace_format_remove(dtrace_state_t *state, uint16_t format)
9201 9225 {
9202 9226 char *fmt;
9203 9227
9204 9228 ASSERT(state->dts_formats != NULL);
9205 9229 ASSERT(format <= state->dts_nformats);
9206 9230 ASSERT(state->dts_formats[format - 1] != NULL);
9207 9231
9208 9232 fmt = state->dts_formats[format - 1];
9209 9233 kmem_free(fmt, strlen(fmt) + 1);
9210 9234 state->dts_formats[format - 1] = NULL;
9211 9235 }
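
/*
 * Editor's usage sketch: format handles are one-based indices into
 * dts_formats, with zero reserved to mean "no format" (including the
 * silent-failure case above), so callers offset by one on lookup and
 * hand the same one-based handle back to dtrace_format_remove():
 */
static void
dtrace_format_usage_sketch(dtrace_state_t *state)
{
	uint16_t fmt;

	if ((fmt = dtrace_format_add(state, "value = %d\n")) == 0)
		return;			/* USHRT_MAX formats in use */

	ASSERT(state->dts_formats[fmt - 1] != NULL);
	dtrace_format_remove(state, fmt);	/* one-based handle */
}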
9212 9236
9213 9237 static void
9214 9238 dtrace_format_destroy(dtrace_state_t *state)
9215 9239 {
9216 9240 int i;
9217 9241
9218 9242 if (state->dts_nformats == 0) {
9219 9243 ASSERT(state->dts_formats == NULL);
9220 9244 return;
9221 9245 }
9222 9246
9223 9247 ASSERT(state->dts_formats != NULL);
9224 9248
9225 9249 for (i = 0; i < state->dts_nformats; i++) {
9226 9250 char *fmt = state->dts_formats[i];
9227 9251
9228 9252 if (fmt == NULL)
9229 9253 continue;
9230 9254
9231 9255 kmem_free(fmt, strlen(fmt) + 1);
9232 9256 }
9233 9257
9234 9258 kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
9235 9259 state->dts_nformats = 0;
9236 9260 state->dts_formats = NULL;
9237 9261 }
9238 9262
9239 9263 /*
9240 9264 * DTrace Predicate Functions
9241 9265 */
9242 9266 static dtrace_predicate_t *
9243 9267 dtrace_predicate_create(dtrace_difo_t *dp)
9244 9268 {
9245 9269 dtrace_predicate_t *pred;
9246 9270
9247 9271 ASSERT(MUTEX_HELD(&dtrace_lock));
9248 9272 ASSERT(dp->dtdo_refcnt != 0);
9249 9273
9250 9274 pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
9251 9275 pred->dtp_difo = dp;
9252 9276 pred->dtp_refcnt = 1;
9253 9277
9254 9278 if (!dtrace_difo_cacheable(dp))
9255 9279 return (pred);
9256 9280
9257 9281 if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
9258 9282 /*
9259 9283 * This is only theoretically possible -- we have had 2^32
9260 9284 * cacheable predicates on this machine. We cannot allow any
9261 9285 * more predicates to become cacheable: as unlikely as it is,
9262 9286 * there may be a thread caching a (now stale) predicate cache
9263 9287 * ID. (N.B.: the temptation is being successfully resisted to
9264 9288 * have this cmn_err() "Holy shit -- we executed this code!")
9265 9289 */
9266 9290 return (pred);
9267 9291 }
9268 9292
9269 9293 pred->dtp_cacheid = dtrace_predcache_id++;
9270 9294
9271 9295 return (pred);
9272 9296 }
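
/*
 * Editor's note on how the cache id allocated above pays off (the
 * real test lives in dtrace_probe(), not here): a thread that has
 * evaluated a cacheable predicate to false carries that id in
 * curthread->t_predcache, so a later firing on the same thread can be
 * dismissed without evaluating any DIF.  Sketch of the check:
 */
static int
dtrace_predcache_skip_sketch(dtrace_probe_t *probe)
{
	/* Nonzero means this thread already knows the predicate is false. */
	return (probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == curthread->t_predcache);
}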
9273 9297
9274 9298 static void
9275 9299 dtrace_predicate_hold(dtrace_predicate_t *pred)
9276 9300 {
9277 9301 ASSERT(MUTEX_HELD(&dtrace_lock));
9278 9302 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
9279 9303 ASSERT(pred->dtp_refcnt > 0);
9280 9304
9281 9305 pred->dtp_refcnt++;
9282 9306 }
9283 9307
9284 9308 static void
9285 9309 dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
9286 9310 {
9287 9311 dtrace_difo_t *dp = pred->dtp_difo;
9288 9312
9289 9313 ASSERT(MUTEX_HELD(&dtrace_lock));
9290 9314 ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
9291 9315 ASSERT(pred->dtp_refcnt > 0);
9292 9316
9293 9317 if (--pred->dtp_refcnt == 0) {
9294 9318 dtrace_difo_release(pred->dtp_difo, vstate);
9295 9319 kmem_free(pred, sizeof (dtrace_predicate_t));
9296 9320 }
9297 9321 }
9298 9322
9299 9323 /*
9300 9324 * DTrace Action Description Functions
9301 9325 */
9302 9326 static dtrace_actdesc_t *
9303 9327 dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
9304 9328 uint64_t uarg, uint64_t arg)
9305 9329 {
9306 9330 dtrace_actdesc_t *act;
9307 9331
9308 9332 ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
9309 9333 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));
9310 9334
9311 9335 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
9312 9336 act->dtad_kind = kind;
9313 9337 act->dtad_ntuple = ntuple;
9314 9338 act->dtad_uarg = uarg;
9315 9339 act->dtad_arg = arg;
9316 9340 act->dtad_refcnt = 1;
9317 9341
9318 9342 return (act);
9319 9343 }
9320 9344
9321 9345 static void
9322 9346 dtrace_actdesc_hold(dtrace_actdesc_t *act)
9323 9347 {
9324 9348 ASSERT(act->dtad_refcnt >= 1);
9325 9349 act->dtad_refcnt++;
9326 9350 }
9327 9351
9328 9352 static void
9329 9353 dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
9330 9354 {
9331 9355 dtrace_actkind_t kind = act->dtad_kind;
9332 9356 dtrace_difo_t *dp;
9333 9357
9334 9358 ASSERT(act->dtad_refcnt >= 1);
9335 9359
9336 9360 if (--act->dtad_refcnt != 0)
9337 9361 return;
9338 9362
9339 9363 if ((dp = act->dtad_difo) != NULL)
9340 9364 dtrace_difo_release(dp, vstate);
9341 9365
9342 9366 if (DTRACEACT_ISPRINTFLIKE(kind)) {
9343 9367 char *str = (char *)(uintptr_t)act->dtad_arg;
9344 9368
9345 9369 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
9346 9370 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));
9347 9371
9348 9372 if (str != NULL)
9349 9373 kmem_free(str, strlen(str) + 1);
9350 9374 }
9351 9375
9352 9376 kmem_free(act, sizeof (dtrace_actdesc_t));
9353 9377 }
9354 9378
9355 9379 /*
9356 9380 * DTrace ECB Functions
9357 9381 */
9358 9382 static dtrace_ecb_t *
9359 9383 dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
9360 9384 {
9361 9385 dtrace_ecb_t *ecb;
9362 9386 dtrace_epid_t epid;
9363 9387
9364 9388 ASSERT(MUTEX_HELD(&dtrace_lock));
9365 9389
9366 9390 ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
9367 9391 ecb->dte_predicate = NULL;
9368 9392 ecb->dte_probe = probe;
9369 9393
9370 9394 /*
9371 9395 * The default size is the size of the default action: recording
9372 9396 * the epid.
9373 9397 */
9374 9398 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
9375 9399 ecb->dte_alignment = sizeof (dtrace_epid_t);
9376 9400
9377 9401 epid = state->dts_epid++;
9378 9402
9379 9403 if (epid - 1 >= state->dts_necbs) {
9380 9404 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
9381 9405 int necbs = state->dts_necbs << 1;
9382 9406
9383 9407 ASSERT(epid == state->dts_necbs + 1);
9384 9408
9385 9409 if (necbs == 0) {
9386 9410 ASSERT(oecbs == NULL);
9387 9411 necbs = 1;
9388 9412 }
9389 9413
9390 9414 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);
9391 9415
9392 9416 if (oecbs != NULL)
9393 9417 bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));
9394 9418
9395 9419 dtrace_membar_producer();
9396 9420 state->dts_ecbs = ecbs;
9397 9421
9398 9422 if (oecbs != NULL) {
9399 9423 /*
9400 9424 * If this state is active, we must dtrace_sync()
9401 9425 * before we can free the old dts_ecbs array: we're
9402 9426 * coming in hot, and there may be active ring
9403 9427 * buffer processing (which indexes into the dts_ecbs
9404 9428 * array) on another CPU.
9405 9429 */
9406 9430 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
9407 9431 dtrace_sync();
9408 9432
9409 9433 kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
9410 9434 }
9411 9435
9412 9436 dtrace_membar_producer();
9413 9437 state->dts_necbs = necbs;
9414 9438 }
9415 9439
9416 9440 ecb->dte_state = state;
9417 9441
9418 9442 ASSERT(state->dts_ecbs[epid - 1] == NULL);
9419 9443 dtrace_membar_producer();
9420 9444 state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;
9421 9445
9422 9446 return (ecb);
9423 9447 }
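
/*
 * Editor's sketch of the growth idiom above (the aggregation table
 * later in this file grows the same way): arrays indexed by a
 * one-based id double on demand, so allocation cost is amortized.
 * Unlike the real code, this sketch omits the
 * dtrace_membar_producer()/dtrace_sync() choreography required when
 * an active consumer may still be indexing the old array; all names
 * are hypothetical.
 */
static void
dtrace_grow_sketch(void ***arrp, int *np, int idx)
{
	while (idx >= *np) {
		int onum = *np;
		int nnum = (onum << 1) != 0 ? (onum << 1) : 1;
		void **new = kmem_zalloc(nnum * sizeof (void *), KM_SLEEP);

		if (onum != 0) {
			bcopy(*arrp, new, onum * sizeof (void *));
			kmem_free(*arrp, onum * sizeof (void *));
		}

		*arrp = new;
		*np = nnum;
	}
}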
9424 9448
9425 9449 static int
9426 9450 dtrace_ecb_enable(dtrace_ecb_t *ecb)
9427 9451 {
9428 9452 dtrace_probe_t *probe = ecb->dte_probe;
9429 9453
9430 9454 ASSERT(MUTEX_HELD(&cpu_lock));
9431 9455 ASSERT(MUTEX_HELD(&dtrace_lock));
9432 9456 ASSERT(ecb->dte_next == NULL);
9433 9457
9434 9458 if (probe == NULL) {
9435 9459 /*
9436 9460 * This is the NULL probe -- there's nothing to do.
9437 9461 */
9438 9462 return (0);
9439 9463 }
9440 9464
9441 9465 if (probe->dtpr_ecb == NULL) {
9442 9466 dtrace_provider_t *prov = probe->dtpr_provider;
9443 9467
9444 9468 /*
9445 9469 * We're the first ECB on this probe.
9446 9470 */
9447 9471 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;
9448 9472
9449 9473 if (ecb->dte_predicate != NULL)
9450 9474 probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
9451 9475
9452 9476 return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
9453 9477 probe->dtpr_id, probe->dtpr_arg));
9454 9478 } else {
9455 9479 /*
9456 9480 * This probe is already active. Swing the last pointer to
9457 9481 * point to the new ECB, and issue a dtrace_sync() to assure
9458 9482 * that all CPUs have seen the change.
9459 9483 */
9460 9484 ASSERT(probe->dtpr_ecb_last != NULL);
9461 9485 probe->dtpr_ecb_last->dte_next = ecb;
9462 9486 probe->dtpr_ecb_last = ecb;
9463 9487 probe->dtpr_predcache = 0;
9464 9488
9465 9489 dtrace_sync();
9466 9490 return (0);
9467 9491 }
9468 9492 }
9469 9493
9470 9494 static void
9471 9495 dtrace_ecb_resize(dtrace_ecb_t *ecb)
9472 9496 {
9473 9497 uint32_t maxalign = sizeof (dtrace_epid_t);
9474 9498 uint32_t align = sizeof (uint8_t), offs, diff;
9475 9499 dtrace_action_t *act;
9476 9500 int wastuple = 0;
9477 9501 uint32_t aggbase = UINT32_MAX;
9478 9502 dtrace_state_t *state = ecb->dte_state;
9479 9503
9480 9504 /*
9481 9505 * If we record anything, we always record the epid. (And we always
9482 9506 * record it first.)
9483 9507 */
9484 9508 offs = sizeof (dtrace_epid_t);
9485 9509 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
9486 9510
9487 9511 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
9488 9512 dtrace_recdesc_t *rec = &act->dta_rec;
9489 9513
9490 9514 if ((align = rec->dtrd_alignment) > maxalign)
9491 9515 maxalign = align;
9492 9516
9493 9517 if (!wastuple && act->dta_intuple) {
9494 9518 /*
9495 9519 * This is the first record in a tuple. Align the
9496 9520 * offset to be at offset 4 in an 8-byte aligned
9497 9521 * block.
9498 9522 */
9499 9523 diff = offs + sizeof (dtrace_aggid_t);
9500 9524
9501 9525 if (diff = (diff & (sizeof (uint64_t) - 1)))
9502 9526 offs += sizeof (uint64_t) - diff;
9503 9527
9504 9528 aggbase = offs - sizeof (dtrace_aggid_t);
9505 9529 ASSERT(!(aggbase & (sizeof (uint64_t) - 1)));
9506 9530 }
9507 9531
9508 9532 /*LINTED*/
9509 9533 if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) {
9510 9534 /*
9511 9535 * The current offset is not properly aligned; align it.
9512 9536 */
9513 9537 offs += align - diff;
9514 9538 }
9515 9539
9516 9540 rec->dtrd_offset = offs;
9517 9541
9518 9542 if (offs + rec->dtrd_size > ecb->dte_needed) {
9519 9543 ecb->dte_needed = offs + rec->dtrd_size;
9520 9544
9521 9545 if (ecb->dte_needed > state->dts_needed)
9522 9546 state->dts_needed = ecb->dte_needed;
9523 9547 }
9524 9548
9525 9549 if (DTRACEACT_ISAGG(act->dta_kind)) {
9526 9550 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
9527 9551 dtrace_action_t *first = agg->dtag_first, *prev;
9528 9552
9529 9553 ASSERT(rec->dtrd_size != 0 && first != NULL);
9530 9554 ASSERT(wastuple);
9531 9555 ASSERT(aggbase != UINT32_MAX);
9532 9556
9533 9557 agg->dtag_base = aggbase;
9534 9558
9535 9559 while ((prev = first->dta_prev) != NULL &&
9536 9560 DTRACEACT_ISAGG(prev->dta_kind)) {
9537 9561 agg = (dtrace_aggregation_t *)prev;
9538 9562 first = agg->dtag_first;
9539 9563 }
9540 9564
9541 9565 if (prev != NULL) {
9542 9566 offs = prev->dta_rec.dtrd_offset +
9543 9567 prev->dta_rec.dtrd_size;
9544 9568 } else {
9545 9569 offs = sizeof (dtrace_epid_t);
9546 9570 }
9547 9571 wastuple = 0;
9548 9572 } else {
9549 9573 if (!act->dta_intuple)
9550 9574 ecb->dte_size = offs + rec->dtrd_size;
9551 9575
9552 9576 offs += rec->dtrd_size;
9553 9577 }
9554 9578
9555 9579 wastuple = act->dta_intuple;
9556 9580 }
9557 9581
9558 9582 if ((act = ecb->dte_action) != NULL &&
9559 9583 !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
9560 9584 ecb->dte_size == sizeof (dtrace_epid_t)) {
9561 9585 /*
9562 9586 * If the size is still sizeof (dtrace_epid_t), then all
9563 9587 * actions store no data; set the size to 0.
9564 9588 */
9565 9589 ecb->dte_alignment = maxalign;
9566 9590 ecb->dte_size = 0;
9567 9591
9568 9592 /*
9569 9593 * If the needed space is still sizeof (dtrace_epid_t), then
9570 9594 * all actions need no additional space; set the needed
9571 9595 * size to 0.
9572 9596 */
9573 9597 if (ecb->dte_needed == sizeof (dtrace_epid_t))
9574 9598 ecb->dte_needed = 0;
9575 9599
9576 9600 return;
9577 9601 }
9578 9602
9579 9603 /*
9580 9604 * Set our alignment, and make sure that the dte_size and dte_needed
9581 9605 * are aligned to the size of an EPID.
9582 9606 */
9583 9607 ecb->dte_alignment = maxalign;
9584 9608 ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) &
9585 9609 ~(sizeof (dtrace_epid_t) - 1);
9586 9610 ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) &
9587 9611 ~(sizeof (dtrace_epid_t) - 1);
9588 9612 ASSERT(ecb->dte_size <= ecb->dte_needed);
9589 9613 }
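
/*
 * Editor's note: the rounding idiom above -- (x + (a - 1)) & ~(a - 1)
 * for a power-of-two a -- yields the smallest multiple of a that is
 * >= x.  With a = sizeof (dtrace_epid_t) == 4, a size of 13 rounds to
 * 16 and 16 stays 16.  Standalone sketch, assuming only that a is a
 * power of two:
 */
static uint32_t
dtrace_roundup_sketch(uint32_t x, uint32_t a)
{
	ASSERT((a & (a - 1)) == 0);		/* a is a power of two */
	return ((x + (a - 1)) & ~(a - 1));
}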
9590 9614
9591 9615 static dtrace_action_t *
9592 9616 dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
9593 9617 {
9594 9618 dtrace_aggregation_t *agg;
9595 9619 size_t size = sizeof (uint64_t);
9596 9620 int ntuple = desc->dtad_ntuple;
9597 9621 dtrace_action_t *act;
9598 9622 dtrace_recdesc_t *frec;
9599 9623 dtrace_aggid_t aggid;
9600 9624 dtrace_state_t *state = ecb->dte_state;
9601 9625
9602 9626 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
9603 9627 agg->dtag_ecb = ecb;
9604 9628
9605 9629 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));
9606 9630
9607 9631 switch (desc->dtad_kind) {
9608 9632 case DTRACEAGG_MIN:
9609 9633 agg->dtag_initial = INT64_MAX;
9610 9634 agg->dtag_aggregate = dtrace_aggregate_min;
9611 9635 break;
9612 9636
9613 9637 case DTRACEAGG_MAX:
9614 9638 agg->dtag_initial = INT64_MIN;
9615 9639 agg->dtag_aggregate = dtrace_aggregate_max;
9616 9640 break;
9617 9641
9618 9642 case DTRACEAGG_COUNT:
9619 9643 agg->dtag_aggregate = dtrace_aggregate_count;
9620 9644 break;
9621 9645
9622 9646 case DTRACEAGG_QUANTIZE:
9623 9647 agg->dtag_aggregate = dtrace_aggregate_quantize;
9624 9648 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
9625 9649 sizeof (uint64_t);
9626 9650 break;
9627 9651
9628 9652 case DTRACEAGG_LQUANTIZE: {
9629 9653 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
9630 9654 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);
9631 9655
9632 9656 agg->dtag_initial = desc->dtad_arg;
9633 9657 agg->dtag_aggregate = dtrace_aggregate_lquantize;
9634 9658
9635 9659 if (step == 0 || levels == 0)
9636 9660 goto err;
9637 9661
9638 9662 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
9639 9663 break;
9640 9664 }
9641 9665
9642 9666 case DTRACEAGG_LLQUANTIZE: {
9643 9667 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
9644 9668 uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
9645 9669 uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
9646 9670 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
9647 9671 int64_t v;
9648 9672
9649 9673 agg->dtag_initial = desc->dtad_arg;
9650 9674 agg->dtag_aggregate = dtrace_aggregate_llquantize;
9651 9675
9652 9676 if (factor < 2 || low >= high || nsteps < factor)
9653 9677 goto err;
9654 9678
9655 9679 /*
9656 9680 * Now check that the number of steps evenly divides a power
9657 9681 * of the factor. (This assures both integer bucket size and
9658 9682 * linearity within each magnitude.)
9659 9683 */
9660 9684 for (v = factor; v < nsteps; v *= factor)
9661 9685 continue;
9662 9686
9663 9687 if ((v % nsteps) || (nsteps % factor))
9664 9688 goto err;
9665 9689
9666 9690 size = (dtrace_aggregate_llquantize_bucket(factor,
9667 9691 low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
9668 9692 break;
9669 9693 }
9670 9694
9671 9695 case DTRACEAGG_AVG:
9672 9696 agg->dtag_aggregate = dtrace_aggregate_avg;
9673 9697 size = sizeof (uint64_t) * 2;
9674 9698 break;
9675 9699
9676 9700 case DTRACEAGG_STDDEV:
9677 9701 agg->dtag_aggregate = dtrace_aggregate_stddev;
9678 9702 size = sizeof (uint64_t) * 4;
9679 9703 break;
9680 9704
9681 9705 case DTRACEAGG_SUM:
9682 9706 agg->dtag_aggregate = dtrace_aggregate_sum;
9683 9707 break;
9684 9708
9685 9709 default:
9686 9710 goto err;
9687 9711 }
9688 9712
9689 9713 agg->dtag_action.dta_rec.dtrd_size = size;
9690 9714
9691 9715 if (ntuple == 0)
9692 9716 goto err;
9693 9717
9694 9718 /*
9695 9719 * We must make sure that we have enough actions for the n-tuple.
9696 9720 */
9697 9721 for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
9698 9722 if (DTRACEACT_ISAGG(act->dta_kind))
9699 9723 break;
9700 9724
9701 9725 if (--ntuple == 0) {
9702 9726 /*
9703 9727 * This is the action with which our n-tuple begins.
9704 9728 */
9705 9729 agg->dtag_first = act;
9706 9730 goto success;
9707 9731 }
9708 9732 }
9709 9733
9710 9734 /*
9711 9735 * This n-tuple is short by ntuple elements. Return failure.
9712 9736 */
9713 9737 ASSERT(ntuple != 0);
9714 9738 err:
9715 9739 kmem_free(agg, sizeof (dtrace_aggregation_t));
9716 9740 return (NULL);
9717 9741
9718 9742 success:
9719 9743 /*
9720 9744 * If the last action in the tuple has a size of zero, it's actually
9721 9745 * an expression argument for the aggregating action.
9722 9746 */
9723 9747 ASSERT(ecb->dte_action_last != NULL);
9724 9748 act = ecb->dte_action_last;
9725 9749
9726 9750 if (act->dta_kind == DTRACEACT_DIFEXPR) {
9727 9751 ASSERT(act->dta_difo != NULL);
9728 9752
9729 9753 if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
9730 9754 agg->dtag_hasarg = 1;
9731 9755 }
9732 9756
9733 9757 /*
9734 9758 * We need to allocate an id for this aggregation.
9735 9759 */
9736 9760 aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
9737 9761 VM_BESTFIT | VM_SLEEP);
9738 9762
9739 9763 if (aggid - 1 >= state->dts_naggregations) {
9740 9764 dtrace_aggregation_t **oaggs = state->dts_aggregations;
9741 9765 dtrace_aggregation_t **aggs;
9742 9766 int naggs = state->dts_naggregations << 1;
9743 9767 int onaggs = state->dts_naggregations;
9744 9768
9745 9769 ASSERT(aggid == state->dts_naggregations + 1);
9746 9770
9747 9771 if (naggs == 0) {
9748 9772 ASSERT(oaggs == NULL);
9749 9773 naggs = 1;
9750 9774 }
9751 9775
9752 9776 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);
9753 9777
9754 9778 if (oaggs != NULL) {
9755 9779 bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
9756 9780 kmem_free(oaggs, onaggs * sizeof (*aggs));
9757 9781 }
9758 9782
9759 9783 state->dts_aggregations = aggs;
9760 9784 state->dts_naggregations = naggs;
9761 9785 }
9762 9786
9763 9787 ASSERT(state->dts_aggregations[aggid - 1] == NULL);
9764 9788 state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;
9765 9789
9766 9790 frec = &agg->dtag_first->dta_rec;
9767 9791 if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
9768 9792 frec->dtrd_alignment = sizeof (dtrace_aggid_t);
9769 9793
9770 9794 for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
9771 9795 ASSERT(!act->dta_intuple);
9772 9796 act->dta_intuple = 1;
9773 9797 }
9774 9798
9775 9799 return (&agg->dtag_action);
9776 9800 }
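
/*
 * Editor's worked example of the llquantize() parameter check above:
 * with factor = 10 and nsteps = 20, the loop raises v to the first
 * power of the factor that is >= nsteps (v = 100), then requires both
 * 100 % 20 == 0 and 20 % 10 == 0.  Both hold, so 20 steps per decade
 * is accepted; nsteps = 30 would be rejected because 100 % 30 != 0.
 * The same test, extracted into a standalone sketch:
 */
static int
dtrace_llq_steps_ok_sketch(uint16_t factor, uint16_t nsteps)
{
	int64_t v;

	if (factor < 2 || nsteps < factor)
		return (0);

	for (v = factor; v < nsteps; v *= factor)
		continue;

	return (!((v % nsteps) || (nsteps % factor)));
}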
9777 9801
9778 9802 static void
9779 9803 dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
9780 9804 {
9781 9805 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
9782 9806 dtrace_state_t *state = ecb->dte_state;
9783 9807 dtrace_aggid_t aggid = agg->dtag_id;
9784 9808
9785 9809 ASSERT(DTRACEACT_ISAGG(act->dta_kind));
9786 9810 vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);
9787 9811
9788 9812 ASSERT(state->dts_aggregations[aggid - 1] == agg);
9789 9813 state->dts_aggregations[aggid - 1] = NULL;
9790 9814
9791 9815 kmem_free(agg, sizeof (dtrace_aggregation_t));
9792 9816 }
9793 9817
9794 9818 static int
9795 9819 dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
9796 9820 {
9797 9821 dtrace_action_t *action, *last;
9798 9822 dtrace_difo_t *dp = desc->dtad_difo;
9799 9823 uint32_t size = 0, align = sizeof (uint8_t), mask;
9800 9824 uint16_t format = 0;
9801 9825 dtrace_recdesc_t *rec;
9802 9826 dtrace_state_t *state = ecb->dte_state;
9803 9827 dtrace_optval_t *opt = state->dts_options, nframes, strsize;
9804 9828 uint64_t arg = desc->dtad_arg;
9805 9829
9806 9830 ASSERT(MUTEX_HELD(&dtrace_lock));
9807 9831 ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);
9808 9832
9809 9833 if (DTRACEACT_ISAGG(desc->dtad_kind)) {
9810 9834 /*
9811 9835 * If this is an aggregating action, there must be neither
9812 9836 * a speculate nor a commit on the action chain.
9813 9837 */
9814 9838 dtrace_action_t *act;
9815 9839
9816 9840 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
9817 9841 if (act->dta_kind == DTRACEACT_COMMIT)
9818 9842 return (EINVAL);
9819 9843
9820 9844 if (act->dta_kind == DTRACEACT_SPECULATE)
9821 9845 return (EINVAL);
9822 9846 }
9823 9847
9824 9848 action = dtrace_ecb_aggregation_create(ecb, desc);
9825 9849
9826 9850 if (action == NULL)
9827 9851 return (EINVAL);
9828 9852 } else {
9829 9853 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
9830 9854 (desc->dtad_kind == DTRACEACT_DIFEXPR &&
9831 9855 dp != NULL && dp->dtdo_destructive)) {
9832 9856 state->dts_destructive = 1;
9833 9857 }
9834 9858
9835 9859 switch (desc->dtad_kind) {
9836 9860 case DTRACEACT_PRINTF:
9837 9861 case DTRACEACT_PRINTA:
9838 9862 case DTRACEACT_SYSTEM:
9839 9863 case DTRACEACT_FREOPEN:
9840 9864 case DTRACEACT_DIFEXPR:
9841 9865 /*
9842 9866 * We know that our arg is a string -- turn it into a
9843 9867 * format.
9844 9868 */
9845 9869 if (arg == NULL) {
9846 9870 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
9847 9871 desc->dtad_kind == DTRACEACT_DIFEXPR);
9848 9872 format = 0;
9849 9873 } else {
9850 9874 ASSERT(arg != NULL);
9851 9875 ASSERT(arg > KERNELBASE);
9852 9876 format = dtrace_format_add(state,
9853 9877 (char *)(uintptr_t)arg);
9854 9878 }
9855 9879
9856 9880 /*FALLTHROUGH*/
9857 9881 case DTRACEACT_LIBACT:
9858 9882 case DTRACEACT_TRACEMEM:
9859 9883 case DTRACEACT_TRACEMEM_DYNSIZE:
9860 9884 if (dp == NULL)
9861 9885 return (EINVAL);
9862 9886
9863 9887 if ((size = dp->dtdo_rtype.dtdt_size) != 0)
9864 9888 break;
9865 9889
9866 9890 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
9867 9891 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
9868 9892 return (EINVAL);
9869 9893
9870 9894 size = opt[DTRACEOPT_STRSIZE];
9871 9895 }
9872 9896
9873 9897 break;
9874 9898
9875 9899 case DTRACEACT_STACK:
9876 9900 if ((nframes = arg) == 0) {
9877 9901 nframes = opt[DTRACEOPT_STACKFRAMES];
9878 9902 ASSERT(nframes > 0);
9879 9903 arg = nframes;
9880 9904 }
9881 9905
9882 9906 size = nframes * sizeof (pc_t);
9883 9907 break;
9884 9908
9885 9909 case DTRACEACT_JSTACK:
9886 9910 if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
9887 9911 strsize = opt[DTRACEOPT_JSTACKSTRSIZE];
9888 9912
9889 9913 if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
9890 9914 nframes = opt[DTRACEOPT_JSTACKFRAMES];
9891 9915
9892 9916 arg = DTRACE_USTACK_ARG(nframes, strsize);
9893 9917
9894 9918 /*FALLTHROUGH*/
9895 9919 case DTRACEACT_USTACK:
9896 9920 if (desc->dtad_kind != DTRACEACT_JSTACK &&
9897 9921 (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
9898 9922 strsize = DTRACE_USTACK_STRSIZE(arg);
9899 9923 nframes = opt[DTRACEOPT_USTACKFRAMES];
9900 9924 ASSERT(nframes > 0);
9901 9925 arg = DTRACE_USTACK_ARG(nframes, strsize);
9902 9926 }
9903 9927
9904 9928 /*
9905 9929 * Save a slot for the pid.
9906 9930 */
9907 9931 size = (nframes + 1) * sizeof (uint64_t);
9908 9932 size += DTRACE_USTACK_STRSIZE(arg);
9909 9933 size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));
9910 9934
9911 9935 break;
9912 9936
9913 9937 case DTRACEACT_SYM:
9914 9938 case DTRACEACT_MOD:
9915 9939 if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
9916 9940 sizeof (uint64_t)) ||
9917 9941 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
9918 9942 return (EINVAL);
9919 9943 break;
9920 9944
9921 9945 case DTRACEACT_USYM:
9922 9946 case DTRACEACT_UMOD:
9923 9947 case DTRACEACT_UADDR:
9924 9948 if (dp == NULL ||
9925 9949 (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
9926 9950 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
9927 9951 return (EINVAL);
9928 9952
9929 9953 /*
9930 9954 * We have a slot for the pid, plus a slot for the
9931 9955 * argument. To keep things simple (aligned with
9932 9956 * bitness-neutral sizing), we store each as a 64-bit
9933 9957 * quantity.
9934 9958 */
9935 9959 size = 2 * sizeof (uint64_t);
9936 9960 break;
9937 9961
9938 9962 case DTRACEACT_STOP:
9939 9963 case DTRACEACT_BREAKPOINT:
9940 9964 case DTRACEACT_PANIC:
9941 9965 break;
9942 9966
9943 9967 case DTRACEACT_CHILL:
9944 9968 case DTRACEACT_DISCARD:
9945 9969 case DTRACEACT_RAISE:
9946 9970 if (dp == NULL)
9947 9971 return (EINVAL);
9948 9972 break;
9949 9973
9950 9974 case DTRACEACT_EXIT:
9951 9975 if (dp == NULL ||
9952 9976 (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
9953 9977 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
9954 9978 return (EINVAL);
9955 9979 break;
9956 9980
9957 9981 case DTRACEACT_SPECULATE:
9958 9982 if (ecb->dte_size > sizeof (dtrace_epid_t))
9959 9983 return (EINVAL);
9960 9984
9961 9985 if (dp == NULL)
9962 9986 return (EINVAL);
9963 9987
9964 9988 state->dts_speculates = 1;
9965 9989 break;
9966 9990
9967 9991 case DTRACEACT_COMMIT: {
9968 9992 dtrace_action_t *act = ecb->dte_action;
9969 9993
9970 9994 for (; act != NULL; act = act->dta_next) {
9971 9995 if (act->dta_kind == DTRACEACT_COMMIT)
9972 9996 return (EINVAL);
9973 9997 }
9974 9998
9975 9999 if (dp == NULL)
9976 10000 return (EINVAL);
9977 10001 break;
9978 10002 }
9979 10003
9980 10004 default:
9981 10005 return (EINVAL);
9982 10006 }
9983 10007
9984 10008 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
9985 10009 /*
9986 10010 * If this is a data-storing action or a speculate,
9987 10011 * we must be sure that there isn't a commit on the
9988 10012 * action chain.
9989 10013 */
9990 10014 dtrace_action_t *act = ecb->dte_action;
9991 10015
9992 10016 for (; act != NULL; act = act->dta_next) {
9993 10017 if (act->dta_kind == DTRACEACT_COMMIT)
9994 10018 return (EINVAL);
9995 10019 }
9996 10020 }
9997 10021
9998 10022 action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
9999 10023 action->dta_rec.dtrd_size = size;
10000 10024 }
10001 10025
10002 10026 action->dta_refcnt = 1;
10003 10027 rec = &action->dta_rec;
10004 10028 size = rec->dtrd_size;
10005 10029
10006 10030 for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
10007 10031 if (!(size & mask)) {
10008 10032 align = mask + 1;
10009 10033 break;
10010 10034 }
10011 10035 }
10012 10036
10013 10037 action->dta_kind = desc->dtad_kind;
10014 10038
10015 10039 if ((action->dta_difo = dp) != NULL)
10016 10040 dtrace_difo_hold(dp);
10017 10041
10018 10042 rec->dtrd_action = action->dta_kind;
10019 10043 rec->dtrd_arg = arg;
10020 10044 rec->dtrd_uarg = desc->dtad_uarg;
10021 10045 rec->dtrd_alignment = (uint16_t)align;
10022 10046 rec->dtrd_format = format;
10023 10047
10024 10048 if ((last = ecb->dte_action_last) != NULL) {
10025 10049 ASSERT(ecb->dte_action != NULL);
10026 10050 action->dta_prev = last;
10027 10051 last->dta_next = action;
10028 10052 } else {
10029 10053 ASSERT(ecb->dte_action == NULL);
10030 10054 ecb->dte_action = action;
10031 10055 }
10032 10056
10033 10057 ecb->dte_action_last = action;
10034 10058
10035 10059 return (0);
10036 10060 }
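
/*
 * Editor's note: the mask loop above derives a record's alignment as
 * the largest power of two (capped at 8) that evenly divides its
 * size: size 16 -> align 8, size 12 -> align 4, size 6 -> align 2,
 * size 5 -> align 1.  Equivalent standalone sketch:
 */
static uint32_t
dtrace_rec_align_sketch(uint32_t size)
{
	uint32_t mask, align = sizeof (uint8_t);

	for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0;
	    mask >>= 1) {
		if (!(size & mask)) {
			align = mask + 1;
			break;
		}
	}

	return (align);
}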
10037 10061
10038 10062 static void
10039 10063 dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
10040 10064 {
10041 10065 dtrace_action_t *act = ecb->dte_action, *next;
10042 10066 dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
10043 10067 dtrace_difo_t *dp;
10044 10068 uint16_t format;
10045 10069
10046 10070 if (act != NULL && act->dta_refcnt > 1) {
10047 10071 ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
10048 10072 act->dta_refcnt--;
10049 10073 } else {
10050 10074 for (; act != NULL; act = next) {
10051 10075 next = act->dta_next;
10052 10076 ASSERT(next != NULL || act == ecb->dte_action_last);
10053 10077 ASSERT(act->dta_refcnt == 1);
10054 10078
10055 10079 if ((format = act->dta_rec.dtrd_format) != 0)
10056 10080 dtrace_format_remove(ecb->dte_state, format);
10057 10081
10058 10082 if ((dp = act->dta_difo) != NULL)
10059 10083 dtrace_difo_release(dp, vstate);
10060 10084
10061 10085 if (DTRACEACT_ISAGG(act->dta_kind)) {
10062 10086 dtrace_ecb_aggregation_destroy(ecb, act);
10063 10087 } else {
10064 10088 kmem_free(act, sizeof (dtrace_action_t));
10065 10089 }
10066 10090 }
10067 10091 }
10068 10092
10069 10093 ecb->dte_action = NULL;
10070 10094 ecb->dte_action_last = NULL;
10071 10095 ecb->dte_size = sizeof (dtrace_epid_t);
10072 10096 }
10073 10097
10074 10098 static void
10075 10099 dtrace_ecb_disable(dtrace_ecb_t *ecb)
10076 10100 {
10077 10101 /*
10078 10102 * We disable the ECB by removing it from its probe.
10079 10103 */
10080 10104 dtrace_ecb_t *pecb, *prev = NULL;
10081 10105 dtrace_probe_t *probe = ecb->dte_probe;
10082 10106
10083 10107 ASSERT(MUTEX_HELD(&dtrace_lock));
10084 10108
10085 10109 if (probe == NULL) {
10086 10110 /*
10087 10111 * This is the NULL probe; there is nothing to disable.
10088 10112 */
10089 10113 return;
10090 10114 }
10091 10115
10092 10116 for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
10093 10117 if (pecb == ecb)
10094 10118 break;
10095 10119 prev = pecb;
10096 10120 }
10097 10121
10098 10122 ASSERT(pecb != NULL);
10099 10123
10100 10124 if (prev == NULL) {
10101 10125 probe->dtpr_ecb = ecb->dte_next;
10102 10126 } else {
10103 10127 prev->dte_next = ecb->dte_next;
10104 10128 }
10105 10129
10106 10130 if (ecb == probe->dtpr_ecb_last) {
10107 10131 ASSERT(ecb->dte_next == NULL);
10108 10132 probe->dtpr_ecb_last = prev;
10109 10133 }
10110 10134
10111 10135 /*
10112 10136 * The ECB has been disconnected from the probe; now sync to assure
10113 10137 * that all CPUs have seen the change before returning.
10114 10138 */
10115 10139 dtrace_sync();
10116 10140
10117 10141 if (probe->dtpr_ecb == NULL) {
10118 10142 /*
10119 10143 * That was the last ECB on the probe; clear the predicate
10120 10144 * cache ID for the probe, disable it and sync one more time
10121 10145 * to assure that we'll never hit it again.
10122 10146 */
10123 10147 dtrace_provider_t *prov = probe->dtpr_provider;
10124 10148
10125 10149 ASSERT(ecb->dte_next == NULL);
10126 10150 ASSERT(probe->dtpr_ecb_last == NULL);
10127 10151 probe->dtpr_predcache = DTRACE_CACHEIDNONE;
10128 10152 prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
10129 10153 probe->dtpr_id, probe->dtpr_arg);
10130 10154 dtrace_sync();
10131 10155 } else {
10132 10156 /*
10133 10157 * There is at least one ECB remaining on the probe. If there
10134 10158 * is _exactly_ one, set the probe's predicate cache ID to be
10135 10159 * the predicate cache ID of the remaining ECB.
10136 10160 */
10137 10161 ASSERT(probe->dtpr_ecb_last != NULL);
10138 10162 ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);
10139 10163
10140 10164 if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
10141 10165 dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;
10142 10166
10143 10167 ASSERT(probe->dtpr_ecb->dte_next == NULL);
10144 10168
10145 10169 if (p != NULL)
10146 10170 probe->dtpr_predcache = p->dtp_cacheid;
10147 10171 }
10148 10172
10149 10173 ecb->dte_next = NULL;
10150 10174 }
10151 10175 }
10152 10176
10153 10177 static void
10154 10178 dtrace_ecb_destroy(dtrace_ecb_t *ecb)
10155 10179 {
10156 10180 dtrace_state_t *state = ecb->dte_state;
10157 10181 dtrace_vstate_t *vstate = &state->dts_vstate;
10158 10182 dtrace_predicate_t *pred;
10159 10183 dtrace_epid_t epid = ecb->dte_epid;
10160 10184
10161 10185 ASSERT(MUTEX_HELD(&dtrace_lock));
10162 10186 ASSERT(ecb->dte_next == NULL);
10163 10187 ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);
10164 10188
10165 10189 if ((pred = ecb->dte_predicate) != NULL)
10166 10190 dtrace_predicate_release(pred, vstate);
10167 10191
10168 10192 dtrace_ecb_action_remove(ecb);
10169 10193
10170 10194 ASSERT(state->dts_ecbs[epid - 1] == ecb);
10171 10195 state->dts_ecbs[epid - 1] = NULL;
10172 10196
10173 10197 kmem_free(ecb, sizeof (dtrace_ecb_t));
10174 10198 }
10175 10199
10176 10200 static dtrace_ecb_t *
10177 10201 dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
10178 10202 dtrace_enabling_t *enab)
10179 10203 {
10180 10204 dtrace_ecb_t *ecb;
10181 10205 dtrace_predicate_t *pred;
10182 10206 dtrace_actdesc_t *act;
10183 10207 dtrace_provider_t *prov;
10184 10208 dtrace_ecbdesc_t *desc = enab->dten_current;
10185 10209
10186 10210 ASSERT(MUTEX_HELD(&dtrace_lock));
10187 10211 ASSERT(state != NULL);
10188 10212
10189 10213 ecb = dtrace_ecb_add(state, probe);
10190 10214 ecb->dte_uarg = desc->dted_uarg;
10191 10215
10192 10216 if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
10193 10217 dtrace_predicate_hold(pred);
10194 10218 ecb->dte_predicate = pred;
10195 10219 }
10196 10220
10197 10221 if (probe != NULL) {
10198 10222 /*
10199 10223 * If the provider shows more leg than the consumer is old
10200 10224 * enough to see, we need to enable the appropriate implicit
10201 10225 * predicate bits to prevent the ecb from activating at
10202 10226 * revealing times.
10203 10227 *
10204 10228 * Providers specifying DTRACE_PRIV_USER at register time
10205 10229 * are stating that they need the /proc-style privilege
10206 10230 * model to be enforced, and this is what DTRACE_COND_OWNER
10207 10231 * and DTRACE_COND_ZONEOWNER will then do at probe time.
10208 10232 */
10209 10233 prov = probe->dtpr_provider;
10210 10234 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
10211 10235 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
10212 10236 ecb->dte_cond |= DTRACE_COND_OWNER;
10213 10237
10214 10238 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
10215 10239 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
10216 10240 ecb->dte_cond |= DTRACE_COND_ZONEOWNER;
10217 10241
10218 10242 /*
10219 10243 * If the provider shows us kernel innards and the user
10220 10244 * is lacking sufficient privilege, enable the
10221 10245 * DTRACE_COND_USERMODE implicit predicate.
10222 10246 */
10223 10247 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
10224 10248 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
10225 10249 ecb->dte_cond |= DTRACE_COND_USERMODE;
10226 10250 }
10227 10251
10228 10252 if (dtrace_ecb_create_cache != NULL) {
10229 10253 /*
10230 10254 * If we have a cached ecb, we'll use its action list instead
10231 10255 * of creating our own (saving both time and space).
10232 10256 */
10233 10257 dtrace_ecb_t *cached = dtrace_ecb_create_cache;
10234 10258 dtrace_action_t *act = cached->dte_action;
10235 10259
10236 10260 if (act != NULL) {
10237 10261 ASSERT(act->dta_refcnt > 0);
10238 10262 act->dta_refcnt++;
10239 10263 ecb->dte_action = act;
10240 10264 ecb->dte_action_last = cached->dte_action_last;
10241 10265 ecb->dte_needed = cached->dte_needed;
10242 10266 ecb->dte_size = cached->dte_size;
10243 10267 ecb->dte_alignment = cached->dte_alignment;
10244 10268 }
10245 10269
10246 10270 return (ecb);
10247 10271 }
10248 10272
10249 10273 for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
10250 10274 if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
10251 10275 dtrace_ecb_destroy(ecb);
10252 10276 return (NULL);
10253 10277 }
10254 10278 }
10255 10279
10256 10280 dtrace_ecb_resize(ecb);
10257 10281
10258 10282 return (dtrace_ecb_create_cache = ecb);
10259 10283 }
10260 10284
10261 10285 static int
10262 10286 dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
10263 10287 {
10264 10288 dtrace_ecb_t *ecb;
10265 10289 dtrace_enabling_t *enab = arg;
10266 10290 dtrace_state_t *state = enab->dten_vstate->dtvs_state;
10267 10291
10268 10292 ASSERT(state != NULL);
10269 10293
10270 10294 if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
10271 10295 /*
10272 10296 * This probe was created in a generation for which this
10273 10297 * enabling has previously created ECBs; we don't want to
10274 10298 * enable it again, so just kick out.
10275 10299 */
10276 10300 return (DTRACE_MATCH_NEXT);
10277 10301 }
10278 10302
10279 10303 if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
10280 10304 return (DTRACE_MATCH_DONE);
10281 10305
10282 10306 if (dtrace_ecb_enable(ecb) < 0)
10283 10307 return (DTRACE_MATCH_FAIL);
10284 10308
10285 10309 return (DTRACE_MATCH_NEXT);
10286 10310 }
10287 10311
10288 10312 static dtrace_ecb_t *
10289 10313 dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
10290 10314 {
10291 10315 dtrace_ecb_t *ecb;
10292 10316
10293 10317 ASSERT(MUTEX_HELD(&dtrace_lock));
10294 10318
10295 10319 if (id == 0 || id > state->dts_necbs)
10296 10320 return (NULL);
10297 10321
10298 10322 ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
10299 10323 ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);
10300 10324
10301 10325 return (state->dts_ecbs[id - 1]);
10302 10326 }
10303 10327
10304 10328 static dtrace_aggregation_t *
10305 10329 dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
10306 10330 {
10307 10331 dtrace_aggregation_t *agg;
10308 10332
10309 10333 ASSERT(MUTEX_HELD(&dtrace_lock));
10310 10334
10311 10335 if (id == 0 || id > state->dts_naggregations)
10312 10336 return (NULL);
10313 10337
10314 10338 ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
10315 10339 ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
10316 10340 agg->dtag_id == id);
10317 10341
10318 10342 return (state->dts_aggregations[id - 1]);
10319 10343 }
10320 10344
10321 10345 /*
10322 10346 * DTrace Buffer Functions
10323 10347 *
10324 10348 * The following functions manipulate DTrace buffers. Most of these functions
10325 10349 * are called in the context of establishing or processing consumer state;
10326 10350 * exceptions are explicitly noted.
10327 10351 */
10328 10352
10329 10353 /*
10330 10354 * Note: called from cross call context. This function switches the two
10331 10355 * buffers on a given CPU. The atomicity of this operation is assured by
10332 10356 * disabling interrupts while the actual switch takes place; the disabling of
10333 10357 * interrupts serializes the execution with any execution of dtrace_probe() on
10334 10358 * the same CPU.
10335 10359 */
10336 10360 static void
10337 10361 dtrace_buffer_switch(dtrace_buffer_t *buf)
10338 10362 {
10339 10363 caddr_t tomax = buf->dtb_tomax;
10340 10364 caddr_t xamot = buf->dtb_xamot;
10341 10365 dtrace_icookie_t cookie;
10342 10366 hrtime_t now = dtrace_gethrtime();
10343 10367
10344 10368 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
10345 10369 ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
10346 10370
10347 10371 cookie = dtrace_interrupt_disable();
10348 10372 buf->dtb_tomax = xamot;
10349 10373 buf->dtb_xamot = tomax;
10350 10374 buf->dtb_xamot_drops = buf->dtb_drops;
10351 10375 buf->dtb_xamot_offset = buf->dtb_offset;
10352 10376 buf->dtb_xamot_errors = buf->dtb_errors;
10353 10377 buf->dtb_xamot_flags = buf->dtb_flags;
10354 10378 buf->dtb_offset = 0;
10355 10379 buf->dtb_drops = 0;
10356 10380 buf->dtb_errors = 0;
10357 10381 buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
10358 10382 buf->dtb_interval = now - buf->dtb_switched;
10359 10383 buf->dtb_switched = now;
10360 10384 dtrace_interrupt_enable(cookie);
10361 10385 }
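/*
 * A minimal stand-alone sketch of the same two-buffer exchange, with
 * hypothetical toy_* names and compiled out via #if 0: writers append to
 * the active buffer while the consumer drains the inactive one, and a
 * switch simply exchanges the roles.  In the kernel this runs with
 * interrupts disabled on the target CPU, so dtrace_probe() can never
 * observe a half-updated structure.
 */
#if 0
#include <stddef.h>

typedef struct toy_buf {
	char	*tb_active;		/* probes record here (dtb_tomax) */
	char	*tb_inactive;		/* consumer drains this (dtb_xamot) */
	size_t	tb_offset;		/* write offset into tb_active */
	size_t	tb_inactive_offset;	/* valid bytes in tb_inactive */
} toy_buf_t;

static void
toy_buf_switch(toy_buf_t *b)
{
	char *tmp = b->tb_active;

	b->tb_active = b->tb_inactive;
	b->tb_inactive = tmp;
	b->tb_inactive_offset = b->tb_offset;
	b->tb_offset = 0;
}
#endif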
10362 10386
10363 10387 /*
10364 10388 * Note: called from cross call context. This function activates a buffer
10365 10389 * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation
10366 10390 * is guaranteed by the disabling of interrupts.
10367 10391 */
10368 10392 static void
10369 10393 dtrace_buffer_activate(dtrace_state_t *state)
10370 10394 {
10371 10395 dtrace_buffer_t *buf;
10372 10396 dtrace_icookie_t cookie = dtrace_interrupt_disable();
10373 10397
10374 10398 buf = &state->dts_buffer[CPU->cpu_id];
10375 10399
10376 10400 if (buf->dtb_tomax != NULL) {
10377 10401 /*
10378 10402 * We might like to assert that the buffer is marked inactive,
10379 10403 * but this isn't necessarily true: the buffer for the CPU
10380 10404 * that processes the BEGIN probe has its buffer activated
10381 10405 * manually. In this case, we take the (harmless) action
10382 10406 			 * of re-clearing the INACTIVE bit.
10383 10407 */
10384 10408 buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
10385 10409 }
10386 10410
10387 10411 dtrace_interrupt_enable(cookie);
10388 10412 }
10389 10413
10390 10414 static int
10391 10415 dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
10392 10416 processorid_t cpu, int *factor)
10393 10417 {
10394 10418 cpu_t *cp;
10395 10419 dtrace_buffer_t *buf;
10396 10420 int allocated = 0, desired = 0;
10397 10421
10398 10422 ASSERT(MUTEX_HELD(&cpu_lock));
10399 10423 ASSERT(MUTEX_HELD(&dtrace_lock));
10400 10424
10401 10425 *factor = 1;
10402 10426
10403 10427 if (size > dtrace_nonroot_maxsize &&
10404 10428 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
10405 10429 return (EFBIG);
10406 10430
10407 10431 cp = cpu_list;
10408 10432
10409 10433 do {
10410 10434 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
10411 10435 continue;
10412 10436
10413 10437 buf = &bufs[cp->cpu_id];
10414 10438
10415 10439 /*
10416 10440 * If there is already a buffer allocated for this CPU, it
10417 10441 * is only possible that this is a DR event. In this case,
10418 10442 * the buffer size must match our specified size.
10419 10443 */
10420 10444 if (buf->dtb_tomax != NULL) {
10421 10445 ASSERT(buf->dtb_size == size);
10422 10446 continue;
10423 10447 }
10424 10448
10425 10449 ASSERT(buf->dtb_xamot == NULL);
10426 10450
10427 10451 if ((buf->dtb_tomax = kmem_zalloc(size,
10428 10452 KM_NOSLEEP | KM_NORMALPRI)) == NULL)
10429 10453 goto err;
10430 10454
10431 10455 buf->dtb_size = size;
10432 10456 buf->dtb_flags = flags;
10433 10457 buf->dtb_offset = 0;
10434 10458 buf->dtb_drops = 0;
10435 10459
10436 10460 if (flags & DTRACEBUF_NOSWITCH)
10437 10461 continue;
10438 10462
10439 10463 if ((buf->dtb_xamot = kmem_zalloc(size,
10440 10464 KM_NOSLEEP | KM_NORMALPRI)) == NULL)
10441 10465 goto err;
10442 10466 } while ((cp = cp->cpu_next) != cpu_list);
10443 10467
10444 10468 return (0);
10445 10469
10446 10470 err:
10447 10471 cp = cpu_list;
10448 10472
10449 10473 do {
10450 10474 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
10451 10475 continue;
10452 10476
10453 10477 buf = &bufs[cp->cpu_id];
10454 10478 desired += 2;
10455 10479
10456 10480 if (buf->dtb_xamot != NULL) {
10457 10481 ASSERT(buf->dtb_tomax != NULL);
10458 10482 ASSERT(buf->dtb_size == size);
10459 10483 kmem_free(buf->dtb_xamot, size);
10460 10484 allocated++;
10461 10485 }
10462 10486
10463 10487 if (buf->dtb_tomax != NULL) {
10464 10488 ASSERT(buf->dtb_size == size);
10465 10489 kmem_free(buf->dtb_tomax, size);
10466 10490 allocated++;
10467 10491 }
10468 10492
10469 10493 buf->dtb_tomax = NULL;
10470 10494 buf->dtb_xamot = NULL;
10471 10495 buf->dtb_size = 0;
10472 10496 } while ((cp = cp->cpu_next) != cpu_list);
10473 10497
10474 10498 *factor = desired / (allocated > 0 ? allocated : 1);
10475 10499
10476 10500 return (ENOMEM);
10477 10501 }
10478 10502
10479 10503 /*
10480 10504 * Note: called from probe context. This function just increments the drop
10481 10505 * count on a buffer. It has been made a function to allow for the
10482 10506 * possibility of understanding the source of mysterious drop counts. (A
10483 10507 * problem for which one may be particularly disappointed that DTrace cannot
10484 10508 * be used to understand DTrace.)
10485 10509 */
10486 10510 static void
10487 10511 dtrace_buffer_drop(dtrace_buffer_t *buf)
10488 10512 {
10489 10513 buf->dtb_drops++;
10490 10514 }
10491 10515
10492 10516 /*
10493 10517 * Note: called from probe context. This function is called to reserve space
10494 10518 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the
10495 10519 * mstate. Returns the new offset in the buffer, or a negative value if an
10496 10520 * error has occurred.
10497 10521 */
10498 10522 static intptr_t
10499 10523 dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
10500 10524 dtrace_state_t *state, dtrace_mstate_t *mstate)
10501 10525 {
10502 10526 intptr_t offs = buf->dtb_offset, soffs;
10503 10527 intptr_t woffs;
10504 10528 caddr_t tomax;
10505 10529 size_t total;
10506 10530
10507 10531 if (buf->dtb_flags & DTRACEBUF_INACTIVE)
10508 10532 return (-1);
10509 10533
10510 10534 if ((tomax = buf->dtb_tomax) == NULL) {
10511 10535 dtrace_buffer_drop(buf);
10512 10536 return (-1);
10513 10537 }
10514 10538
10515 10539 if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
10516 10540 while (offs & (align - 1)) {
10517 10541 /*
10518 10542 * Assert that our alignment is off by a number which
10519 10543 * is itself sizeof (uint32_t) aligned.
10520 10544 */
10521 10545 ASSERT(!((align - (offs & (align - 1))) &
10522 10546 (sizeof (uint32_t) - 1)));
10523 10547 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
10524 10548 offs += sizeof (uint32_t);
10525 10549 }
10526 10550
10527 10551 if ((soffs = offs + needed) > buf->dtb_size) {
10528 10552 dtrace_buffer_drop(buf);
10529 10553 return (-1);
10530 10554 }
10531 10555
10532 10556 if (mstate == NULL)
10533 10557 return (offs);
10534 10558
10535 10559 mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
10536 10560 mstate->dtms_scratch_size = buf->dtb_size - soffs;
10537 10561 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
10538 10562
10539 10563 return (offs);
10540 10564 }
10541 10565
10542 10566 if (buf->dtb_flags & DTRACEBUF_FILL) {
10543 10567 if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
10544 10568 (buf->dtb_flags & DTRACEBUF_FULL))
10545 10569 return (-1);
10546 10570 goto out;
10547 10571 }
10548 10572
10549 10573 total = needed + (offs & (align - 1));
10550 10574
10551 10575 /*
10552 10576 * For a ring buffer, life is quite a bit more complicated. Before
10553 10577 * we can store any padding, we need to adjust our wrapping offset.
10554 10578 * (If we've never before wrapped or we're not about to, no adjustment
10555 10579 * is required.)
10556 10580 */
10557 10581 if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
10558 10582 offs + total > buf->dtb_size) {
10559 10583 woffs = buf->dtb_xamot_offset;
10560 10584
10561 10585 if (offs + total > buf->dtb_size) {
10562 10586 /*
10563 10587 * We can't fit in the end of the buffer. First, a
10564 10588 * sanity check that we can fit in the buffer at all.
10565 10589 */
10566 10590 if (total > buf->dtb_size) {
10567 10591 dtrace_buffer_drop(buf);
10568 10592 return (-1);
10569 10593 }
10570 10594
10571 10595 /*
10572 10596 * We're going to be storing at the top of the buffer,
10573 10597 * so now we need to deal with the wrapped offset. We
10574 10598 * only reset our wrapped offset to 0 if it is
10575 10599 * currently greater than the current offset. If it
10576 10600 * is less than the current offset, it is because a
10577 10601 * previous allocation induced a wrap -- but the
10578 10602 * allocation didn't subsequently take the space due
10579 10603 * to an error or false predicate evaluation. In this
10580 10604 * case, we'll just leave the wrapped offset alone: if
10581 10605 * the wrapped offset hasn't been advanced far enough
10582 10606 * for this allocation, it will be adjusted in the
10583 10607 * lower loop.
10584 10608 */
10585 10609 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
10586 10610 if (woffs >= offs)
10587 10611 woffs = 0;
10588 10612 } else {
10589 10613 woffs = 0;
10590 10614 }
10591 10615
10592 10616 /*
10593 10617 * Now we know that we're going to be storing to the
10594 10618 * top of the buffer and that there is room for us
10595 10619 * there. We need to clear the buffer from the current
10596 10620 * offset to the end (there may be old gunk there).
10597 10621 */
10598 10622 while (offs < buf->dtb_size)
10599 10623 tomax[offs++] = 0;
10600 10624
10601 10625 /*
10602 10626 * We need to set our offset to zero. And because we
10603 10627 * are wrapping, we need to set the bit indicating as
10604 10628 * much. We can also adjust our needed space back
10605 10629 * down to the space required by the ECB -- we know
10606 10630 * that the top of the buffer is aligned.
10607 10631 */
10608 10632 offs = 0;
10609 10633 total = needed;
10610 10634 buf->dtb_flags |= DTRACEBUF_WRAPPED;
10611 10635 } else {
10612 10636 /*
10613 10637 * There is room for us in the buffer, so we simply
10614 10638 * need to check the wrapped offset.
10615 10639 */
10616 10640 if (woffs < offs) {
10617 10641 /*
10618 10642 * The wrapped offset is less than the offset.
10619 10643 * This can happen if we allocated buffer space
10620 10644 * that induced a wrap, but then we didn't
10621 10645 * subsequently take the space due to an error
10622 10646 * or false predicate evaluation. This is
10623 10647 * okay; we know that _this_ allocation isn't
10624 10648 * going to induce a wrap. We still can't
10625 10649 * reset the wrapped offset to be zero,
10626 10650 * however: the space may have been trashed in
10627 10651 * the previous failed probe attempt. But at
10628 10652 * least the wrapped offset doesn't need to
10629 10653 * be adjusted at all...
10630 10654 */
10631 10655 goto out;
10632 10656 }
10633 10657 }
10634 10658
10635 10659 while (offs + total > woffs) {
10636 10660 dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
10637 10661 size_t size;
10638 10662
10639 10663 if (epid == DTRACE_EPIDNONE) {
10640 10664 size = sizeof (uint32_t);
10641 10665 } else {
10642 10666 ASSERT(epid <= state->dts_necbs);
10643 10667 ASSERT(state->dts_ecbs[epid - 1] != NULL);
10644 10668
10645 10669 size = state->dts_ecbs[epid - 1]->dte_size;
10646 10670 }
10647 10671
10648 10672 ASSERT(woffs + size <= buf->dtb_size);
10649 10673 ASSERT(size != 0);
10650 10674
10651 10675 if (woffs + size == buf->dtb_size) {
10652 10676 /*
10653 10677 * We've reached the end of the buffer; we want
10654 10678 * to set the wrapped offset to 0 and break
10655 10679 * out. However, if the offs is 0, then we're
10656 10680 * in a strange edge-condition: the amount of
10657 10681 * space that we want to reserve plus the size
10658 10682 * of the record that we're overwriting is
10659 10683 * greater than the size of the buffer. This
10660 10684 * is problematic because if we reserve the
10661 10685 * space but subsequently don't consume it (due
10662 10686 * to a failed predicate or error) the wrapped
10663 10687 * offset will be 0 -- yet the EPID at offset 0
10664 10688 * will not be committed. This situation is
10665 10689 * relatively easy to deal with: if we're in
10666 10690 * this case, the buffer is indistinguishable
10667 10691 * from one that hasn't wrapped; we need only
10668 10692 * finish the job by clearing the wrapped bit,
10669 10693 * explicitly setting the offset to be 0, and
10670 10694 * zero'ing out the old data in the buffer.
10671 10695 */
10672 10696 if (offs == 0) {
10673 10697 buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
10674 10698 buf->dtb_offset = 0;
10675 10699 woffs = total;
10676 10700
10677 10701 while (woffs < buf->dtb_size)
10678 10702 tomax[woffs++] = 0;
10679 10703 }
10680 10704
10681 10705 woffs = 0;
10682 10706 break;
10683 10707 }
10684 10708
10685 10709 woffs += size;
10686 10710 }
10687 10711
10688 10712 /*
10689 10713 * We have a wrapped offset. It may be that the wrapped offset
10690 10714 * has become zero -- that's okay.
10691 10715 */
10692 10716 buf->dtb_xamot_offset = woffs;
10693 10717 }
10694 10718
10695 10719 out:
10696 10720 /*
10697 10721 * Now we can plow the buffer with any necessary padding.
10698 10722 */
10699 10723 while (offs & (align - 1)) {
10700 10724 /*
10701 10725 * Assert that our alignment is off by a number which
10702 10726 * is itself sizeof (uint32_t) aligned.
10703 10727 */
10704 10728 ASSERT(!((align - (offs & (align - 1))) &
10705 10729 (sizeof (uint32_t) - 1)));
10706 10730 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
10707 10731 offs += sizeof (uint32_t);
10708 10732 }
10709 10733
10710 10734 if (buf->dtb_flags & DTRACEBUF_FILL) {
10711 10735 if (offs + needed > buf->dtb_size - state->dts_reserve) {
10712 10736 buf->dtb_flags |= DTRACEBUF_FULL;
10713 10737 return (-1);
10714 10738 }
10715 10739 }
10716 10740
10717 10741 if (mstate == NULL)
10718 10742 return (offs);
10719 10743
10720 10744 /*
10721 10745 * For ring buffers and fill buffers, the scratch space is always
10722 10746 * the inactive buffer.
10723 10747 */
10724 10748 mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
10725 10749 mstate->dtms_scratch_size = buf->dtb_size;
10726 10750 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
10727 10751
10728 10752 return (offs);
10729 10753 }
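/*
 * A worked example of the padding loop above, with hypothetical numbers
 * and a compiled-out toy_* helper: record offsets are always multiples of
 * sizeof (uint32_t), so with offs = 12 and align = 8, exactly one
 * DTRACE_EPIDNONE pad word is stored at byte offset 12 and the
 * reservation begins at byte offset 16.
 */
#if 0
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static size_t
toy_pad(uint32_t *words, size_t offs, size_t align)
{
	while (offs & (align - 1)) {
		/* the gap is itself a whole number of uint32_t slots */
		assert((offs & (sizeof (uint32_t) - 1)) == 0);
		words[offs / sizeof (uint32_t)] = 0;	/* DTRACE_EPIDNONE */
		offs += sizeof (uint32_t);
	}
	return (offs);
}
#endif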
10730 10754
10731 10755 static void
10732 10756 dtrace_buffer_polish(dtrace_buffer_t *buf)
10733 10757 {
10734 10758 ASSERT(buf->dtb_flags & DTRACEBUF_RING);
10735 10759 ASSERT(MUTEX_HELD(&dtrace_lock));
10736 10760
10737 10761 if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
10738 10762 return;
10739 10763
10740 10764 /*
10741 10765 * We need to polish the ring buffer. There are three cases:
10742 10766 *
10743 10767 * - The first (and presumably most common) is that there is no gap
10744 10768 * between the buffer offset and the wrapped offset. In this case,
10745 10769 * there is nothing in the buffer that isn't valid data; we can
10746 10770 * mark the buffer as polished and return.
10747 10771 *
10748 10772 * - The second (less common than the first but still more common
10749 10773 * than the third) is that there is a gap between the buffer offset
10750 10774 * and the wrapped offset, and the wrapped offset is larger than the
10751 10775 * buffer offset. This can happen because of an alignment issue, or
10752 10776 * can happen because of a call to dtrace_buffer_reserve() that
10753 10777 * didn't subsequently consume the buffer space. In this case,
10754 10778 * we need to zero the data from the buffer offset to the wrapped
10755 10779 * offset.
10756 10780 *
10757 10781 * - The third (and least common) is that there is a gap between the
10758 10782 * buffer offset and the wrapped offset, but the wrapped offset is
10759 10783 * _less_ than the buffer offset. This can only happen because a
10760 10784 * call to dtrace_buffer_reserve() induced a wrap, but the space
10761 10785 * was not subsequently consumed. In this case, we need to zero the
10762 10786 * space from the offset to the end of the buffer _and_ from the
10763 10787 * top of the buffer to the wrapped offset.
10764 10788 */
10765 10789 if (buf->dtb_offset < buf->dtb_xamot_offset) {
10766 10790 bzero(buf->dtb_tomax + buf->dtb_offset,
10767 10791 buf->dtb_xamot_offset - buf->dtb_offset);
10768 10792 }
10769 10793
10770 10794 if (buf->dtb_offset > buf->dtb_xamot_offset) {
10771 10795 bzero(buf->dtb_tomax + buf->dtb_offset,
10772 10796 buf->dtb_size - buf->dtb_offset);
10773 10797 bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
10774 10798 }
10775 10799 }
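/*
 * The three polishing cases above with hypothetical numbers, as a
 * compiled-out sketch: for dtb_size = 64, dtb_offset = 48 and
 * dtb_xamot_offset = 16, only [16, 48) holds valid data, so both the
 * tail [48, 64) and the head [0, 16) are zeroed.
 */
#if 0
#include <string.h>

static void
toy_polish(char *buf, size_t size, size_t offset, size_t woffset)
{
	if (offset < woffset) {
		memset(buf + offset, 0, woffset - offset);	/* case 2 */
	} else if (offset > woffset) {
		memset(buf + offset, 0, size - offset);		/* case 3 */
		memset(buf, 0, woffset);
	}
	/* case 1 (offset == woffset): nothing to zero */
}
#endif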
10776 10800
10777 10801 /*
10778 10802 * This routine determines if data generated at the specified time has likely
10779 10803 * been entirely consumed at user-level. This routine is called to determine
10780 10804 * if an ECB on a defunct probe (but for an active enabling) can be safely
10781 10805 * disabled and destroyed.
10782 10806 */
10783 10807 static int
10784 10808 dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when)
10785 10809 {
10786 10810 int i;
10787 10811
10788 10812 for (i = 0; i < NCPU; i++) {
10789 10813 dtrace_buffer_t *buf = &bufs[i];
10790 10814
10791 10815 if (buf->dtb_size == 0)
10792 10816 continue;
10793 10817
10794 10818 if (buf->dtb_flags & DTRACEBUF_RING)
10795 10819 return (0);
10796 10820
10797 10821 if (!buf->dtb_switched && buf->dtb_offset != 0)
10798 10822 return (0);
10799 10823
10800 10824 if (buf->dtb_switched - buf->dtb_interval < when)
10801 10825 return (0);
10802 10826 }
10803 10827
10804 10828 return (1);
10805 10829 }
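/*
 * A timeline sketch of the consumed check, with hypothetical numbers: if
 * the provider became defunct at when = 100 and a buffer has
 * dtb_switched = 130 with dtb_interval = 20, its last two switches
 * happened at t = 110 and t = 130.  Both postdate the newest data of
 * interest, so everything recorded before t = 100 has been copied out,
 * and the check (130 - 20 < 100) correctly declines to veto reaping.
 */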
10806 10830
10807 10831 static void
10808 10832 dtrace_buffer_free(dtrace_buffer_t *bufs)
10809 10833 {
10810 10834 int i;
10811 10835
10812 10836 for (i = 0; i < NCPU; i++) {
10813 10837 dtrace_buffer_t *buf = &bufs[i];
10814 10838
10815 10839 if (buf->dtb_tomax == NULL) {
10816 10840 ASSERT(buf->dtb_xamot == NULL);
10817 10841 ASSERT(buf->dtb_size == 0);
10818 10842 continue;
10819 10843 }
10820 10844
10821 10845 if (buf->dtb_xamot != NULL) {
10822 10846 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
10823 10847 kmem_free(buf->dtb_xamot, buf->dtb_size);
10824 10848 }
10825 10849
10826 10850 kmem_free(buf->dtb_tomax, buf->dtb_size);
10827 10851 buf->dtb_size = 0;
10828 10852 buf->dtb_tomax = NULL;
10829 10853 buf->dtb_xamot = NULL;
10830 10854 }
10831 10855 }
10832 10856
10833 10857 /*
10834 10858 * DTrace Enabling Functions
10835 10859 */
10836 10860 static dtrace_enabling_t *
10837 10861 dtrace_enabling_create(dtrace_vstate_t *vstate)
10838 10862 {
10839 10863 dtrace_enabling_t *enab;
10840 10864
10841 10865 enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
10842 10866 enab->dten_vstate = vstate;
10843 10867
10844 10868 return (enab);
10845 10869 }
10846 10870
10847 10871 static void
10848 10872 dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
10849 10873 {
10850 10874 dtrace_ecbdesc_t **ndesc;
10851 10875 size_t osize, nsize;
10852 10876
10853 10877 /*
10854 10878 * We can't add to enablings after we've enabled them, or after we've
10855 10879 * retained them.
10856 10880 */
10857 10881 ASSERT(enab->dten_probegen == 0);
10858 10882 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
10859 10883
10860 10884 if (enab->dten_ndesc < enab->dten_maxdesc) {
10861 10885 enab->dten_desc[enab->dten_ndesc++] = ecb;
10862 10886 return;
10863 10887 }
10864 10888
10865 10889 	osize = enab->dten_maxdesc * sizeof (dtrace_ecbdesc_t *);
10866 10890
10867 10891 if (enab->dten_maxdesc == 0) {
10868 10892 enab->dten_maxdesc = 1;
10869 10893 } else {
10870 10894 enab->dten_maxdesc <<= 1;
10871 10895 }
10872 10896
10873 10897 ASSERT(enab->dten_ndesc < enab->dten_maxdesc);
10874 10898
10875 10899 	nsize = enab->dten_maxdesc * sizeof (dtrace_ecbdesc_t *);
10876 10900 ndesc = kmem_zalloc(nsize, KM_SLEEP);
10877 10901 bcopy(enab->dten_desc, ndesc, osize);
10878 10902 kmem_free(enab->dten_desc, osize);
10879 10903
10880 10904 enab->dten_desc = ndesc;
10881 10905 enab->dten_desc[enab->dten_ndesc++] = ecb;
10882 10906 }
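/*
 * A growth sketch for the doubling above, with hypothetical counts:
 * adding five descriptions to an empty enabling reallocates dten_desc at
 * capacities 1, 2, 4 and 8, copying 0, 1, 2 and 4 pointers at the
 * successive growths -- seven copies for five insertions, i.e. amortized
 * O(1) work per dtrace_enabling_add() call.
 */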
10883 10907
10884 10908 static void
10885 10909 dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
10886 10910 dtrace_probedesc_t *pd)
10887 10911 {
10888 10912 dtrace_ecbdesc_t *new;
10889 10913 dtrace_predicate_t *pred;
10890 10914 dtrace_actdesc_t *act;
10891 10915
10892 10916 /*
10893 10917 * We're going to create a new ECB description that matches the
10894 10918 * specified ECB in every way, but has the specified probe description.
10895 10919 */
10896 10920 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
10897 10921
10898 10922 if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
10899 10923 dtrace_predicate_hold(pred);
10900 10924
10901 10925 for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
10902 10926 dtrace_actdesc_hold(act);
10903 10927
10904 10928 new->dted_action = ecb->dted_action;
10905 10929 new->dted_pred = ecb->dted_pred;
10906 10930 new->dted_probe = *pd;
10907 10931 new->dted_uarg = ecb->dted_uarg;
10908 10932
10909 10933 dtrace_enabling_add(enab, new);
10910 10934 }
10911 10935
10912 10936 static void
10913 10937 dtrace_enabling_dump(dtrace_enabling_t *enab)
10914 10938 {
10915 10939 int i;
10916 10940
10917 10941 for (i = 0; i < enab->dten_ndesc; i++) {
10918 10942 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;
10919 10943
10920 10944 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
10921 10945 desc->dtpd_provider, desc->dtpd_mod,
10922 10946 desc->dtpd_func, desc->dtpd_name);
10923 10947 }
10924 10948 }
10925 10949
10926 10950 static void
10927 10951 dtrace_enabling_destroy(dtrace_enabling_t *enab)
10928 10952 {
10929 10953 int i;
10930 10954 dtrace_ecbdesc_t *ep;
10931 10955 dtrace_vstate_t *vstate = enab->dten_vstate;
10932 10956
10933 10957 ASSERT(MUTEX_HELD(&dtrace_lock));
10934 10958
10935 10959 for (i = 0; i < enab->dten_ndesc; i++) {
10936 10960 dtrace_actdesc_t *act, *next;
10937 10961 dtrace_predicate_t *pred;
10938 10962
10939 10963 ep = enab->dten_desc[i];
10940 10964
10941 10965 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
10942 10966 dtrace_predicate_release(pred, vstate);
10943 10967
10944 10968 for (act = ep->dted_action; act != NULL; act = next) {
10945 10969 next = act->dtad_next;
10946 10970 dtrace_actdesc_release(act, vstate);
10947 10971 }
10948 10972
10949 10973 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
10950 10974 }
10951 10975
10952 10976 kmem_free(enab->dten_desc,
10953 10977 	    enab->dten_maxdesc * sizeof (dtrace_ecbdesc_t *));
10954 10978
10955 10979 /*
10956 10980 * If this was a retained enabling, decrement the dts_nretained count
10957 10981 * and take it off of the dtrace_retained list.
10958 10982 */
10959 10983 if (enab->dten_prev != NULL || enab->dten_next != NULL ||
10960 10984 dtrace_retained == enab) {
10961 10985 ASSERT(enab->dten_vstate->dtvs_state != NULL);
10962 10986 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
10963 10987 enab->dten_vstate->dtvs_state->dts_nretained--;
10964 10988 dtrace_retained_gen++;
10965 10989 }
10966 10990
10967 10991 if (enab->dten_prev == NULL) {
10968 10992 if (dtrace_retained == enab) {
10969 10993 dtrace_retained = enab->dten_next;
10970 10994
10971 10995 if (dtrace_retained != NULL)
10972 10996 dtrace_retained->dten_prev = NULL;
10973 10997 }
10974 10998 } else {
10975 10999 ASSERT(enab != dtrace_retained);
10976 11000 ASSERT(dtrace_retained != NULL);
10977 11001 enab->dten_prev->dten_next = enab->dten_next;
10978 11002 }
10979 11003
10980 11004 if (enab->dten_next != NULL) {
10981 11005 ASSERT(dtrace_retained != NULL);
10982 11006 enab->dten_next->dten_prev = enab->dten_prev;
10983 11007 }
10984 11008
10985 11009 kmem_free(enab, sizeof (dtrace_enabling_t));
10986 11010 }
10987 11011
10988 11012 static int
10989 11013 dtrace_enabling_retain(dtrace_enabling_t *enab)
10990 11014 {
10991 11015 dtrace_state_t *state;
10992 11016
10993 11017 ASSERT(MUTEX_HELD(&dtrace_lock));
10994 11018 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
10995 11019 ASSERT(enab->dten_vstate != NULL);
10996 11020
10997 11021 state = enab->dten_vstate->dtvs_state;
10998 11022 ASSERT(state != NULL);
10999 11023
11000 11024 /*
11001 11025 * We only allow each state to retain dtrace_retain_max enablings.
11002 11026 */
11003 11027 if (state->dts_nretained >= dtrace_retain_max)
11004 11028 return (ENOSPC);
11005 11029
11006 11030 state->dts_nretained++;
11007 11031 dtrace_retained_gen++;
11008 11032
11009 11033 if (dtrace_retained == NULL) {
11010 11034 dtrace_retained = enab;
11011 11035 return (0);
11012 11036 }
11013 11037
11014 11038 enab->dten_next = dtrace_retained;
11015 11039 dtrace_retained->dten_prev = enab;
11016 11040 dtrace_retained = enab;
11017 11041
11018 11042 return (0);
11019 11043 }
11020 11044
11021 11045 static int
11022 11046 dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
11023 11047 dtrace_probedesc_t *create)
11024 11048 {
11025 11049 dtrace_enabling_t *new, *enab;
11026 11050 int found = 0, err = ENOENT;
11027 11051
11028 11052 ASSERT(MUTEX_HELD(&dtrace_lock));
11029 11053 ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
11030 11054 ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
11031 11055 ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
11032 11056 ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);
11033 11057
11034 11058 new = dtrace_enabling_create(&state->dts_vstate);
11035 11059
11036 11060 /*
11037 11061 * Iterate over all retained enablings, looking for enablings that
11038 11062 * match the specified state.
11039 11063 */
11040 11064 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11041 11065 int i;
11042 11066
11043 11067 /*
11044 11068 * dtvs_state can only be NULL for helper enablings -- and
11045 11069 * helper enablings can't be retained.
11046 11070 */
11047 11071 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11048 11072
11049 11073 if (enab->dten_vstate->dtvs_state != state)
11050 11074 continue;
11051 11075
11052 11076 /*
11053 11077 * Now iterate over each probe description; we're looking for
11054 11078 * an exact match to the specified probe description.
11055 11079 */
11056 11080 for (i = 0; i < enab->dten_ndesc; i++) {
11057 11081 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11058 11082 dtrace_probedesc_t *pd = &ep->dted_probe;
11059 11083
11060 11084 if (strcmp(pd->dtpd_provider, match->dtpd_provider))
11061 11085 continue;
11062 11086
11063 11087 if (strcmp(pd->dtpd_mod, match->dtpd_mod))
11064 11088 continue;
11065 11089
11066 11090 if (strcmp(pd->dtpd_func, match->dtpd_func))
11067 11091 continue;
11068 11092
11069 11093 if (strcmp(pd->dtpd_name, match->dtpd_name))
11070 11094 continue;
11071 11095
11072 11096 /*
11073 11097 * We have a winning probe! Add it to our growing
11074 11098 * enabling.
11075 11099 */
11076 11100 found = 1;
11077 11101 dtrace_enabling_addlike(new, ep, create);
11078 11102 }
11079 11103 }
11080 11104
11081 11105 if (!found || (err = dtrace_enabling_retain(new)) != 0) {
11082 11106 dtrace_enabling_destroy(new);
11083 11107 return (err);
11084 11108 }
11085 11109
11086 11110 return (0);
11087 11111 }
11088 11112
11089 11113 static void
11090 11114 dtrace_enabling_retract(dtrace_state_t *state)
11091 11115 {
11092 11116 dtrace_enabling_t *enab, *next;
11093 11117
11094 11118 ASSERT(MUTEX_HELD(&dtrace_lock));
11095 11119
11096 11120 /*
11097 11121 	 * Iterate over all retained enablings, destroying the enablings
11098 11122 * for the specified state.
11099 11123 */
11100 11124 for (enab = dtrace_retained; enab != NULL; enab = next) {
11101 11125 next = enab->dten_next;
11102 11126
11103 11127 /*
11104 11128 * dtvs_state can only be NULL for helper enablings -- and
11105 11129 * helper enablings can't be retained.
11106 11130 */
11107 11131 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11108 11132
11109 11133 if (enab->dten_vstate->dtvs_state == state) {
11110 11134 ASSERT(state->dts_nretained > 0);
11111 11135 dtrace_enabling_destroy(enab);
11112 11136 }
11113 11137 }
11114 11138
11115 11139 ASSERT(state->dts_nretained == 0);
11116 11140 }
11117 11141
11118 11142 static int
11119 11143 dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
11120 11144 {
11121 11145 int i = 0;
11122 11146 int total_matched = 0, matched = 0;
11123 11147
11124 11148 ASSERT(MUTEX_HELD(&cpu_lock));
11125 11149 ASSERT(MUTEX_HELD(&dtrace_lock));
11126 11150
11127 11151 for (i = 0; i < enab->dten_ndesc; i++) {
11128 11152 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11129 11153
11130 11154 enab->dten_current = ep;
11131 11155 enab->dten_error = 0;
11132 11156
11133 11157 /*
11134 11158 * If a provider failed to enable a probe then get out and
11135 11159 * let the consumer know we failed.
11136 11160 */
11137 11161 if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
11138 11162 return (EBUSY);
11139 11163
11140 11164 total_matched += matched;
11141 11165
11142 11166 if (enab->dten_error != 0) {
11143 11167 /*
11144 11168 * If we get an error half-way through enabling the
11145 11169 * probes, we kick out -- perhaps with some number of
11146 11170 * them enabled. Leaving enabled probes enabled may
11147 11171 * be slightly confusing for user-level, but we expect
11148 11172 * that no one will attempt to actually drive on in
11149 11173 * the face of such errors. If this is an anonymous
11150 11174 * enabling (indicated with a NULL nmatched pointer),
11151 11175 * we cmn_err() a message. We aren't expecting to
11152 11176 * get such an error -- such as it can exist at all,
11153 11177 			 * get such an error -- insofar as it can exist at all,
11154 11178 * properties.
11155 11179 */
11156 11180 if (nmatched == NULL) {
11157 11181 cmn_err(CE_WARN, "dtrace_enabling_match() "
11158 11182 "error on %p: %d", (void *)ep,
11159 11183 enab->dten_error);
11160 11184 }
11161 11185
11162 11186 return (enab->dten_error);
11163 11187 }
11164 11188 }
11165 11189
11166 11190 enab->dten_probegen = dtrace_probegen;
11167 11191 if (nmatched != NULL)
11168 11192 *nmatched = total_matched;
11169 11193
11170 11194 return (0);
11171 11195 }
11172 11196
11173 11197 static void
11174 11198 dtrace_enabling_matchall(void)
11175 11199 {
11176 11200 dtrace_enabling_t *enab;
11177 11201
11178 11202 mutex_enter(&cpu_lock);
11179 11203 mutex_enter(&dtrace_lock);
11180 11204
11181 11205 /*
11182 11206 * Iterate over all retained enablings to see if any probes match
11183 11207 * against them. We only perform this operation on enablings for which
11184 11208 * we have sufficient permissions by virtue of being in the global zone
11185 11209 * or in the same zone as the DTrace client. Because we can be called
11186 11210 * after dtrace_detach() has been called, we cannot assert that there
11187 11211 * are retained enablings. We can safely load from dtrace_retained,
11188 11212 * however: the taskq_destroy() at the end of dtrace_detach() will
11189 11213 * block pending our completion.
11190 11214 */
11191 11215 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11192 11216 dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred;
11193 11217 cred_t *cr = dcr->dcr_cred;
11194 11218 zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0;
11195 11219
11196 11220 if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL &&
11197 11221 (zone == GLOBAL_ZONEID || getzoneid() == zone)))
11198 11222 (void) dtrace_enabling_match(enab, NULL);
11199 11223 }
11200 11224
11201 11225 mutex_exit(&dtrace_lock);
11202 11226 mutex_exit(&cpu_lock);
11203 11227 }
11204 11228
11205 11229 /*
11206 11230 * If an enabling is to be enabled without having matched probes (that is, if
11207 11231 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
11208 11232 * enabling must be _primed_ by creating an ECB for every ECB description.
11209 11233 * This must be done to assure that we know the number of speculations, the
11210 11234 * number of aggregations, the minimum buffer size needed, etc. before we
11211 11235 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually
11212 11236  * enabling any probes, we create ECBs for every ECB description, but with a
11213 11237 * NULL probe -- which is exactly what this function does.
11214 11238 */
11215 11239 static void
11216 11240 dtrace_enabling_prime(dtrace_state_t *state)
11217 11241 {
11218 11242 dtrace_enabling_t *enab;
11219 11243 int i;
11220 11244
11221 11245 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11222 11246 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11223 11247
11224 11248 if (enab->dten_vstate->dtvs_state != state)
11225 11249 continue;
11226 11250
11227 11251 /*
11228 11252 * We don't want to prime an enabling more than once, lest
11229 11253 * we allow a malicious user to induce resource exhaustion.
11230 11254 * (The ECBs that result from priming an enabling aren't
11231 11255 * leaked -- but they also aren't deallocated until the
11232 11256 * consumer state is destroyed.)
11233 11257 */
11234 11258 if (enab->dten_primed)
11235 11259 continue;
11236 11260
11237 11261 for (i = 0; i < enab->dten_ndesc; i++) {
11238 11262 enab->dten_current = enab->dten_desc[i];
11239 11263 (void) dtrace_probe_enable(NULL, enab);
11240 11264 }
11241 11265
11242 11266 enab->dten_primed = 1;
11243 11267 }
11244 11268 }
11245 11269
11246 11270 /*
11247 11271 * Called to indicate that probes should be provided due to retained
11248 11272 * enablings. This is implemented in terms of dtrace_probe_provide(), but it
11249 11273 * must take an initial lap through the enabling calling the dtps_provide()
11250 11274 * entry point explicitly to allow for autocreated probes.
11251 11275 */
11252 11276 static void
11253 11277 dtrace_enabling_provide(dtrace_provider_t *prv)
11254 11278 {
11255 11279 int i, all = 0;
11256 11280 dtrace_probedesc_t desc;
11257 11281 dtrace_genid_t gen;
11258 11282
11259 11283 ASSERT(MUTEX_HELD(&dtrace_lock));
11260 11284 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
11261 11285
11262 11286 if (prv == NULL) {
11263 11287 all = 1;
11264 11288 prv = dtrace_provider;
11265 11289 }
11266 11290
11267 11291 do {
11268 11292 dtrace_enabling_t *enab;
11269 11293 void *parg = prv->dtpv_arg;
11270 11294
11271 11295 retry:
11272 11296 gen = dtrace_retained_gen;
11273 11297 for (enab = dtrace_retained; enab != NULL;
11274 11298 enab = enab->dten_next) {
11275 11299 for (i = 0; i < enab->dten_ndesc; i++) {
11276 11300 desc = enab->dten_desc[i]->dted_probe;
11277 11301 mutex_exit(&dtrace_lock);
11278 11302 prv->dtpv_pops.dtps_provide(parg, &desc);
11279 11303 mutex_enter(&dtrace_lock);
11280 11304 /*
11281 11305 * Process the retained enablings again if
11282 11306 * they have changed while we weren't holding
11283 11307 * dtrace_lock.
11284 11308 */
11285 11309 if (gen != dtrace_retained_gen)
11286 11310 goto retry;
11287 11311 }
11288 11312 }
11289 11313 } while (all && (prv = prv->dtpv_next) != NULL);
11290 11314
11291 11315 mutex_exit(&dtrace_lock);
11292 11316 dtrace_probe_provide(NULL, all ? NULL : prv);
11293 11317 mutex_enter(&dtrace_lock);
11294 11318 }
11295 11319
11296 11320 /*
11297 11321 * Called to reap ECBs that are attached to probes from defunct providers.
11298 11322 */
11299 11323 static void
11300 11324 dtrace_enabling_reap(void)
11301 11325 {
11302 11326 dtrace_provider_t *prov;
11303 11327 dtrace_probe_t *probe;
11304 11328 dtrace_ecb_t *ecb;
11305 11329 hrtime_t when;
11306 11330 int i;
11307 11331
11308 11332 mutex_enter(&cpu_lock);
11309 11333 mutex_enter(&dtrace_lock);
11310 11334
11311 11335 for (i = 0; i < dtrace_nprobes; i++) {
11312 11336 if ((probe = dtrace_probes[i]) == NULL)
11313 11337 continue;
11314 11338
11315 11339 if (probe->dtpr_ecb == NULL)
11316 11340 continue;
11317 11341
11318 11342 prov = probe->dtpr_provider;
11319 11343
11320 11344 if ((when = prov->dtpv_defunct) == 0)
11321 11345 continue;
11322 11346
11323 11347 /*
11324 11348 * We have ECBs on a defunct provider: we want to reap these
11325 11349 * ECBs to allow the provider to unregister. The destruction
11326 11350 * of these ECBs must be done carefully: if we destroy the ECB
11327 11351 * and the consumer later wishes to consume an EPID that
11328 11352 * corresponds to the destroyed ECB (and if the EPID metadata
11329 11353 * has not been previously consumed), the consumer will abort
11330 11354 * processing on the unknown EPID. To reduce (but not, sadly,
11331 11355 * eliminate) the possibility of this, we will only destroy an
11332 11356 * ECB for a defunct provider if, for the state that
11333 11357 * corresponds to the ECB:
11334 11358 *
11335 11359 * (a) There is no speculative tracing (which can effectively
11336 11360 * cache an EPID for an arbitrary amount of time).
11337 11361 *
11338 11362 * (b) The principal buffers have been switched twice since the
11339 11363 * provider became defunct.
11340 11364 *
11341 11365 * (c) The aggregation buffers are of zero size or have been
11342 11366 * switched twice since the provider became defunct.
11343 11367 *
11344 11368 * We use dts_speculates to determine (a) and call a function
11345 11369 * (dtrace_buffer_consumed()) to determine (b) and (c). Note
11346 11370 * that as soon as we've been unable to destroy one of the ECBs
11347 11371 * associated with the probe, we quit trying -- reaping is only
11348 11372 * fruitful in as much as we can destroy all ECBs associated
11349 11373 * with the defunct provider's probes.
11350 11374 */
11351 11375 while ((ecb = probe->dtpr_ecb) != NULL) {
11352 11376 dtrace_state_t *state = ecb->dte_state;
11353 11377 dtrace_buffer_t *buf = state->dts_buffer;
11354 11378 dtrace_buffer_t *aggbuf = state->dts_aggbuffer;
11355 11379
11356 11380 if (state->dts_speculates)
11357 11381 break;
11358 11382
11359 11383 if (!dtrace_buffer_consumed(buf, when))
11360 11384 break;
11361 11385
11362 11386 if (!dtrace_buffer_consumed(aggbuf, when))
11363 11387 break;
11364 11388
11365 11389 dtrace_ecb_disable(ecb);
11366 11390 ASSERT(probe->dtpr_ecb != ecb);
11367 11391 dtrace_ecb_destroy(ecb);
11368 11392 }
11369 11393 }
11370 11394
11371 11395 mutex_exit(&dtrace_lock);
11372 11396 mutex_exit(&cpu_lock);
11373 11397 }
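/*
 * The reap criteria (a) through (c) above, condensed into a single
 * predicate as a compiled-out sketch (the toy_* name is hypothetical):
 */
#if 0
static int
toy_ecb_reapable(dtrace_state_t *state, hrtime_t when)
{
	return (!state->dts_speculates &&			/* (a) */
	    dtrace_buffer_consumed(state->dts_buffer, when) &&	/* (b) */
	    dtrace_buffer_consumed(state->dts_aggbuffer, when));/* (c) */
}
#endif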
11374 11398
11375 11399 /*
11376 11400 * DTrace DOF Functions
11377 11401 */
11378 11402 /*ARGSUSED*/
11379 11403 static void
11380 11404 dtrace_dof_error(dof_hdr_t *dof, const char *str)
11381 11405 {
11382 11406 if (dtrace_err_verbose)
11383 11407 cmn_err(CE_WARN, "failed to process DOF: %s", str);
11384 11408
11385 11409 #ifdef DTRACE_ERRDEBUG
11386 11410 dtrace_errdebug(str);
11387 11411 #endif
11388 11412 }
11389 11413
11390 11414 /*
11391 11415 * Create DOF out of a currently enabled state. Right now, we only create
11392 11416 * DOF containing the run-time options -- but this could be expanded to create
11393 11417 * complete DOF representing the enabled state.
11394 11418 */
11395 11419 static dof_hdr_t *
11396 11420 dtrace_dof_create(dtrace_state_t *state)
11397 11421 {
11398 11422 dof_hdr_t *dof;
11399 11423 dof_sec_t *sec;
11400 11424 dof_optdesc_t *opt;
11401 11425 int i, len = sizeof (dof_hdr_t) +
11402 11426 roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
11403 11427 sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
11404 11428
11405 11429 ASSERT(MUTEX_HELD(&dtrace_lock));
11406 11430
11407 11431 dof = kmem_zalloc(len, KM_SLEEP);
11408 11432 dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
11409 11433 dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
11410 11434 dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
11411 11435 dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
11412 11436
11413 11437 dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
11414 11438 dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
11415 11439 dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
11416 11440 dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
11417 11441 dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
11418 11442 dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
11419 11443
11420 11444 dof->dofh_flags = 0;
11421 11445 dof->dofh_hdrsize = sizeof (dof_hdr_t);
11422 11446 dof->dofh_secsize = sizeof (dof_sec_t);
11423 11447 dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */
11424 11448 dof->dofh_secoff = sizeof (dof_hdr_t);
11425 11449 dof->dofh_loadsz = len;
11426 11450 dof->dofh_filesz = len;
11427 11451 dof->dofh_pad = 0;
11428 11452
11429 11453 /*
11430 11454 * Fill in the option section header...
11431 11455 */
11432 11456 sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
11433 11457 sec->dofs_type = DOF_SECT_OPTDESC;
11434 11458 sec->dofs_align = sizeof (uint64_t);
11435 11459 sec->dofs_flags = DOF_SECF_LOAD;
11436 11460 sec->dofs_entsize = sizeof (dof_optdesc_t);
11437 11461
11438 11462 opt = (dof_optdesc_t *)((uintptr_t)sec +
11439 11463 roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
11440 11464
11441 11465 sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
11442 11466 sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
11443 11467
11444 11468 for (i = 0; i < DTRACEOPT_MAX; i++) {
11445 11469 opt[i].dofo_option = i;
11446 11470 opt[i].dofo_strtab = DOF_SECIDX_NONE;
11447 11471 opt[i].dofo_value = state->dts_options[i];
11448 11472 }
11449 11473
11450 11474 return (dof);
11451 11475 }
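/*
 * A compiled-out, user-level sketch of consuming the DOF generated above,
 * assuming the structure definitions from <sys/dtrace.h> and the
 * single-section layout that dtrace_dof_create() emits (the toy_* name is
 * hypothetical): locate the lone DOF_SECT_OPTDESC section via dofh_secoff
 * and walk its dof_optdesc_t entries.
 */
#if 0
#include <sys/dtrace.h>
#include <stdio.h>

static void
toy_dump_options(const dof_hdr_t *dof)
{
	const dof_sec_t *sec = (const dof_sec_t *)
	    ((uintptr_t)dof + dof->dofh_secoff);
	const dof_optdesc_t *opt = (const dof_optdesc_t *)
	    ((uintptr_t)dof + sec->dofs_offset);
	uint64_t i, n = sec->dofs_size / sec->dofs_entsize;

	for (i = 0; i < n; i++) {
		(void) printf("option %u = %llu\n", opt[i].dofo_option,
		    (unsigned long long)opt[i].dofo_value);
	}
}
#endif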
11452 11476
11453 11477 static dof_hdr_t *
11454 11478 dtrace_dof_copyin(uintptr_t uarg, int *errp)
11455 11479 {
11456 11480 dof_hdr_t hdr, *dof;
11457 11481
11458 11482 ASSERT(!MUTEX_HELD(&dtrace_lock));
11459 11483
11460 11484 /*
11461 11485 * First, we're going to copyin() the sizeof (dof_hdr_t).
11462 11486 */
11463 11487 if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) {
11464 11488 dtrace_dof_error(NULL, "failed to copyin DOF header");
11465 11489 *errp = EFAULT;
11466 11490 return (NULL);
11467 11491 }
11468 11492
11469 11493 /*
11470 11494 * Now we'll allocate the entire DOF and copy it in -- provided
11471 11495 * that the length isn't outrageous.
11472 11496 */
11473 11497 if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
11474 11498 dtrace_dof_error(&hdr, "load size exceeds maximum");
11475 11499 *errp = E2BIG;
11476 11500 return (NULL);
11477 11501 }
11478 11502
11479 11503 if (hdr.dofh_loadsz < sizeof (hdr)) {
11480 11504 dtrace_dof_error(&hdr, "invalid load size");
11481 11505 *errp = EINVAL;
11482 11506 return (NULL);
11483 11507 }
11484 11508
11485 11509 dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);
11486 11510
11487 11511 if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
11488 11512 dof->dofh_loadsz != hdr.dofh_loadsz) {
11489 11513 kmem_free(dof, hdr.dofh_loadsz);
11490 11514 *errp = EFAULT;
11491 11515 return (NULL);
11492 11516 }
11493 11517
11494 11518 return (dof);
11495 11519 }
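/*
 * The essential shape of the copyin dance above, as a compiled-out sketch
 * with a hypothetical toy_* helper (the maximum-size check is elided): a
 * size read from user memory must be validated again after the full copy,
 * because the process can rewrite the header between the two copyin()
 * calls.
 */
#if 0
static int
toy_copyin_sized(uintptr_t uarg, dof_hdr_t **dofp)
{
	dof_hdr_t hdr, *dof;

	if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0)
		return (EFAULT);

	if (hdr.dofh_loadsz < sizeof (hdr))
		return (EINVAL);

	dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);

	/*
	 * Re-check the size after the second copy; a racing writer could
	 * otherwise induce a mismatched free size below.
	 */
	if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
	    dof->dofh_loadsz != hdr.dofh_loadsz) {
		kmem_free(dof, hdr.dofh_loadsz);
		return (EFAULT);
	}

	*dofp = dof;
	return (0);
}
#endif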
11496 11520
11497 11521 static dof_hdr_t *
11498 11522 dtrace_dof_property(const char *name)
11499 11523 {
11500 11524 uchar_t *buf;
11501 11525 uint64_t loadsz;
11502 11526 unsigned int len, i;
11503 11527 dof_hdr_t *dof;
11504 11528
11505 11529 /*
11506 11530 	 * Unfortunately, arrays of values in .conf files are always (and
11507 11531 * only) interpreted to be integer arrays. We must read our DOF
11508 11532 * as an integer array, and then squeeze it into a byte array.
11509 11533 */
11510 11534 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
11511 11535 (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
11512 11536 return (NULL);
11513 11537
11514 11538 for (i = 0; i < len; i++)
11515 11539 buf[i] = (uchar_t)(((int *)buf)[i]);
11516 11540
11517 11541 if (len < sizeof (dof_hdr_t)) {
11518 11542 ddi_prop_free(buf);
11519 11543 dtrace_dof_error(NULL, "truncated header");
11520 11544 return (NULL);
11521 11545 }
11522 11546
11523 11547 if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
11524 11548 ddi_prop_free(buf);
11525 11549 dtrace_dof_error(NULL, "truncated DOF");
11526 11550 return (NULL);
11527 11551 }
11528 11552
11529 11553 if (loadsz >= dtrace_dof_maxsize) {
11530 11554 ddi_prop_free(buf);
11531 11555 dtrace_dof_error(NULL, "oversized DOF");
11532 11556 return (NULL);
11533 11557 }
11534 11558
11535 11559 dof = kmem_alloc(loadsz, KM_SLEEP);
11536 11560 bcopy(buf, dof, loadsz);
11537 11561 ddi_prop_free(buf);
11538 11562
11539 11563 return (dof);
11540 11564 }
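/*
 * A compiled-out sketch of the in-place squeeze above (hypothetical toy_*
 * name; the buffer is int-aligned, as ddi_prop_lookup_int_array()
 * guarantees): each iteration reads the int at byte offset 4 * i before
 * storing at byte offset i, and since i <= 4 * i, no not-yet-read source
 * element is ever overwritten early.
 */
#if 0
static void
toy_squeeze(unsigned char *buf, unsigned int len)
{
	unsigned int i;

	for (i = 0; i < len; i++)
		buf[i] = (unsigned char)(((int *)buf)[i]);
}
#endif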
11541 11565
11542 11566 static void
11543 11567 dtrace_dof_destroy(dof_hdr_t *dof)
11544 11568 {
11545 11569 kmem_free(dof, dof->dofh_loadsz);
11546 11570 }
11547 11571
11548 11572 /*
11549 11573 * Return the dof_sec_t pointer corresponding to a given section index. If the
11550 11574 * index is not valid, dtrace_dof_error() is called and NULL is returned. If
11551 11575 * a type other than DOF_SECT_NONE is specified, the header is checked against
11552 11576 * this type and NULL is returned if the types do not match.
11553 11577 */
11554 11578 static dof_sec_t *
11555 11579 dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
11556 11580 {
11557 11581 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
11558 11582 ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);
11559 11583
11560 11584 if (i >= dof->dofh_secnum) {
11561 11585 dtrace_dof_error(dof, "referenced section index is invalid");
11562 11586 return (NULL);
11563 11587 }
11564 11588
11565 11589 if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
11566 11590 dtrace_dof_error(dof, "referenced section is not loadable");
11567 11591 return (NULL);
11568 11592 }
11569 11593
11570 11594 if (type != DOF_SECT_NONE && type != sec->dofs_type) {
11571 11595 dtrace_dof_error(dof, "referenced section is the wrong type");
11572 11596 return (NULL);
11573 11597 }
11574 11598
11575 11599 return (sec);
11576 11600 }
11577 11601
11578 11602 static dtrace_probedesc_t *
11579 11603 dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
11580 11604 {
11581 11605 dof_probedesc_t *probe;
11582 11606 dof_sec_t *strtab;
11583 11607 uintptr_t daddr = (uintptr_t)dof;
11584 11608 uintptr_t str;
11585 11609 size_t size;
11586 11610
11587 11611 if (sec->dofs_type != DOF_SECT_PROBEDESC) {
11588 11612 dtrace_dof_error(dof, "invalid probe section");
11589 11613 return (NULL);
11590 11614 }
11591 11615
11592 11616 if (sec->dofs_align != sizeof (dof_secidx_t)) {
11593 11617 dtrace_dof_error(dof, "bad alignment in probe description");
11594 11618 return (NULL);
11595 11619 }
11596 11620
11597 11621 if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
11598 11622 dtrace_dof_error(dof, "truncated probe description");
11599 11623 return (NULL);
11600 11624 }
11601 11625
11602 11626 probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
11603 11627 strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);
11604 11628
11605 11629 if (strtab == NULL)
11606 11630 return (NULL);
11607 11631
11608 11632 str = daddr + strtab->dofs_offset;
11609 11633 size = strtab->dofs_size;
11610 11634
11611 11635 if (probe->dofp_provider >= strtab->dofs_size) {
11612 11636 dtrace_dof_error(dof, "corrupt probe provider");
11613 11637 return (NULL);
11614 11638 }
11615 11639
11616 11640 (void) strncpy(desc->dtpd_provider,
11617 11641 (char *)(str + probe->dofp_provider),
11618 11642 MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));
11619 11643
11620 11644 if (probe->dofp_mod >= strtab->dofs_size) {
11621 11645 dtrace_dof_error(dof, "corrupt probe module");
11622 11646 return (NULL);
11623 11647 }
11624 11648
11625 11649 (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
11626 11650 MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));
11627 11651
11628 11652 if (probe->dofp_func >= strtab->dofs_size) {
11629 11653 dtrace_dof_error(dof, "corrupt probe function");
11630 11654 return (NULL);
11631 11655 }
11632 11656
11633 11657 (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
11634 11658 MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));
11635 11659
11636 11660 if (probe->dofp_name >= strtab->dofs_size) {
11637 11661 dtrace_dof_error(dof, "corrupt probe name");
11638 11662 return (NULL);
11639 11663 }
11640 11664
11641 11665 (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
11642 11666 MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));
11643 11667
11644 11668 return (desc);
11645 11669 }
11646 11670
11647 11671 static dtrace_difo_t *
11648 11672 dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11649 11673 cred_t *cr)
11650 11674 {
11651 11675 dtrace_difo_t *dp;
11652 11676 size_t ttl = 0;
11653 11677 dof_difohdr_t *dofd;
11654 11678 uintptr_t daddr = (uintptr_t)dof;
11655 11679 size_t max = dtrace_difo_maxsize;
11656 11680 int i, l, n;
11657 11681
11658 11682 static const struct {
11659 11683 int section;
11660 11684 int bufoffs;
11661 11685 int lenoffs;
11662 11686 int entsize;
11663 11687 int align;
11664 11688 const char *msg;
11665 11689 } difo[] = {
11666 11690 { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
11667 11691 offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
11668 11692 sizeof (dif_instr_t), "multiple DIF sections" },
11669 11693
11670 11694 { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
11671 11695 offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
11672 11696 sizeof (uint64_t), "multiple integer tables" },
11673 11697
11674 11698 { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
11675 11699 offsetof(dtrace_difo_t, dtdo_strlen), 0,
11676 11700 sizeof (char), "multiple string tables" },
11677 11701
11678 11702 { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
11679 11703 offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
11680 11704 sizeof (uint_t), "multiple variable tables" },
11681 11705
11682 11706 		{ DOF_SECT_NONE, 0, 0, 0, 0, NULL }
11683 11707 };
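/*
 * The table above drives a generic "store this section's payload into
 * that DIFO field" loop via offsetof().  A compiled-out sketch of the
 * same technique, with hypothetical toy_* types:
 */
#if 0
#include <stddef.h>
#include <stdint.h>

typedef struct toy {
	void		*t_buf;
	uint32_t	t_len;
} toy_t;

static void
toy_store(toy_t *t, size_t bufoffs, size_t lenoffs, void *payload,
    uint32_t len)
{
	*(void **)((uintptr_t)t + bufoffs) = payload;
	*(uint32_t *)((uintptr_t)t + lenoffs) = len;
}

/*
 * usage: toy_store(&t, offsetof(toy_t, t_buf), offsetof(toy_t, t_len),
 * p, n);
 */
#endif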
11684 11708
11685 11709 if (sec->dofs_type != DOF_SECT_DIFOHDR) {
11686 11710 dtrace_dof_error(dof, "invalid DIFO header section");
11687 11711 return (NULL);
11688 11712 }
11689 11713
11690 11714 if (sec->dofs_align != sizeof (dof_secidx_t)) {
11691 11715 dtrace_dof_error(dof, "bad alignment in DIFO header");
11692 11716 return (NULL);
11693 11717 }
11694 11718
11695 11719 if (sec->dofs_size < sizeof (dof_difohdr_t) ||
11696 11720 sec->dofs_size % sizeof (dof_secidx_t)) {
11697 11721 dtrace_dof_error(dof, "bad size in DIFO header");
11698 11722 return (NULL);
11699 11723 }
11700 11724
11701 11725 dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
11702 11726 n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;
11703 11727
11704 11728 dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
11705 11729 dp->dtdo_rtype = dofd->dofd_rtype;
11706 11730
11707 11731 for (l = 0; l < n; l++) {
11708 11732 dof_sec_t *subsec;
11709 11733 void **bufp;
11710 11734 uint32_t *lenp;
11711 11735
11712 11736 if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
11713 11737 dofd->dofd_links[l])) == NULL)
11714 11738 goto err; /* invalid section link */
11715 11739
11716 11740 if (ttl + subsec->dofs_size > max) {
11717 11741 dtrace_dof_error(dof, "exceeds maximum size");
11718 11742 goto err;
11719 11743 }
11720 11744
11721 11745 ttl += subsec->dofs_size;
11722 11746
11723 11747 for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
11724 11748 if (subsec->dofs_type != difo[i].section)
11725 11749 continue;
11726 11750
11727 11751 if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
11728 11752 dtrace_dof_error(dof, "section not loaded");
11729 11753 goto err;
11730 11754 }
11731 11755
11732 11756 if (subsec->dofs_align != difo[i].align) {
11733 11757 dtrace_dof_error(dof, "bad alignment");
11734 11758 goto err;
11735 11759 }
11736 11760
11737 11761 bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
11738 11762 lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);
11739 11763
11740 11764 if (*bufp != NULL) {
11741 11765 dtrace_dof_error(dof, difo[i].msg);
11742 11766 goto err;
11743 11767 }
11744 11768
11745 11769 if (difo[i].entsize != subsec->dofs_entsize) {
11746 11770 dtrace_dof_error(dof, "entry size mismatch");
11747 11771 goto err;
11748 11772 }
11749 11773
11750 11774 if (subsec->dofs_entsize != 0 &&
11751 11775 (subsec->dofs_size % subsec->dofs_entsize) != 0) {
11752 11776 dtrace_dof_error(dof, "corrupt entry size");
11753 11777 goto err;
11754 11778 }
11755 11779
11756 11780 *lenp = subsec->dofs_size;
11757 11781 *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
11758 11782 bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
11759 11783 *bufp, subsec->dofs_size);
11760 11784
11761 11785 if (subsec->dofs_entsize != 0)
11762 11786 *lenp /= subsec->dofs_entsize;
11763 11787
11764 11788 break;
11765 11789 }
11766 11790
11767 11791 /*
11768 11792 * If we encounter a loadable DIFO sub-section that is not
11769 11793 * known to us, assume this is a broken program and fail.
11770 11794 */
11771 11795 if (difo[i].section == DOF_SECT_NONE &&
11772 11796 (subsec->dofs_flags & DOF_SECF_LOAD)) {
11773 11797 dtrace_dof_error(dof, "unrecognized DIFO subsection");
11774 11798 goto err;
11775 11799 }
11776 11800 }
11777 11801
11778 11802 if (dp->dtdo_buf == NULL) {
11779 11803 /*
11780 11804 * We can't have a DIF object without DIF text.
11781 11805 */
11782 11806 dtrace_dof_error(dof, "missing DIF text");
11783 11807 goto err;
11784 11808 }
11785 11809
11786 11810 /*
11787 11811 * Before we validate the DIF object, run through the variable table
11788 11812 * looking for the strings -- if any of their sizes is zero, we'll set
11789 11813 * it to be the system-wide default string size. Note that
11790 11814 * this should _not_ happen if the "strsize" option has been set --
11791 11815 * in this case, the compiler should have set the size to reflect the
11792 11816 * setting of the option.
11793 11817 */
11794 11818 for (i = 0; i < dp->dtdo_varlen; i++) {
11795 11819 dtrace_difv_t *v = &dp->dtdo_vartab[i];
11796 11820 dtrace_diftype_t *t = &v->dtdv_type;
11797 11821
11798 11822 if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
11799 11823 continue;
11800 11824
11801 11825 if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
11802 11826 t->dtdt_size = dtrace_strsize_default;
11803 11827 }
11804 11828
11805 11829 if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
11806 11830 goto err;
11807 11831
11808 11832 dtrace_difo_init(dp, vstate);
11809 11833 return (dp);
11810 11834
11811 11835 err:
11812 11836 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
11813 11837 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
11814 11838 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
11815 11839 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
11816 11840
11817 11841 kmem_free(dp, sizeof (dtrace_difo_t));
11818 11842 return (NULL);
11819 11843 }
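
Reviewer note: the difo[] table at the top of dtrace_dof_difo() is a compact example of offsetof()-driven dispatch: each row pairs a DOF section type with the byte offsets of the matching buffer pointer and length field inside dtrace_difo_t, so one loop body can load all four table kinds. A stripped-down sketch of the technique (struct obj and its fields are invented for illustration):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct obj {
		uint64_t *inttab;
		uint32_t intlen;
		char *strtab;
		uint32_t strsz;
	};

	static const struct {
		int type;		/* section type tag */
		size_t bufoffs;		/* offsetof() the buffer pointer */
		size_t lenoffs;		/* offsetof() the length field */
	} map[] = {
		{ 1, offsetof(struct obj, inttab), offsetof(struct obj, intlen) },
		{ 2, offsetof(struct obj, strtab), offsetof(struct obj, strsz) },
		{ 0, 0, 0 }
	};

	static int
	load_section(struct obj *o, int type, const void *src, uint32_t size)
	{
		for (int i = 0; map[i].type != 0; i++) {
			if (map[i].type != type)
				continue;

			/* Same pointer arithmetic as bufp/lenp above. */
			void **bufp = (void **)((uintptr_t)o + map[i].bufoffs);
			uint32_t *lenp = (uint32_t *)((uintptr_t)o + map[i].lenoffs);

			*bufp = malloc(size);
			memcpy(*bufp, src, size);
			*lenp = size;
			return (0);
		}
		return (-1);		/* unrecognized section type */
	}

	int
	main(void)
	{
		struct obj o = { 0 };
		uint64_t ints[] = { 42, 7 };

		(void) load_section(&o, 1, ints, sizeof (ints));
		(void) printf("intlen=%u first=%llu\n", o.intlen,
		    (unsigned long long)o.inttab[0]);
		free(o.inttab);
		return (0);
	}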
11820 11844
11821 11845 static dtrace_predicate_t *
11822 11846 dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11823 11847 cred_t *cr)
11824 11848 {
11825 11849 dtrace_difo_t *dp;
11826 11850
11827 11851 if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
11828 11852 return (NULL);
11829 11853
11830 11854 return (dtrace_predicate_create(dp));
11831 11855 }
11832 11856
11833 11857 static dtrace_actdesc_t *
11834 11858 dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11835 11859 cred_t *cr)
11836 11860 {
11837 11861 dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
11838 11862 dof_actdesc_t *desc;
11839 11863 dof_sec_t *difosec;
11840 11864 size_t offs;
11841 11865 uintptr_t daddr = (uintptr_t)dof;
11842 11866 uint64_t arg;
11843 11867 dtrace_actkind_t kind;
11844 11868
11845 11869 if (sec->dofs_type != DOF_SECT_ACTDESC) {
11846 11870 dtrace_dof_error(dof, "invalid action section");
11847 11871 return (NULL);
11848 11872 }
11849 11873
11850 11874 if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
11851 11875 dtrace_dof_error(dof, "truncated action description");
11852 11876 return (NULL);
11853 11877 }
11854 11878
11855 11879 if (sec->dofs_align != sizeof (uint64_t)) {
11856 11880 dtrace_dof_error(dof, "bad alignment in action description");
11857 11881 return (NULL);
11858 11882 }
11859 11883
11860 11884 if (sec->dofs_size < sec->dofs_entsize) {
11861 11885 dtrace_dof_error(dof, "section entry size exceeds total size");
11862 11886 return (NULL);
11863 11887 }
11864 11888
11865 11889 if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
11866 11890 dtrace_dof_error(dof, "bad entry size in action description");
11867 11891 return (NULL);
11868 11892 }
11869 11893
11870 11894 if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
11871 11895 dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
11872 11896 return (NULL);
11873 11897 }
11874 11898
11875 11899 for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
11876 11900 desc = (dof_actdesc_t *)(daddr +
11877 11901 (uintptr_t)sec->dofs_offset + offs);
11878 11902 kind = (dtrace_actkind_t)desc->dofa_kind;
11879 11903
11880 11904 if ((DTRACEACT_ISPRINTFLIKE(kind) &&
11881 11905 (kind != DTRACEACT_PRINTA ||
11882 11906 desc->dofa_strtab != DOF_SECIDX_NONE)) ||
11883 11907 (kind == DTRACEACT_DIFEXPR &&
11884 11908 desc->dofa_strtab != DOF_SECIDX_NONE)) {
11885 11909 dof_sec_t *strtab;
11886 11910 char *str, *fmt;
11887 11911 uint64_t i;
11888 11912
11889 11913 /*
11890 11914 * The argument to these actions is an index into the
11891 11915 * DOF string table. For printf()-like actions, this
11892 11916 * is the format string. For print(), this is the
11893 11917 * CTF type of the expression result.
11894 11918 */
11895 11919 if ((strtab = dtrace_dof_sect(dof,
11896 11920 DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
11897 11921 goto err;
11898 11922
11899 11923 str = (char *)((uintptr_t)dof +
11900 11924 (uintptr_t)strtab->dofs_offset);
11901 11925
11902 11926 for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
11903 11927 if (str[i] == '\0')
11904 11928 break;
11905 11929 }
11906 11930
11907 11931 if (i >= strtab->dofs_size) {
11908 11932 dtrace_dof_error(dof, "bogus format string");
11909 11933 goto err;
11910 11934 }
11911 11935
11912 11936 if (i == desc->dofa_arg) {
11913 11937 dtrace_dof_error(dof, "empty format string");
11914 11938 goto err;
11915 11939 }
11916 11940
11917 11941 i -= desc->dofa_arg;
11918 11942 fmt = kmem_alloc(i + 1, KM_SLEEP);
11919 11943 bcopy(&str[desc->dofa_arg], fmt, i + 1);
11920 11944 arg = (uint64_t)(uintptr_t)fmt;
11921 11945 } else {
11922 11946 if (kind == DTRACEACT_PRINTA) {
11923 11947 ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
11924 11948 arg = 0;
11925 11949 } else {
11926 11950 arg = desc->dofa_arg;
11927 11951 }
11928 11952 }
11929 11953
11930 11954 act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
11931 11955 desc->dofa_uarg, arg);
11932 11956
11933 11957 if (last != NULL) {
11934 11958 last->dtad_next = act;
11935 11959 } else {
11936 11960 first = act;
11937 11961 }
11938 11962
11939 11963 last = act;
11940 11964
11941 11965 if (desc->dofa_difo == DOF_SECIDX_NONE)
11942 11966 continue;
11943 11967
11944 11968 if ((difosec = dtrace_dof_sect(dof,
11945 11969 DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
11946 11970 goto err;
11947 11971
11948 11972 act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);
11949 11973
11950 11974 if (act->dtad_difo == NULL)
11951 11975 goto err;
11952 11976 }
11953 11977
11954 11978 ASSERT(first != NULL);
11955 11979 return (first);
11956 11980
11957 11981 err:
11958 11982 for (act = first; act != NULL; act = next) {
11959 11983 next = act->dtad_next;
11960 11984 dtrace_actdesc_release(act, vstate);
11961 11985 }
11962 11986
11963 11987 return (NULL);
11964 11988 }
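
Reviewer note: the printf()/print() argument handling above trusts dofa_arg only as a starting offset; the string's length is re-derived by scanning for the NUL inside the section, and both an unterminated string (i >= dofs_size) and an empty one (i == dofa_arg) are rejected before anything is copied. The same validation in isolation, as a hedged user-land sketch (fmt_dup() is a made-up name):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/*
	 * Hypothetical helper: duplicate the string at offset 'arg' inside a
	 * DOF-style string table, refusing unterminated or empty strings.
	 */
	static char *
	fmt_dup(const char *strtab, size_t size, size_t arg)
	{
		size_t i;
		char *fmt;

		for (i = arg; i < size; i++) {
			if (strtab[i] == '\0')
				break;
		}

		if (i >= size)
			return (NULL);	/* "bogus format string": no terminator */
		if (i == arg)
			return (NULL);	/* "empty format string" */

		i -= arg;
		fmt = malloc(i + 1);	/* sketch: no allocation-failure check */
		memcpy(fmt, &strtab[arg], i + 1);	/* include the NUL */
		return (fmt);
	}

	int
	main(void)
	{
		const char tab[] = "\0%s %d\n";
		char *fmt = fmt_dup(tab, sizeof (tab), 1);

		if (fmt != NULL) {
			(void) printf(fmt, "pid", 123);
			free(fmt);
		}
		return (0);
	}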
11965 11989
11966 11990 static dtrace_ecbdesc_t *
11967 11991 dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11968 11992 cred_t *cr)
11969 11993 {
11970 11994 dtrace_ecbdesc_t *ep;
11971 11995 dof_ecbdesc_t *ecb;
11972 11996 dtrace_probedesc_t *desc;
11973 11997 dtrace_predicate_t *pred = NULL;
11974 11998
11975 11999 if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
11976 12000 dtrace_dof_error(dof, "truncated ECB description");
11977 12001 return (NULL);
11978 12002 }
11979 12003
11980 12004 if (sec->dofs_align != sizeof (uint64_t)) {
11981 12005 dtrace_dof_error(dof, "bad alignment in ECB description");
11982 12006 return (NULL);
11983 12007 }
11984 12008
11985 12009 ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
11986 12010 sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);
11987 12011
11988 12012 if (sec == NULL)
11989 12013 return (NULL);
11990 12014
11991 12015 ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
11992 12016 ep->dted_uarg = ecb->dofe_uarg;
11993 12017 desc = &ep->dted_probe;
11994 12018
11995 12019 if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
11996 12020 goto err;
11997 12021
11998 12022 if (ecb->dofe_pred != DOF_SECIDX_NONE) {
11999 12023 if ((sec = dtrace_dof_sect(dof,
12000 12024 DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
12001 12025 goto err;
12002 12026
12003 12027 if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
12004 12028 goto err;
12005 12029
12006 12030 ep->dted_pred.dtpdd_predicate = pred;
12007 12031 }
12008 12032
12009 12033 if (ecb->dofe_actions != DOF_SECIDX_NONE) {
12010 12034 if ((sec = dtrace_dof_sect(dof,
12011 12035 DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
12012 12036 goto err;
12013 12037
12014 12038 ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);
12015 12039
12016 12040 if (ep->dted_action == NULL)
12017 12041 goto err;
12018 12042 }
12019 12043
12020 12044 return (ep);
12021 12045
12022 12046 err:
12023 12047 if (pred != NULL)
12024 12048 dtrace_predicate_release(pred, vstate);
12025 12049 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
12026 12050 return (NULL);
12027 12051 }
12028 12052
12029 12053 /*
12030 12054 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
12031 12055 * specified DOF. At present, this amounts to simply adding 'ubase' to the
12032 12056 * site of any user SETX relocations to account for load object base address.
12033 12057 * In the future, if we need other relocations, this function can be extended.
12034 12058 */
12035 12059 static int
12036 12060 dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase)
12037 12061 {
12038 12062 uintptr_t daddr = (uintptr_t)dof;
12039 12063 dof_relohdr_t *dofr =
12040 12064 (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
12041 12065 dof_sec_t *ss, *rs, *ts;
12042 12066 dof_relodesc_t *r;
12043 12067 uint_t i, n;
12044 12068
12045 12069 if (sec->dofs_size < sizeof (dof_relohdr_t) ||
12046 12070 sec->dofs_align != sizeof (dof_secidx_t)) {
12047 12071 dtrace_dof_error(dof, "invalid relocation header");
12048 12072 return (-1);
12049 12073 }
12050 12074
12051 12075 ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
12052 12076 rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
12053 12077 ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);
12054 12078
12055 12079 if (ss == NULL || rs == NULL || ts == NULL)
12056 12080 return (-1); /* dtrace_dof_error() has been called already */
12057 12081
12058 12082 if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
12059 12083 rs->dofs_align != sizeof (uint64_t)) {
12060 12084 dtrace_dof_error(dof, "invalid relocation section");
12061 12085 return (-1);
12062 12086 }
12063 12087
12064 12088 r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
12065 12089 n = rs->dofs_size / rs->dofs_entsize;
12066 12090
12067 12091 for (i = 0; i < n; i++) {
12068 12092 uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;
12069 12093
12070 12094 switch (r->dofr_type) {
12071 12095 case DOF_RELO_NONE:
12072 12096 break;
12073 12097 case DOF_RELO_SETX:
12074 12098 if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
12075 12099 sizeof (uint64_t) > ts->dofs_size) {
12076 12100 dtrace_dof_error(dof, "bad relocation offset");
12077 12101 return (-1);
12078 12102 }
12079 12103
12080 12104 if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
12081 12105 dtrace_dof_error(dof, "misaligned setx relo");
12082 12106 return (-1);
12083 12107 }
12084 12108
12085 12109 *(uint64_t *)taddr += ubase;
12086 12110 break;
12087 12111 default:
12088 12112 dtrace_dof_error(dof, "invalid relocation type");
12089 12113 return (-1);
12090 12114 }
12091 12115
12092 12116 r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
12093 12117 }
12094 12118
12095 12119 return (0);
12096 12120 }
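
Reviewer note: a DOF_RELO_SETX record just identifies an 8-byte slot in the target section; once the range and alignment checks pass, the fixup itself is the single in-place add at the bottom of the loop. A worked example with invented numbers:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		/*
		 * Pretend the target section holds one SETX site whose
		 * link-time value is an offset of 0x1480 into the load
		 * object, and that the object was mapped at 0xfec80000.
		 */
		uint64_t site = 0x1480;
		uint64_t ubase = 0xfec80000;

		site += ubase;	/* what "*(uint64_t *)taddr += ubase" does */
		(void) printf("0x%llx\n", (unsigned long long)site);
		return (0);	/* prints 0xfec81480 */
	}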
12097 12121
12098 12122 /*
12099 12123 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
12100 12124 * header: it should be at the front of a memory region that is at least
12101 12125 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
12102 12126 * size. It need not be validated in any other way.
12103 12127 */
12104 12128 static int
12105 12129 dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
12106 12130 dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
12107 12131 {
12108 12132 uint64_t len = dof->dofh_loadsz, seclen;
12109 12133 uintptr_t daddr = (uintptr_t)dof;
12110 12134 dtrace_ecbdesc_t *ep;
12111 12135 dtrace_enabling_t *enab;
12112 12136 uint_t i;
12113 12137
12114 12138 ASSERT(MUTEX_HELD(&dtrace_lock));
12115 12139 ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));
12116 12140
12117 12141 /*
12118 12142 * Check the DOF header identification bytes. In addition to checking
12119 12143 * valid settings, we also verify that unused bits/bytes are zeroed so
12120 12144 * we can use them later without fear of regressing existing binaries.
12121 12145 */
12122 12146 if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
12123 12147 DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
12124 12148 dtrace_dof_error(dof, "DOF magic string mismatch");
12125 12149 return (-1);
12126 12150 }
12127 12151
12128 12152 if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
12129 12153 dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
12130 12154 dtrace_dof_error(dof, "DOF has invalid data model");
12131 12155 return (-1);
12132 12156 }
12133 12157
12134 12158 if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
12135 12159 dtrace_dof_error(dof, "DOF encoding mismatch");
12136 12160 return (-1);
12137 12161 }
12138 12162
12139 12163 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
12140 12164 dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) {
12141 12165 dtrace_dof_error(dof, "DOF version mismatch");
12142 12166 return (-1);
12143 12167 }
12144 12168
12145 12169 if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
12146 12170 dtrace_dof_error(dof, "DOF uses unsupported instruction set");
12147 12171 return (-1);
12148 12172 }
12149 12173
12150 12174 if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
12151 12175 dtrace_dof_error(dof, "DOF uses too many integer registers");
12152 12176 return (-1);
12153 12177 }
12154 12178
12155 12179 if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
12156 12180 dtrace_dof_error(dof, "DOF uses too many tuple registers");
12157 12181 return (-1);
12158 12182 }
12159 12183
12160 12184 for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
12161 12185 if (dof->dofh_ident[i] != 0) {
12162 12186 dtrace_dof_error(dof, "DOF has invalid ident byte set");
12163 12187 return (-1);
12164 12188 }
12165 12189 }
12166 12190
12167 12191 if (dof->dofh_flags & ~DOF_FL_VALID) {
12168 12192 dtrace_dof_error(dof, "DOF has invalid flag bits set");
12169 12193 return (-1);
12170 12194 }
12171 12195
12172 12196 if (dof->dofh_secsize == 0) {
12173 12197 dtrace_dof_error(dof, "zero section header size");
12174 12198 return (-1);
12175 12199 }
12176 12200
12177 12201 /*
12178 12202 * Check that the section headers don't exceed the amount of DOF
12179 12203 * data. Note that we cast the section size and number of sections
12180 12204 * to uint64_t's to prevent possible overflow in the multiplication.
12181 12205 */
12182 12206 seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;
12183 12207
12184 12208 if (dof->dofh_secoff > len || seclen > len ||
12185 12209 dof->dofh_secoff + seclen > len) {
12186 12210 dtrace_dof_error(dof, "truncated section headers");
12187 12211 return (-1);
12188 12212 }
12189 12213
12190 12214 if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
12191 12215 dtrace_dof_error(dof, "misaligned section headers");
12192 12216 return (-1);
12193 12217 }
12194 12218
12195 12219 if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
12196 12220 dtrace_dof_error(dof, "misaligned section size");
12197 12221 return (-1);
12198 12222 }
12199 12223
12200 12224 /*
12201 12225 * Take an initial pass through the section headers to be sure that
12202 12226 * the headers don't have stray offsets. If the 'noprobes' flag is
12203 12227 * set, do not permit sections relating to providers, probes, or args.
12204 12228 */
12205 12229 for (i = 0; i < dof->dofh_secnum; i++) {
12206 12230 dof_sec_t *sec = (dof_sec_t *)(daddr +
12207 12231 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12208 12232
12209 12233 if (noprobes) {
12210 12234 switch (sec->dofs_type) {
12211 12235 case DOF_SECT_PROVIDER:
12212 12236 case DOF_SECT_PROBES:
12213 12237 case DOF_SECT_PRARGS:
12214 12238 case DOF_SECT_PROFFS:
12215 12239 dtrace_dof_error(dof, "illegal sections "
12216 12240 "for enabling");
12217 12241 return (-1);
12218 12242 }
12219 12243 }
12220 12244
12221 12245 if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
12222 12246 !(sec->dofs_flags & DOF_SECF_LOAD)) {
12223 12247 dtrace_dof_error(dof, "loadable section with load "
12224 12248 "flag unset");
12225 12249 return (-1);
12226 12250 }
12227 12251
12228 12252 if (!(sec->dofs_flags & DOF_SECF_LOAD))
12229 12253 continue; /* just ignore non-loadable sections */
12230 12254
12231 12255 if (sec->dofs_align & (sec->dofs_align - 1)) {
12232 12256 dtrace_dof_error(dof, "bad section alignment");
12233 12257 return (-1);
12234 12258 }
12235 12259
12236 12260 if (sec->dofs_offset & (sec->dofs_align - 1)) {
12237 12261 dtrace_dof_error(dof, "misaligned section");
12238 12262 return (-1);
12239 12263 }
12240 12264
12241 12265 if (sec->dofs_offset > len || sec->dofs_size > len ||
12242 12266 sec->dofs_offset + sec->dofs_size > len) {
12243 12267 dtrace_dof_error(dof, "corrupt section header");
12244 12268 return (-1);
12245 12269 }
12246 12270
12247 12271 if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
12248 12272 sec->dofs_offset + sec->dofs_size - 1) != '\0') {
12249 12273 dtrace_dof_error(dof, "non-terminating string table");
12250 12274 return (-1);
12251 12275 }
12252 12276 }
12253 12277
12254 12278 /*
12255 12279 * Take a second pass through the sections and locate and perform any
12256 12280 * relocations that are present. We do this after the first pass to
12257 12281 * be sure that all sections have had their headers validated.
12258 12282 */
12259 12283 for (i = 0; i < dof->dofh_secnum; i++) {
12260 12284 dof_sec_t *sec = (dof_sec_t *)(daddr +
12261 12285 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12262 12286
12263 12287 if (!(sec->dofs_flags & DOF_SECF_LOAD))
12264 12288 continue; /* skip sections that are not loadable */
12265 12289
12266 12290 switch (sec->dofs_type) {
12267 12291 case DOF_SECT_URELHDR:
12268 12292 if (dtrace_dof_relocate(dof, sec, ubase) != 0)
12269 12293 return (-1);
12270 12294 break;
12271 12295 }
12272 12296 }
12273 12297
12274 12298 if ((enab = *enabp) == NULL)
12275 12299 enab = *enabp = dtrace_enabling_create(vstate);
12276 12300
12277 12301 for (i = 0; i < dof->dofh_secnum; i++) {
12278 12302 dof_sec_t *sec = (dof_sec_t *)(daddr +
12279 12303 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12280 12304
12281 12305 if (sec->dofs_type != DOF_SECT_ECBDESC)
12282 12306 continue;
12283 12307
12284 12308 if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
12285 12309 dtrace_enabling_destroy(enab);
12286 12310 *enabp = NULL;
12287 12311 return (-1);
12288 12312 }
12289 12313
12290 12314 dtrace_enabling_add(enab, ep);
12291 12315 }
12292 12316
12293 12317 return (0);
12294 12318 }
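
Reviewer note: the uint64_t casts in the seclen computation above are load-bearing. dofh_secnum and dofh_secsize are 32-bit fields, so a hostile header can pick values whose 32-bit product wraps to something tiny and slips past the truncation check; promoting both operands first makes the product exact. A toy demonstration:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint32_t secnum = 0x01000000;	/* 2^24 sections (hostile) */
		uint32_t secsize = 0x1000;	/* 2^12 bytes apiece */

		uint32_t bad = secnum * secsize;	/* wraps to 0 */
		uint64_t good = (uint64_t)secnum * (uint64_t)secsize;

		(void) printf("32-bit: %u, 64-bit: %llu\n",
		    bad, (unsigned long long)good);	/* 0 vs 2^36 */
		return (0);
	}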
12295 12319
12296 12320 /*
12297 12321 * Process DOF for any options. This routine assumes that the DOF has been
12298 12322 * at least processed by dtrace_dof_slurp().
12299 12323 */
12300 12324 static int
12301 12325 dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
12302 12326 {
12303 12327 int i, rval;
12304 12328 uint32_t entsize;
12305 12329 size_t offs;
12306 12330 dof_optdesc_t *desc;
12307 12331
12308 12332 for (i = 0; i < dof->dofh_secnum; i++) {
12309 12333 dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
12310 12334 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12311 12335
12312 12336 if (sec->dofs_type != DOF_SECT_OPTDESC)
12313 12337 continue;
12314 12338
12315 12339 if (sec->dofs_align != sizeof (uint64_t)) {
12316 12340 dtrace_dof_error(dof, "bad alignment in "
12317 12341 "option description");
12318 12342 return (EINVAL);
12319 12343 }
12320 12344
12321 12345 if ((entsize = sec->dofs_entsize) == 0) {
12322 12346 dtrace_dof_error(dof, "zeroed option entry size");
12323 12347 return (EINVAL);
12324 12348 }
12325 12349
12326 12350 if (entsize < sizeof (dof_optdesc_t)) {
12327 12351 dtrace_dof_error(dof, "bad option entry size");
12328 12352 return (EINVAL);
12329 12353 }
12330 12354
12331 12355 for (offs = 0; offs < sec->dofs_size; offs += entsize) {
12332 12356 desc = (dof_optdesc_t *)((uintptr_t)dof +
12333 12357 (uintptr_t)sec->dofs_offset + offs);
12334 12358
12335 12359 if (desc->dofo_strtab != DOF_SECIDX_NONE) {
12336 12360 dtrace_dof_error(dof, "non-zero option string");
12337 12361 return (EINVAL);
12338 12362 }
12339 12363
12340 12364 if (desc->dofo_value == DTRACEOPT_UNSET) {
12341 12365 dtrace_dof_error(dof, "unset option");
12342 12366 return (EINVAL);
12343 12367 }
12344 12368
12345 12369 if ((rval = dtrace_state_option(state,
12346 12370 desc->dofo_option, desc->dofo_value)) != 0) {
12347 12371 dtrace_dof_error(dof, "rejected option");
12348 12372 return (rval);
12349 12373 }
12350 12374 }
12351 12375 }
12352 12376
12353 12377 return (0);
12354 12378 }
12355 12379
12356 12380 /*
12357 12381 * DTrace Consumer State Functions
12358 12382 */
12359 12383 int
12360 12384 dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
12361 12385 {
12362 12386 size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize;
12363 12387 void *base;
12364 12388 uintptr_t limit;
12365 12389 dtrace_dynvar_t *dvar, *next, *start;
12366 12390 int i;
12367 12391
12368 12392 ASSERT(MUTEX_HELD(&dtrace_lock));
12369 12393 ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);
12370 12394
12371 12395 bzero(dstate, sizeof (dtrace_dstate_t));
12372 12396
12373 12397 if ((dstate->dtds_chunksize = chunksize) == 0)
12374 12398 dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;
12375 12399
12376 12400 if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
12377 12401 size = min;
12378 12402
12379 12403 if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL)
12380 12404 return (ENOMEM);
12381 12405
12382 12406 dstate->dtds_size = size;
12383 12407 dstate->dtds_base = base;
12384 12408 dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
12385 12409 bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t));
12386 12410
12387 12411 hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));
12388 12412
12389 12413 if (hashsize != 1 && (hashsize & 1))
12390 12414 hashsize--;
12391 12415
12392 12416 dstate->dtds_hashsize = hashsize;
12393 12417 dstate->dtds_hash = dstate->dtds_base;
12394 12418
12395 12419 /*
12396 12420 * Set all of our hash buckets to point to the single sink, and (if
12397 12421 * it hasn't already been set), set the sink's hash value to be the
12398 12422 * sink sentinel value. The sink is needed for dynamic variable
12399 12423 * lookups to know that they have iterated over an entire, valid hash
12400 12424 * chain.
12401 12425 */
12402 12426 for (i = 0; i < hashsize; i++)
12403 12427 dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;
12404 12428
12405 12429 if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
12406 12430 dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;
12407 12431
12408 12432 /*
12409 12433 * Carve the remainder into per-CPU free lists, dividing the
12410 12434 * space evenly among all NCPU possible CPUs.
12411 12435 */
12412 12436 start = (dtrace_dynvar_t *)
12413 12437 ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
12414 12438 limit = (uintptr_t)base + size;
12415 12439
12416 12440 maxper = (limit - (uintptr_t)start) / NCPU;
12417 12441 maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;
12418 12442
12419 12443 for (i = 0; i < NCPU; i++) {
12420 12444 dstate->dtds_percpu[i].dtdsc_free = dvar = start;
12421 12445
12422 12446 /*
12423 12447 * If we don't even have enough chunks to make it once through
12424 12448 * NCPUs, we're just going to allocate everything to the first
12425 12449 * CPU. And if we're on the last CPU, we're going to allocate
12426 12450 * whatever is left over. In either case, we set the limit to
12427 12451 * be the limit of the dynamic variable space.
12428 12452 */
12429 12453 if (maxper == 0 || i == NCPU - 1) {
12430 12454 limit = (uintptr_t)base + size;
12431 12455 start = NULL;
12432 12456 } else {
12433 12457 limit = (uintptr_t)start + maxper;
12434 12458 start = (dtrace_dynvar_t *)limit;
12435 12459 }
12436 12460
12437 12461 ASSERT(limit <= (uintptr_t)base + size);
12438 12462
12439 12463 for (;;) {
12440 12464 next = (dtrace_dynvar_t *)((uintptr_t)dvar +
12441 12465 dstate->dtds_chunksize);
12442 12466
12443 12467 if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
12444 12468 break;
12445 12469
12446 12470 dvar->dtdv_next = next;
12447 12471 dvar = next;
12448 12472 }
12449 12473
12450 12474 if (maxper == 0)
12451 12475 break;
12452 12476 }
12453 12477
12454 12478 return (0);
12455 12479 }
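
Reviewer note: the sizing arithmetic in dtrace_dstate_init() is easier to follow with concrete numbers: every hash bucket costs sizeof (dtrace_dynhash_t) and every dynamic variable chunk costs dtds_chunksize, so the bucket count is size / (chunksize + bucket), and each CPU's share of the remaining space is rounded down to a chunk multiple so the free lists stay chunk-aligned. A sketch with invented sizes:

	#include <stdio.h>

	int
	main(void)
	{
		/* Invented numbers, just to exercise the same arithmetic. */
		unsigned long size = 1 << 20;		/* 1m of dynvar space */
		unsigned long chunksize = 256;		/* dtds_chunksize */
		unsigned long hashent = 16;		/* per-bucket cost */
		unsigned long ncpu = 8;

		unsigned long hashsize = size / (chunksize + hashent);
		if (hashsize != 1 && (hashsize & 1))
			hashsize--;		/* keep it even, as above */

		unsigned long start = hashsize * hashent;
		unsigned long maxper = (size - start) / ncpu;
		maxper = (maxper / chunksize) * chunksize; /* chunk multiple */

		(void) printf("hashsize=%lu maxper=%lu\n", hashsize, maxper);
		return (0);	/* prints hashsize=3854 maxper=123136 */
	}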
12456 12480
12457 12481 void
12458 12482 dtrace_dstate_fini(dtrace_dstate_t *dstate)
12459 12483 {
12460 12484 ASSERT(MUTEX_HELD(&cpu_lock));
12461 12485
12462 12486 if (dstate->dtds_base == NULL)
12463 12487 return;
12464 12488
12465 12489 kmem_free(dstate->dtds_base, dstate->dtds_size);
12466 12490 kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
12467 12491 }
12468 12492
12469 12493 static void
12470 12494 dtrace_vstate_fini(dtrace_vstate_t *vstate)
12471 12495 {
12472 12496 /*
12473 12497 * Logical XOR, where are you?
12474 12498 */
12475 12499 ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));
12476 12500
12477 12501 if (vstate->dtvs_nglobals > 0) {
12478 12502 kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
12479 12503 sizeof (dtrace_statvar_t *));
12480 12504 }
12481 12505
12482 12506 if (vstate->dtvs_ntlocals > 0) {
12483 12507 kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
12484 12508 sizeof (dtrace_difv_t));
12485 12509 }
12486 12510
12487 12511 ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));
12488 12512
12489 12513 if (vstate->dtvs_nlocals > 0) {
12490 12514 kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
12491 12515 sizeof (dtrace_statvar_t *));
12492 12516 }
12493 12517 }
12494 12518
12495 12519 static void
12496 12520 dtrace_state_clean(dtrace_state_t *state)
12497 12521 {
12498 12522 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
12499 12523 return;
12500 12524
12501 12525 dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
12502 12526 dtrace_speculation_clean(state);
12503 12527 }
12504 12528
12505 12529 static void
12506 12530 dtrace_state_deadman(dtrace_state_t *state)
12507 12531 {
12508 12532 hrtime_t now;
12509 12533
12510 12534 dtrace_sync();
12511 12535
12512 12536 now = dtrace_gethrtime();
12513 12537
12514 12538 if (state != dtrace_anon.dta_state &&
12515 12539 now - state->dts_laststatus >= dtrace_deadman_user)
12516 12540 return;
12517 12541
12518 12542 /*
12519 12543 * We must be sure that dts_alive never appears to be less than the
12520 12544 * value upon entry to dtrace_state_deadman(), and because we lack a
12521 12545 * dtrace_cas64(), we cannot store to it atomically. We thus instead
12522 12546 * store INT64_MAX to it, followed by a memory barrier, followed by
12523 12547 * the new value. This assures that dts_alive never appears to be
12524 12548 * less than its true value, regardless of the order in which the
12525 12549 * stores to the underlying storage are issued.
12526 12550 */
12527 12551 state->dts_alive = INT64_MAX;
12528 12552 dtrace_membar_producer();
12529 12553 state->dts_alive = now;
12530 12554 }
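
Reviewer note: the INT64_MAX store above is a nice lock-free idiom. Without a 64-bit compare-and-swap, a 64-bit store need not be single-copy atomic, so a racing reader could otherwise see half of the old timestamp and half of the new one. Publishing INT64_MAX first, with a producer barrier between the stores, guarantees any intermediate or torn value still compares as "alive". The pattern in miniature (assumption: membar_producer() from illumos <sys/atomic.h> stands in here for dtrace_membar_producer()):

	#include <limits.h>
	#include <stdint.h>

	extern void membar_producer(void);	/* illumos <sys/atomic.h> */

	volatile int64_t alive;

	void
	deadman_touch(int64_t now)
	{
		alive = INT64_MAX;	/* a torn read mixes with 0x7fffffff... */
		membar_producer();	/* order the two stores */
		alive = now;		/* readers see old, INT64_MAX, or now */
	}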
12531 12555
12532 12556 dtrace_state_t *
12533 12557 dtrace_state_create(dev_t *devp, cred_t *cr)
12534 12558 {
12535 12559 minor_t minor;
12536 12560 major_t major;
12537 12561 char c[30];
12538 12562 dtrace_state_t *state;
12539 12563 dtrace_optval_t *opt;
12540 12564 int bufsize = NCPU * sizeof (dtrace_buffer_t), i;
12541 12565
12542 12566 ASSERT(MUTEX_HELD(&dtrace_lock));
12543 12567 ASSERT(MUTEX_HELD(&cpu_lock));
12544 12568
12545 12569 minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
12546 12570 VM_BESTFIT | VM_SLEEP);
12547 12571
12548 12572 if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
12549 12573 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
12550 12574 return (NULL);
12551 12575 }
12552 12576
12553 12577 state = ddi_get_soft_state(dtrace_softstate, minor);
12554 12578 state->dts_epid = DTRACE_EPIDNONE + 1;
12555 12579
12556 12580 (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
12557 12581 state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
12558 12582 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
12559 12583
12560 12584 if (devp != NULL) {
12561 12585 major = getemajor(*devp);
12562 12586 } else {
12563 12587 major = ddi_driver_major(dtrace_devi);
12564 12588 }
12565 12589
12566 12590 state->dts_dev = makedevice(major, minor);
12567 12591
12568 12592 if (devp != NULL)
12569 12593 *devp = state->dts_dev;
12570 12594
12571 12595 /*
12572 12596 * We allocate NCPU buffers. On the one hand, this can be quite
12573 12597 * a bit of memory per instance (nearly 36K on a Starcat). On the
12574 12598 * other hand, it saves an additional memory reference in the probe
12575 12599 * path.
12576 12600 */
12577 12601 state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
12578 12602 state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
12579 12603 state->dts_cleaner = CYCLIC_NONE;
12580 12604 state->dts_deadman = CYCLIC_NONE;
12581 12605 state->dts_vstate.dtvs_state = state;
12582 12606
12583 12607 for (i = 0; i < DTRACEOPT_MAX; i++)
12584 12608 state->dts_options[i] = DTRACEOPT_UNSET;
12585 12609
12586 12610 /*
12587 12611 * Set the default options.
12588 12612 */
12589 12613 opt = state->dts_options;
12590 12614 opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
12591 12615 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
12592 12616 opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
12593 12617 opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
12594 12618 opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
12595 12619 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
12596 12620 opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
12597 12621 opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
12598 12622 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
12599 12623 opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
12600 12624 opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
12601 12625 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
12602 12626 opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
12603 12627 opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;
12604 12628
12605 12629 state->dts_activity = DTRACE_ACTIVITY_INACTIVE;
12606 12630
12607 12631 /*
12608 12632 * Depending on the user credentials, we set flag bits which alter probe
12609 12633 * visibility or the amount of destructiveness allowed. In the case of
12610 12634 * actual anonymous tracing, or the possession of all privileges, all of
12611 12635 * the normal checks are bypassed.
12612 12636 */
12613 12637 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
12614 12638 state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
12615 12639 state->dts_cred.dcr_action = DTRACE_CRA_ALL;
12616 12640 } else {
12617 12641 /*
12618 12642 * Set up the credentials for this instantiation. We take a
12619 12643 * hold on the credential to prevent it from disappearing on
12620 12644 * us; this in turn prevents the zone_t referenced by this
12621 12645 * credential from disappearing. This means that we can
12622 12646 * examine the credential and the zone from probe context.
12623 12647 */
12624 12648 crhold(cr);
12625 12649 state->dts_cred.dcr_cred = cr;
12626 12650
12627 12651 /*
12628 12652 * CRA_PROC means "we have *some* privilege for dtrace" and
12629 12653 * unlocks the use of variables like pid, zonename, etc.
12630 12654 */
12631 12655 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
12632 12656 PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
12633 12657 state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
12634 12658 }
12635 12659
12636 12660 /*
12637 12661 * dtrace_user allows use of syscall and profile providers.
12638 12662 * If the user also has proc_owner and/or proc_zone, we
12639 12663 * extend the scope to include additional visibility and
12640 12664 * destructive power.
12641 12665 */
12642 12666 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
12643 12667 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
12644 12668 state->dts_cred.dcr_visible |=
12645 12669 DTRACE_CRV_ALLPROC;
12646 12670
12647 12671 state->dts_cred.dcr_action |=
12648 12672 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
12649 12673 }
12650 12674
12651 12675 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
12652 12676 state->dts_cred.dcr_visible |=
12653 12677 DTRACE_CRV_ALLZONE;
12654 12678
12655 12679 state->dts_cred.dcr_action |=
12656 12680 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
12657 12681 }
12658 12682
12659 12683 /*
12660 12684 * If we have all privs in whatever zone this is,
12661 12685 * we can do destructive things to processes which
12662 12686 * have altered credentials.
12663 12687 */
12664 12688 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
12665 12689 cr->cr_zone->zone_privset)) {
12666 12690 state->dts_cred.dcr_action |=
12667 12691 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
12668 12692 }
12669 12693 }
12670 12694
12671 12695 /*
12672 12696 * Holding the dtrace_kernel privilege also implies that
12673 12697 * the user has the dtrace_user privilege from a visibility
12674 12698 * perspective. But without further privileges, some
12675 12699 * destructive actions are not available.
12676 12700 */
12677 12701 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
12678 12702 /*
12679 12703 * Make all probes in all zones visible. However,
12680 12704 * this doesn't mean that all actions become available
12681 12705 * to all zones.
12682 12706 */
12683 12707 state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
12684 12708 DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;
12685 12709
12686 12710 state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
12687 12711 DTRACE_CRA_PROC;
12688 12712 /*
12689 12713 * Holding proc_owner means that destructive actions
12690 12714 * for *this* zone are allowed.
12691 12715 */
12692 12716 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
12693 12717 state->dts_cred.dcr_action |=
12694 12718 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
12695 12719
12696 12720 /*
12697 12721 * Holding proc_zone means that destructive actions
12698 12722 * for this user/group ID in all zones are allowed.
12699 12723 */
12700 12724 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
12701 12725 state->dts_cred.dcr_action |=
12702 12726 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
12703 12727
12704 12728 /*
12705 12729 * If we have all privs in whatever zone this is,
12706 12730 * we can do destructive things to processes which
12707 12731 * have altered credentials.
12708 12732 */
12709 12733 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
12710 12734 cr->cr_zone->zone_privset)) {
12711 12735 state->dts_cred.dcr_action |=
12712 12736 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
12713 12737 }
12714 12738 }
12715 12739
12716 12740 /*
12717 12741 * Holding the dtrace_proc privilege gives control over fasttrap
12718 12742 * and pid providers. We need to grant wider destructive
12719 12743 * privileges in the event that the user has proc_owner and/or
12720 12744 * proc_zone.
12721 12745 */
12722 12746 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
12723 12747 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
12724 12748 state->dts_cred.dcr_action |=
12725 12749 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
12726 12750
12727 12751 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
12728 12752 state->dts_cred.dcr_action |=
12729 12753 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
12730 12754 }
12731 12755 }
12732 12756
12733 12757 return (state);
12734 12758 }
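
Reviewer note: the privilege ladder above condenses to a small table; a simplified summary (the zone-privset/CREDCHG cases are elided -- see the code for the exact conditions). Note that holding either dtrace_user or dtrace_proc also sets DTRACE_CRA_PROC, which unlocks variables like pid and zonename:

	privilege held     visibility added             destructive power added
	dtrace_proc        (none)                       ALLUSER w/ proc_owner,
	                                                ALLZONE w/ proc_zone
	dtrace_user        ALLPROC w/ proc_owner,       ALLUSER w/ proc_owner,
	                   ALLZONE w/ proc_zone         ALLZONE w/ proc_zone
	dtrace_kernel      KERNEL, ALLPROC, ALLZONE     KERNEL; plus ALLUSER/
	                                                ALLZONE as above
	(all privileges)   DTRACE_CRV_ALL               DTRACE_CRA_ALL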
12735 12759
12736 12760 static int
12737 12761 dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
12738 12762 {
12739 12763 dtrace_optval_t *opt = state->dts_options, size;
12740 12764 processorid_t cpu;
12741 12765 int flags = 0, rval, factor, divisor = 1;
12742 12766
12743 12767 ASSERT(MUTEX_HELD(&dtrace_lock));
12744 12768 ASSERT(MUTEX_HELD(&cpu_lock));
12745 12769 ASSERT(which < DTRACEOPT_MAX);
12746 12770 ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
12747 12771 (state == dtrace_anon.dta_state &&
12748 12772 state->dts_activity == DTRACE_ACTIVITY_ACTIVE));
12749 12773
12750 12774 if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
12751 12775 return (0);
12752 12776
12753 12777 if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
12754 12778 cpu = opt[DTRACEOPT_CPU];
12755 12779
12756 12780 if (which == DTRACEOPT_SPECSIZE)
12757 12781 flags |= DTRACEBUF_NOSWITCH;
12758 12782
12759 12783 if (which == DTRACEOPT_BUFSIZE) {
12760 12784 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
12761 12785 flags |= DTRACEBUF_RING;
12762 12786
12763 12787 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
12764 12788 flags |= DTRACEBUF_FILL;
12765 12789
12766 12790 if (state != dtrace_anon.dta_state ||
12767 12791 state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
12768 12792 flags |= DTRACEBUF_INACTIVE;
12769 12793 }
12770 12794
12771 12795 for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) {
12772 12796 /*
12773 12797 * The size must be 8-byte aligned. If the size is not 8-byte
12774 12798 * aligned, drop it down by the difference.
12775 12799 */
12776 12800 if (size & (sizeof (uint64_t) - 1))
12777 12801 size -= size & (sizeof (uint64_t) - 1);
12778 12802
12779 12803 if (size < state->dts_reserve) {
12780 12804 /*
12781 12805 * Buffers must always be large enough to accommodate
12782 12806 * their prereserved space. We return E2BIG instead
12783 12807 * of ENOMEM in this case to allow user-level
12784 12808 * software to differentiate the two cases.
12785 12809 */
12786 12810 return (E2BIG);
12787 12811 }
12788 12812
12789 12813 rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor);
12790 12814
12791 12815 if (rval != ENOMEM) {
12792 12816 opt[which] = size;
12793 12817 return (rval);
12794 12818 }
12795 12819
12796 12820 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
12797 12821 return (rval);
12798 12822
12799 12823 for (divisor = 2; divisor < factor; divisor <<= 1)
12800 12824 continue;
12801 12825 }
12802 12826
12803 12827 return (ENOMEM);
12804 12828 }
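
Reviewer note: the retry loop in dtrace_state_buffer() interacts with dtrace_buffer_alloc()'s 'factor' out-parameter: on ENOMEM the requested size is divided by the smallest power of two >= factor, realigned down to 8 bytes, and retried (unless resizing is manual). A worked trace, with an invented starting size and factor:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint64_t size = 4 * 1024 * 1024 + 5;	/* deliberately unaligned */
		int factor = 3;		/* pretend the allocator reported 3 */
		int divisor;

		/* Drop to an 8-byte multiple, as the loop above does. */
		size -= size & (sizeof (uint64_t) - 1);
		(void) printf("try %llu\n",
		    (unsigned long long)size);		/* 4194304 */

		/* ENOMEM: next divisor is 4, the first power of two >= 3. */
		for (divisor = 2; divisor < factor; divisor <<= 1)
			continue;
		(void) printf("retry %llu\n",
		    (unsigned long long)(size / divisor));	/* 1048576 */
		return (0);
	}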
12805 12829
12806 12830 static int
12807 12831 dtrace_state_buffers(dtrace_state_t *state)
12808 12832 {
12809 12833 dtrace_speculation_t *spec = state->dts_speculations;
12810 12834 int rval, i;
12811 12835
12812 12836 if ((rval = dtrace_state_buffer(state, state->dts_buffer,
12813 12837 DTRACEOPT_BUFSIZE)) != 0)
12814 12838 return (rval);
12815 12839
12816 12840 if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
12817 12841 DTRACEOPT_AGGSIZE)) != 0)
12818 12842 return (rval);
12819 12843
12820 12844 for (i = 0; i < state->dts_nspeculations; i++) {
12821 12845 if ((rval = dtrace_state_buffer(state,
12822 12846 spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
12823 12847 return (rval);
12824 12848 }
12825 12849
12826 12850 return (0);
12827 12851 }
12828 12852
12829 12853 static void
12830 12854 dtrace_state_prereserve(dtrace_state_t *state)
12831 12855 {
12832 12856 dtrace_ecb_t *ecb;
12833 12857 dtrace_probe_t *probe;
12834 12858
12835 12859 state->dts_reserve = 0;
12836 12860
12837 12861 if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
12838 12862 return;
12839 12863
12840 12864 /*
12841 12865 * If our buffer policy is a "fill" buffer policy, we need to set the
12842 12866 * prereserved space to be the space required by the END probes.
12843 12867 */
12844 12868 probe = dtrace_probes[dtrace_probeid_end - 1];
12845 12869 ASSERT(probe != NULL);
12846 12870
12847 12871 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
12848 12872 if (ecb->dte_state != state)
12849 12873 continue;
12850 12874
12851 12875 state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
12852 12876 }
12853 12877 }
12854 12878
12855 12879 static int
12856 12880 dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
12857 12881 {
12858 12882 dtrace_optval_t *opt = state->dts_options, sz, nspec;
12859 12883 dtrace_speculation_t *spec;
12860 12884 dtrace_buffer_t *buf;
12861 12885 cyc_handler_t hdlr;
12862 12886 cyc_time_t when;
12863 12887 int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t);
12864 12888 dtrace_icookie_t cookie;
12865 12889
12866 12890 mutex_enter(&cpu_lock);
12867 12891 mutex_enter(&dtrace_lock);
12868 12892
12869 12893 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
12870 12894 rval = EBUSY;
12871 12895 goto out;
12872 12896 }
12873 12897
12874 12898 /*
12875 12899 * Before we can perform any checks, we must prime all of the
12876 12900 * retained enablings that correspond to this state.
12877 12901 */
12878 12902 dtrace_enabling_prime(state);
12879 12903
12880 12904 if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
12881 12905 rval = EACCES;
12882 12906 goto out;
12883 12907 }
12884 12908
12885 12909 dtrace_state_prereserve(state);
12886 12910
12887 12911 /*
12888 12912 * Now what we want to do is try to allocate our speculations.
12889 12913 * We do not automatically resize the number of speculations; if
12890 12914 * this fails, we will fail the operation.
12891 12915 */
12892 12916 nspec = opt[DTRACEOPT_NSPEC];
12893 12917 ASSERT(nspec != DTRACEOPT_UNSET);
12894 12918
12895 12919 if (nspec > INT_MAX) {
12896 12920 rval = ENOMEM;
12897 12921 goto out;
12898 12922 }
12899 12923
12900 12924 spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t),
12901 12925 KM_NOSLEEP | KM_NORMALPRI);
12902 12926
12903 12927 if (spec == NULL) {
12904 12928 rval = ENOMEM;
12905 12929 goto out;
12906 12930 }
12907 12931
12908 12932 state->dts_speculations = spec;
12909 12933 state->dts_nspeculations = (int)nspec;
12910 12934
12911 12935 for (i = 0; i < nspec; i++) {
12912 12936 if ((buf = kmem_zalloc(bufsize,
12913 12937 KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
12914 12938 rval = ENOMEM;
12915 12939 goto err;
12916 12940 }
12917 12941
12918 12942 spec[i].dtsp_buffer = buf;
12919 12943 }
12920 12944
12921 12945 if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
12922 12946 if (dtrace_anon.dta_state == NULL) {
12923 12947 rval = ENOENT;
12924 12948 goto out;
12925 12949 }
12926 12950
12927 12951 if (state->dts_necbs != 0) {
12928 12952 rval = EALREADY;
12929 12953 goto out;
12930 12954 }
12931 12955
12932 12956 state->dts_anon = dtrace_anon_grab();
12933 12957 ASSERT(state->dts_anon != NULL);
12934 12958 state = state->dts_anon;
12935 12959
12936 12960 /*
12937 12961 * We want "grabanon" to be set in the grabbed state, so we'll
12938 12962 * copy that option value from the grabbing state into the
12939 12963 * grabbed state.
12940 12964 */
12941 12965 state->dts_options[DTRACEOPT_GRABANON] =
12942 12966 opt[DTRACEOPT_GRABANON];
12943 12967
12944 12968 *cpu = dtrace_anon.dta_beganon;
12945 12969
12946 12970 /*
12947 12971 * If the anonymous state is active (as it almost certainly
12948 12972 * is if the anonymous enabling ultimately matched anything),
12949 12973 * we don't allow any further option processing -- but we
12950 12974 * don't return failure.
12951 12975 */
12952 12976 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
12953 12977 goto out;
12954 12978 }
12955 12979
12956 12980 if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
12957 12981 opt[DTRACEOPT_AGGSIZE] != 0) {
12958 12982 if (state->dts_aggregations == NULL) {
12959 12983 /*
12960 12984 * We're not going to create an aggregation buffer
12961 12985 * because we don't have any ECBs that contain
12962 12986 * aggregations -- set this option to 0.
12963 12987 */
12964 12988 opt[DTRACEOPT_AGGSIZE] = 0;
12965 12989 } else {
12966 12990 /*
12967 12991 * If we have an aggregation buffer, we must also have
12968 12992 * a buffer to use as scratch.
12969 12993 */
12970 12994 if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
12971 12995 opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
12972 12996 opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
12973 12997 }
12974 12998 }
12975 12999 }
12976 13000
12977 13001 if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
12978 13002 opt[DTRACEOPT_SPECSIZE] != 0) {
12979 13003 if (!state->dts_speculates) {
12980 13004 /*
12981 13005 * We're not going to create speculation buffers
12982 13006 * because we don't have any ECBs that actually
12983 13007 * speculate -- set the speculation size to 0.
12984 13008 */
12985 13009 opt[DTRACEOPT_SPECSIZE] = 0;
12986 13010 }
12987 13011 }
12988 13012
12989 13013 /*
12990 13014 * The bare minimum size for any buffer that we're actually going to
12991 13015 * do anything to is sizeof (uint64_t).
12992 13016 */
12993 13017 sz = sizeof (uint64_t);
12994 13018
12995 13019 if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
12996 13020 (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
12997 13021 (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
12998 13022 /*
12999 13023 * A buffer size has been explicitly set to 0 (or to a size
13000 13024 * that will be adjusted to 0) and we need the space -- we
13001 13025 * need to return failure. We return ENOSPC to differentiate
13002 13026 * it from failing to allocate a buffer due to failure to meet
13003 13027 * the reserve (for which we return E2BIG).
13004 13028 */
13005 13029 rval = ENOSPC;
13006 13030 goto out;
13007 13031 }
13008 13032
13009 13033 if ((rval = dtrace_state_buffers(state)) != 0)
13010 13034 goto err;
13011 13035
13012 13036 if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
13013 13037 sz = dtrace_dstate_defsize;
13014 13038
13015 13039 do {
13016 13040 rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);
13017 13041
13018 13042 if (rval == 0)
13019 13043 break;
13020 13044
13021 13045 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
13022 13046 goto err;
13023 13047 } while (sz >>= 1);
13024 13048
13025 13049 opt[DTRACEOPT_DYNVARSIZE] = sz;
13026 13050
13027 13051 if (rval != 0)
13028 13052 goto err;
13029 13053
13030 13054 if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
13031 13055 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;
13032 13056
13033 13057 if (opt[DTRACEOPT_CLEANRATE] == 0)
13034 13058 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
13035 13059
13036 13060 if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
13037 13061 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;
13038 13062
13039 13063 if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
13040 13064 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
13041 13065
13042 13066 hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
13043 13067 hdlr.cyh_arg = state;
13044 13068 hdlr.cyh_level = CY_LOW_LEVEL;
13045 13069
13046 13070 when.cyt_when = 0;
13047 13071 when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
13048 13072
13049 13073 state->dts_cleaner = cyclic_add(&hdlr, &when);
13050 13074
13051 13075 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
13052 13076 hdlr.cyh_arg = state;
13053 13077 hdlr.cyh_level = CY_LOW_LEVEL;
13054 13078
13055 13079 when.cyt_when = 0;
13056 13080 when.cyt_interval = dtrace_deadman_interval;
13057 13081
13058 13082 state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
13059 13083 state->dts_deadman = cyclic_add(&hdlr, &when);
13060 13084
13061 13085 state->dts_activity = DTRACE_ACTIVITY_WARMUP;
13062 13086
13063 13087 /*
13064 13088 * Now it's time to actually fire the BEGIN probe. We need to disable
13065 13089 * interrupts here both to record the CPU on which we fired the BEGIN
13066 13090 * probe (the data from this CPU will be processed first at user
13067 13091 * level) and to manually activate the buffer for this CPU.
13068 13092 */
13069 13093 cookie = dtrace_interrupt_disable();
13070 13094 *cpu = CPU->cpu_id;
13071 13095 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
13072 13096 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
13073 13097
13074 13098 dtrace_probe(dtrace_probeid_begin,
13075 13099 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13076 13100 dtrace_interrupt_enable(cookie);
13077 13101 /*
13078 13102 * We may have had an exit action from a BEGIN probe; only change our
13079 13103 * state to ACTIVE if we're still in WARMUP.
13080 13104 */
13081 13105 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
13082 13106 state->dts_activity == DTRACE_ACTIVITY_DRAINING);
13083 13107
13084 13108 if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
13085 13109 state->dts_activity = DTRACE_ACTIVITY_ACTIVE;
13086 13110
13087 13111 /*
13088 13112 * Regardless of whether we're now in ACTIVE or DRAINING, we
13089 13113 * want each CPU to transition its principal buffer out of the
13090 13114 * INACTIVE state. Doing this assures that no CPU will suddenly begin
13091 13115 * processing an ECB halfway down a probe's ECB chain; all CPUs will
13092 13116 * atomically transition from processing none of a state's ECBs to
13093 13117 * processing all of them.
13094 13118 */
13095 13119 dtrace_xcall(DTRACE_CPUALL,
13096 13120 (dtrace_xcall_t)dtrace_buffer_activate, state);
13097 13121 goto out;
13098 13122
13099 13123 err:
13100 13124 dtrace_buffer_free(state->dts_buffer);
13101 13125 dtrace_buffer_free(state->dts_aggbuffer);
13102 13126
13103 13127 if ((nspec = state->dts_nspeculations) == 0) {
13104 13128 ASSERT(state->dts_speculations == NULL);
13105 13129 goto out;
13106 13130 }
13107 13131
13108 13132 spec = state->dts_speculations;
13109 13133 ASSERT(spec != NULL);
13110 13134
13111 13135 for (i = 0; i < state->dts_nspeculations; i++) {
13112 13136 if ((buf = spec[i].dtsp_buffer) == NULL)
13113 13137 break;
13114 13138
13115 13139 dtrace_buffer_free(buf);
13116 13140 kmem_free(buf, bufsize);
13117 13141 }
13118 13142
13119 13143 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
13120 13144 state->dts_nspeculations = 0;
13121 13145 state->dts_speculations = NULL;
13122 13146
13123 13147 out:
13124 13148 mutex_exit(&dtrace_lock);
13125 13149 mutex_exit(&cpu_lock);
13126 13150
13127 13151 return (rval);
13128 13152 }
13129 13153
13130 13154 static int
13131 13155 dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
13132 13156 {
13133 13157 dtrace_icookie_t cookie;
13134 13158
13135 13159 ASSERT(MUTEX_HELD(&dtrace_lock));
13136 13160
13137 13161 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
13138 13162 state->dts_activity != DTRACE_ACTIVITY_DRAINING)
13139 13163 return (EINVAL);
13140 13164
13141 13165 /*
13142 13166 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
13143 13167 * to be sure that every CPU has seen it. See below for the details
13144 13168 * on why this is done.
13145 13169 */
13146 13170 state->dts_activity = DTRACE_ACTIVITY_DRAINING;
13147 13171 dtrace_sync();
13148 13172
13149 13173 /*
13150 13174 * By this point, it is impossible for any CPU to be still processing
13151 13175 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to
13152 13176 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
13153 13177 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe()
13154 13178 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
13155 13179 * iff we're in the END probe.
13156 13180 */
13157 13181 state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
13158 13182 dtrace_sync();
13159 13183 ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
13160 13184
13161 13185 /*
13162 13186 * Finally, we can release the reserve and call the END probe. We
13163 13187 * disable interrupts across calling the END probe to allow us to
13164 13188 * return the CPU on which we actually called the END probe. This
13165 13189 * allows user-land to be sure that this CPU's principal buffer is
13166 13190 * processed last.
13167 13191 */
13168 13192 state->dts_reserve = 0;
13169 13193
13170 13194 cookie = dtrace_interrupt_disable();
13171 13195 *cpu = CPU->cpu_id;
13172 13196 dtrace_probe(dtrace_probeid_end,
13173 13197 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13174 13198 dtrace_interrupt_enable(cookie);
13175 13199
13176 13200 state->dts_activity = DTRACE_ACTIVITY_STOPPED;
13177 13201 dtrace_sync();
13178 13202
13179 13203 return (0);
13180 13204 }
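
Reviewer note: the body of dtrace_state_stop() is a three-phase handshake; condensed, with the guarantee each dtrace_sync() buys:

	state->dts_activity = DTRACE_ACTIVITY_DRAINING;
	dtrace_sync();	/* no CPU can still believe we're ACTIVE */

	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
	dtrace_sync();	/* COOLDOWN now implies "inside the END probe" */

	dtrace_probe(dtrace_probeid_end, ...);	/* interrupts disabled */

	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();	/* every CPU is out of probe context */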
13181 13205
13182 13206 static int
13183 13207 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
13184 13208 dtrace_optval_t val)
13185 13209 {
13186 13210 ASSERT(MUTEX_HELD(&dtrace_lock));
13187 13211
13188 13212 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13189 13213 return (EBUSY);
13190 13214
13191 13215 if (option >= DTRACEOPT_MAX)
13192 13216 return (EINVAL);
13193 13217
13194 13218 if (option != DTRACEOPT_CPU && val < 0)
13195 13219 return (EINVAL);
13196 13220
13197 13221 switch (option) {
13198 13222 case DTRACEOPT_DESTRUCTIVE:
13199 13223 if (dtrace_destructive_disallow)
13200 13224 return (EACCES);
13201 13225
13202 13226 state->dts_cred.dcr_destructive = 1;
13203 13227 break;
13204 13228
13205 13229 case DTRACEOPT_BUFSIZE:
13206 13230 case DTRACEOPT_DYNVARSIZE:
13207 13231 case DTRACEOPT_AGGSIZE:
13208 13232 case DTRACEOPT_SPECSIZE:
13209 13233 case DTRACEOPT_STRSIZE:
13210 13234 if (val < 0)
13211 13235 return (EINVAL);
13212 13236
13213 13237 if (val >= LONG_MAX) {
13214 13238 /*
13215 13239 * If this is an otherwise negative value, set it to
13216 13240 * the highest multiple of 128m less than LONG_MAX.
13217 13241 * Technically, we're adjusting the size without
13218 13242 * regard to the buffer resizing policy, but in fact,
13219 13243 * this has no effect -- if we set the buffer size to
13220 13244 * ~LONG_MAX and the buffer policy is ultimately set to
13221 13245 * be "manual", the buffer allocation is guaranteed to
13222 13246 * fail, if only because the allocation requires two
13223 13247 * buffers. (We set the size to the highest
13224 13248 * multiple of 128m because it ensures that the size
13225 13249 * will remain a multiple of a megabyte when
13226 13250 * repeatedly halved -- all the way down to 15m.)
13227 13251 */
13228 13252 val = LONG_MAX - (1 << 27) + 1;
13229 13253 }
13230 13254 }
13231 13255
13232 13256 state->dts_options[option] = val;
13233 13257
13234 13258 return (0);
13235 13259 }
13236 13260
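The clamp above is easy to check in user space: the chosen value -- the highest multiple of 128m below LONG_MAX -- stays megabyte-aligned under repeated halving for several rounds. A quick sketch, assuming an LP64 long as the kernel uses:

	#include <limits.h>
	#include <stdio.h>

	int
	main(void)
	{
		long val = LONG_MAX - (1L << 27) + 1;	/* the clamp above */
		int halvings = 0;

		/* Halve as an automatic resize would, until alignment breaks. */
		while (val > 0 && (val & ((1L << 20) - 1)) == 0) {
			val >>= 1;
			halvings++;
		}
		(void) printf("megabyte-aligned for %d halvings\n", halvings);
		return (0);
	}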
13237 13261 static void
13238 13262 dtrace_state_destroy(dtrace_state_t *state)
13239 13263 {
13240 13264 dtrace_ecb_t *ecb;
13241 13265 dtrace_vstate_t *vstate = &state->dts_vstate;
13242 13266 minor_t minor = getminor(state->dts_dev);
13243 13267 int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
13244 13268 dtrace_speculation_t *spec = state->dts_speculations;
13245 13269 int nspec = state->dts_nspeculations;
13246 13270 uint32_t match;
13247 13271
13248 13272 ASSERT(MUTEX_HELD(&dtrace_lock));
13249 13273 ASSERT(MUTEX_HELD(&cpu_lock));
13250 13274
13251 13275 /*
13252 13276 * First, retract any retained enablings for this state.
13253 13277 */
13254 13278 dtrace_enabling_retract(state);
13255 13279 ASSERT(state->dts_nretained == 0);
13256 13280
13257 13281 if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
13258 13282 state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
13259 13283 /*
13260 13284 * We have managed to come into dtrace_state_destroy() on a
13261 13285 * hot enabling -- almost certainly because of a disorderly
13262 13286 * shutdown of a consumer. (That is, a consumer that is
13263 13287 * exiting without having called dtrace_stop().) In this case,
13264 13288 * we're going to set our activity to be KILLED, and then
13265 13289 * issue a sync to be sure that everyone is out of probe
13266 13290 * context before we start blowing away ECBs.
13267 13291 */
13268 13292 state->dts_activity = DTRACE_ACTIVITY_KILLED;
13269 13293 dtrace_sync();
13270 13294 }
13271 13295
13272 13296 /*
13273 13297 * Release the credential hold we took in dtrace_state_create().
13274 13298 */
13275 13299 if (state->dts_cred.dcr_cred != NULL)
13276 13300 crfree(state->dts_cred.dcr_cred);
13277 13301
13278 13302 /*
13279 13303 * Now we can safely disable and destroy any enabled probes. Because
13280 13304 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
13281 13305 * (especially if they're all enabled), we take two passes through the
13282 13306 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
13283 13307 * in the second we disable whatever is left over.
13284 13308 */
13285 13309 for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
13286 13310 for (i = 0; i < state->dts_necbs; i++) {
13287 13311 if ((ecb = state->dts_ecbs[i]) == NULL)
13288 13312 continue;
13289 13313
13290 13314 if (match && ecb->dte_probe != NULL) {
13291 13315 dtrace_probe_t *probe = ecb->dte_probe;
13292 13316 dtrace_provider_t *prov = probe->dtpr_provider;
13293 13317
13294 13318 if (!(prov->dtpv_priv.dtpp_flags & match))
13295 13319 continue;
13296 13320 }
13297 13321
13298 13322 dtrace_ecb_disable(ecb);
13299 13323 dtrace_ecb_destroy(ecb);
13300 13324 }
13301 13325
13302 13326 if (!match)
13303 13327 break;
13304 13328 }
13305 13329
13306 13330 /*
13307 13331 * Before we free the buffers, perform one more sync to assure that
13308 13332 * every CPU is out of probe context.
13309 13333 */
13310 13334 dtrace_sync();
13311 13335
13312 13336 dtrace_buffer_free(state->dts_buffer);
13313 13337 dtrace_buffer_free(state->dts_aggbuffer);
13314 13338
13315 13339 for (i = 0; i < nspec; i++)
13316 13340 dtrace_buffer_free(spec[i].dtsp_buffer);
13317 13341
13318 13342 if (state->dts_cleaner != CYCLIC_NONE)
13319 13343 cyclic_remove(state->dts_cleaner);
13320 13344
13321 13345 if (state->dts_deadman != CYCLIC_NONE)
13322 13346 cyclic_remove(state->dts_deadman);
13323 13347
13324 13348 dtrace_dstate_fini(&vstate->dtvs_dynvars);
13325 13349 dtrace_vstate_fini(vstate);
13326 13350 kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));
13327 13351
13328 13352 if (state->dts_aggregations != NULL) {
13329 13353 #ifdef DEBUG
13330 13354 for (i = 0; i < state->dts_naggregations; i++)
13331 13355 ASSERT(state->dts_aggregations[i] == NULL);
13332 13356 #endif
13333 13357 ASSERT(state->dts_naggregations > 0);
13334 13358 kmem_free(state->dts_aggregations,
13335 13359 state->dts_naggregations * sizeof (dtrace_aggregation_t *));
13336 13360 }
13337 13361
13338 13362 kmem_free(state->dts_buffer, bufsize);
13339 13363 kmem_free(state->dts_aggbuffer, bufsize);
13340 13364
13341 13365 for (i = 0; i < nspec; i++)
13342 13366 kmem_free(spec[i].dtsp_buffer, bufsize);
13343 13367
13344 13368 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
13345 13369
13346 13370 dtrace_format_destroy(state);
13347 13371
13348 13372 vmem_destroy(state->dts_aggid_arena);
13349 13373 ddi_soft_state_free(dtrace_softstate, minor);
13350 13374 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
13351 13375 }
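
The two-pass disable loop is worth calling out: for (match = DTRACE_PRIV_KERNEL; ; match = 0) runs the ECB sweep once restricted to kernel-privileged providers and once unrestricted. A stripped-down sketch of the same control shape (the flag value here is illustrative, not the real DTRACE_PRIV_KERNEL):

	#include <stdio.h>

	#define	PRIV_KERNEL	0x1	/* illustrative, not the real value */

	struct item {
		int flags;
		const char *name;
	};

	int
	main(void)
	{
		struct item items[] = {
			{ PRIV_KERNEL, "kernel-probe" },
			{ 0, "user-probe" }
		};
		int i, match;

		for (match = PRIV_KERNEL; ; match = 0) {
			for (i = 0; i < 2; i++) {
				if (items[i].name == NULL)
					continue;	/* already destroyed */

				if (match && !(items[i].flags & match))
					continue;	/* second pass's work */

				(void) printf("disabling %s\n", items[i].name);
				items[i].name = NULL;
			}

			if (!match)
				break;
		}
		return (0);
	}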
13352 13376
13353 13377 /*
13354 13378 * DTrace Anonymous Enabling Functions
13355 13379 */
13356 13380 static dtrace_state_t *
13357 13381 dtrace_anon_grab(void)
13358 13382 {
13359 13383 dtrace_state_t *state;
13360 13384
13361 13385 ASSERT(MUTEX_HELD(&dtrace_lock));
13362 13386
13363 13387 if ((state = dtrace_anon.dta_state) == NULL) {
13364 13388 ASSERT(dtrace_anon.dta_enabling == NULL);
13365 13389 return (NULL);
13366 13390 }
13367 13391
13368 13392 ASSERT(dtrace_anon.dta_enabling != NULL);
13369 13393 ASSERT(dtrace_retained != NULL);
13370 13394
13371 13395 dtrace_enabling_destroy(dtrace_anon.dta_enabling);
13372 13396 dtrace_anon.dta_enabling = NULL;
13373 13397 dtrace_anon.dta_state = NULL;
13374 13398
13375 13399 return (state);
13376 13400 }
13377 13401
13378 13402 static void
13379 13403 dtrace_anon_property(void)
13380 13404 {
13381 13405 int i, rv;
13382 13406 dtrace_state_t *state;
13383 13407 dof_hdr_t *dof;
13384 13408 char c[32]; /* enough for "dof-data-" + digits */
13385 13409
13386 13410 ASSERT(MUTEX_HELD(&dtrace_lock));
13387 13411 ASSERT(MUTEX_HELD(&cpu_lock));
13388 13412
13389 13413 for (i = 0; ; i++) {
13390 13414 (void) snprintf(c, sizeof (c), "dof-data-%d", i);
13391 13415
13392 13416 dtrace_err_verbose = 1;
13393 13417
13394 13418 if ((dof = dtrace_dof_property(c)) == NULL) {
13395 13419 dtrace_err_verbose = 0;
13396 13420 break;
13397 13421 }
13398 13422
13399 13423 /*
13400 13424 * We want to create anonymous state, so we need to transition
13401 13425 * the kernel debugger to indicate that DTrace is active. If
13402 13426 * this fails (e.g. because the debugger has modified text in
13403 13427 * some way), we won't continue with the processing.
13404 13428 */
13405 13429 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
13406 13430 cmn_err(CE_NOTE, "kernel debugger active; anonymous "
13407 13431 "enabling ignored.");
13408 13432 dtrace_dof_destroy(dof);
13409 13433 break;
13410 13434 }
13411 13435
13412 13436 /*
13413 13437 * If we haven't allocated an anonymous state, we'll do so now.
13414 13438 */
13415 13439 if ((state = dtrace_anon.dta_state) == NULL) {
13416 13440 state = dtrace_state_create(NULL, NULL);
13417 13441 dtrace_anon.dta_state = state;
13418 13442
13419 13443 if (state == NULL) {
13420 13444 /*
13421 13445 * This basically shouldn't happen: the only
13422 13446 * failure mode from dtrace_state_create() is a
13423 13447 * failure of ddi_soft_state_zalloc() that
13424 13448 * itself should never happen. Still, the
13425 13449 * interface allows for a failure mode, and
13426 13450 * we want to fail as gracefully as possible:
13427 13451 * we'll emit an error message and cease
13428 13452 * processing anonymous state in this case.
13429 13453 */
13430 13454 cmn_err(CE_WARN, "failed to create "
13431 13455 "anonymous state");
13432 13456 dtrace_dof_destroy(dof);
13433 13457 break;
13434 13458 }
13435 13459 }
13436 13460
13437 13461 rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
13438 13462 &dtrace_anon.dta_enabling, 0, B_TRUE);
13439 13463
13440 13464 if (rv == 0)
13441 13465 rv = dtrace_dof_options(dof, state);
13442 13466
13443 13467 dtrace_err_verbose = 0;
13444 13468 dtrace_dof_destroy(dof);
13445 13469
13446 13470 if (rv != 0) {
13447 13471 /*
13448 13472 * This is malformed DOF; chuck any anonymous state
13449 13473 * that we created.
13450 13474 */
13451 13475 ASSERT(dtrace_anon.dta_enabling == NULL);
13452 13476 dtrace_state_destroy(state);
13453 13477 dtrace_anon.dta_state = NULL;
13454 13478 break;
13455 13479 }
13456 13480
13457 13481 ASSERT(dtrace_anon.dta_enabling != NULL);
13458 13482 }
13459 13483
13460 13484 if (dtrace_anon.dta_enabling != NULL) {
13461 13485 int rval;
13462 13486
13463 13487 /*
13464 13488 * dtrace_enabling_retain() can only fail because we are
13465 13489 * trying to retain more enablings than are allowed -- but
13466 13490 * we only have one anonymous enabling, and we are guaranteed
13467 13491 * to be allowed at least one retained enabling; we assert
13468 13492 * that dtrace_enabling_retain() returns success.
13469 13493 */
13470 13494 rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
13471 13495 ASSERT(rval == 0);
13472 13496
13473 13497 dtrace_enabling_dump(dtrace_anon.dta_enabling);
13474 13498 }
13475 13499 }
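
For context, anonymous enablings arrive as driver properties named "dof-data-0", "dof-data-1", and so on; the loop above walks consecutive indices until a lookup fails. A sketch of just the naming walk, with a toy lookup standing in for dtrace_dof_property():

	#include <stdio.h>
	#include <string.h>

	static const char *present[] = { "dof-data-0", "dof-data-1", NULL };

	/* Toy stand-in for dtrace_dof_property(). */
	static int
	lookup(const char *name)
	{
		int i;

		for (i = 0; present[i] != NULL; i++) {
			if (strcmp(present[i], name) == 0)
				return (1);
		}
		return (0);
	}

	int
	main(void)
	{
		char c[32];	/* enough for "dof-data-" + digits */
		int i;

		for (i = 0; ; i++) {
			(void) snprintf(c, sizeof (c), "dof-data-%d", i);
			if (!lookup(c))
				break;
			(void) printf("would slurp %s\n", c);
		}
		return (0);
	}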
13476 13500
13477 13501 /*
13478 13502 * DTrace Helper Functions
13479 13503 */
13480 13504 static void
13481 13505 dtrace_helper_trace(dtrace_helper_action_t *helper,
13482 13506 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
13483 13507 {
13484 13508 uint32_t size, next, nnext, i;
13485 13509 dtrace_helptrace_t *ent;
13486 13510 uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
13487 13511
13488 13512 if (!dtrace_helptrace_enabled)
13489 13513 return;
13490 13514
13491 13515 ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);
13492 13516
13493 13517 /*
13494 13518 * What would a tracing framework be without its own tracing
13495 13519 * framework? (Well, a hell of a lot simpler, for starters...)
13496 13520 */
13497 13521 size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
13498 13522 sizeof (uint64_t) - sizeof (uint64_t);
13499 13523
13500 13524 /*
13501 13525 * Iterate until we can allocate a slot in the trace buffer.
13502 13526 */
13503 13527 do {
13504 13528 next = dtrace_helptrace_next;
13505 13529
13506 13530 if (next + size < dtrace_helptrace_bufsize) {
13507 13531 nnext = next + size;
13508 13532 } else {
13509 13533 nnext = size;
13510 13534 }
13511 13535 } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);
13512 13536
13513 13537 /*
13514 13538 * We have our slot; fill it in.
13515 13539 */
13516 13540 if (nnext == size)
13517 13541 next = 0;
13518 13542
13519 13543 ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
13520 13544 ent->dtht_helper = helper;
13521 13545 ent->dtht_where = where;
13522 13546 ent->dtht_nlocals = vstate->dtvs_nlocals;
13523 13547
13524 13548 ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
13525 13549 mstate->dtms_fltoffs : -1;
13526 13550 ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
13527 13551 ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
13528 13552
13529 13553 for (i = 0; i < vstate->dtvs_nlocals; i++) {
13530 13554 dtrace_statvar_t *svar;
13531 13555
13532 13556 if ((svar = vstate->dtvs_locals[i]) == NULL)
13533 13557 continue;
13534 13558
13535 13559 ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
13536 13560 ent->dtht_locals[i] =
13537 13561 ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
13538 13562 }
13539 13563 }
13540 13564
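The slot reservation above is a lock-free bump allocator: dtrace_cas32() advances a shared cursor, wrapping to the buffer's start when an entry would not fit, and the winning CAS owns the reserved range. A compact sketch with C11 atomics standing in for dtrace_cas32():

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	#define	BUFSIZE	4096

	static _Atomic uint32_t trace_next;

	/*
	 * Reserve 'size' bytes in a circular trace buffer.  A successful
	 * compare-and-swap owns [slot, slot + size); wrapping hands out
	 * offset 0, just as the "nnext == size" test above does.
	 */
	static uint32_t
	reserve(uint32_t size)
	{
		uint32_t next, nnext;

		do {
			next = atomic_load(&trace_next);
			nnext = (next + size < BUFSIZE) ? next + size : size;
		} while (!atomic_compare_exchange_weak(&trace_next,
		    &next, nnext));

		return (nnext == size ? 0 : next);
	}

	int
	main(void)
	{
		(void) printf("slot at %u\n", reserve(64));
		(void) printf("slot at %u\n", reserve(64));
		return (0);
	}

Two calls from one thread hand out offsets 0 and 64; under contention, losing CPUs simply retry the loop.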
13541 13565 static uint64_t
13542 13566 dtrace_helper(int which, dtrace_mstate_t *mstate,
13543 13567 dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
13544 13568 {
13545 13569 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
13546 13570 uint64_t sarg0 = mstate->dtms_arg[0];
13547 13571 uint64_t sarg1 = mstate->dtms_arg[1];
13548 13572 uint64_t rval;
13549 13573 dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
13550 13574 dtrace_helper_action_t *helper;
13551 13575 dtrace_vstate_t *vstate;
13552 13576 dtrace_difo_t *pred;
13553 13577 int i, trace = dtrace_helptrace_enabled;
13554 13578
13555 13579 ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);
13556 13580
13557 13581 if (helpers == NULL)
13558 13582 return (0);
13559 13583
13560 13584 if ((helper = helpers->dthps_actions[which]) == NULL)
13561 13585 return (0);
13562 13586
13563 13587 vstate = &helpers->dthps_vstate;
13564 13588 mstate->dtms_arg[0] = arg0;
13565 13589 mstate->dtms_arg[1] = arg1;
13566 13590
13567 13591 /*
13568 13592 * Now iterate over each helper. If its predicate evaluates to 'true',
13569 13593 * we'll call the corresponding actions. Note that the below calls
13570 13594 * to dtrace_dif_emulate() may set faults in machine state. This is
13571 13595 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow
13572 13596 * the stored DIF offset with its own (which is the desired behavior).
13573 13597 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
13574 13598 * from machine state; this is okay, too.
13575 13599 */
13576 13600 for (; helper != NULL; helper = helper->dtha_next) {
13577 13601 if ((pred = helper->dtha_predicate) != NULL) {
13578 13602 if (trace)
13579 13603 dtrace_helper_trace(helper, mstate, vstate, 0);
13580 13604
13581 13605 if (!dtrace_dif_emulate(pred, mstate, vstate, state))
13582 13606 goto next;
13583 13607
13584 13608 if (*flags & CPU_DTRACE_FAULT)
13585 13609 goto err;
13586 13610 }
13587 13611
13588 13612 for (i = 0; i < helper->dtha_nactions; i++) {
13589 13613 if (trace)
13590 13614 dtrace_helper_trace(helper,
13591 13615 mstate, vstate, i + 1);
13592 13616
13593 13617 rval = dtrace_dif_emulate(helper->dtha_actions[i],
13594 13618 mstate, vstate, state);
13595 13619
13596 13620 if (*flags & CPU_DTRACE_FAULT)
13597 13621 goto err;
13598 13622 }
13599 13623
13600 13624 next:
13601 13625 if (trace)
13602 13626 dtrace_helper_trace(helper, mstate, vstate,
13603 13627 DTRACE_HELPTRACE_NEXT);
13604 13628 }
13605 13629
13606 13630 if (trace)
13607 13631 dtrace_helper_trace(helper, mstate, vstate,
13608 13632 DTRACE_HELPTRACE_DONE);
13609 13633
13610 13634 /*
13611 13635 	 * Restore the arg0 and arg1 that we saved upon entry.
13612 13636 */
13613 13637 mstate->dtms_arg[0] = sarg0;
13614 13638 mstate->dtms_arg[1] = sarg1;
13615 13639
13616 13640 return (rval);
13617 13641
13618 13642 err:
13619 13643 if (trace)
13620 13644 dtrace_helper_trace(helper, mstate, vstate,
13621 13645 DTRACE_HELPTRACE_ERR);
13622 13646
13623 13647 /*
13624 13648 	 * Restore the arg0 and arg1 that we saved upon entry.
13625 13649 */
13626 13650 mstate->dtms_arg[0] = sarg0;
13627 13651 mstate->dtms_arg[1] = sarg1;
13628 13652
13629 13653 	return (0);
13630 13654 }
13631 13655
13632 13656 static void
13633 13657 dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
13634 13658 dtrace_vstate_t *vstate)
13635 13659 {
13636 13660 int i;
13637 13661
13638 13662 if (helper->dtha_predicate != NULL)
13639 13663 dtrace_difo_release(helper->dtha_predicate, vstate);
13640 13664
13641 13665 for (i = 0; i < helper->dtha_nactions; i++) {
13642 13666 ASSERT(helper->dtha_actions[i] != NULL);
13643 13667 dtrace_difo_release(helper->dtha_actions[i], vstate);
13644 13668 }
13645 13669
13646 13670 kmem_free(helper->dtha_actions,
13647 13671 helper->dtha_nactions * sizeof (dtrace_difo_t *));
13648 13672 kmem_free(helper, sizeof (dtrace_helper_action_t));
13649 13673 }
13650 13674
13651 13675 static int
13652 13676 dtrace_helper_destroygen(int gen)
13653 13677 {
13654 13678 proc_t *p = curproc;
13655 13679 dtrace_helpers_t *help = p->p_dtrace_helpers;
13656 13680 dtrace_vstate_t *vstate;
13657 13681 int i;
13658 13682
13659 13683 ASSERT(MUTEX_HELD(&dtrace_lock));
13660 13684
13661 13685 if (help == NULL || gen > help->dthps_generation)
13662 13686 return (EINVAL);
13663 13687
13664 13688 vstate = &help->dthps_vstate;
13665 13689
13666 13690 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
13667 13691 dtrace_helper_action_t *last = NULL, *h, *next;
13668 13692
13669 13693 for (h = help->dthps_actions[i]; h != NULL; h = next) {
13670 13694 next = h->dtha_next;
13671 13695
13672 13696 if (h->dtha_generation == gen) {
13673 13697 if (last != NULL) {
13674 13698 last->dtha_next = next;
13675 13699 } else {
13676 13700 help->dthps_actions[i] = next;
13677 13701 }
13678 13702
13679 13703 dtrace_helper_action_destroy(h, vstate);
13680 13704 } else {
13681 13705 last = h;
13682 13706 }
13683 13707 }
13684 13708 }
13685 13709
13686 13710 /*
13687 13711 	 * Iterate until we've cleared out all helper providers with the
13688 13712 * given generation number.
13689 13713 */
13690 13714 for (;;) {
13691 13715 dtrace_helper_provider_t *prov;
13692 13716
13693 13717 /*
13694 13718 * Look for a helper provider with the right generation. We
13695 13719 * have to start back at the beginning of the list each time
13696 13720 * because we drop dtrace_lock. It's unlikely that we'll make
13697 13721 * more than two passes.
13698 13722 */
13699 13723 for (i = 0; i < help->dthps_nprovs; i++) {
13700 13724 prov = help->dthps_provs[i];
13701 13725
13702 13726 if (prov->dthp_generation == gen)
13703 13727 break;
13704 13728 }
13705 13729
13706 13730 /*
13707 13731 * If there were no matches, we're done.
13708 13732 */
13709 13733 if (i == help->dthps_nprovs)
13710 13734 break;
13711 13735
13712 13736 /*
13713 13737 * Move the last helper provider into this slot.
13714 13738 */
13715 13739 help->dthps_nprovs--;
13716 13740 help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
13717 13741 help->dthps_provs[help->dthps_nprovs] = NULL;
13718 13742
13719 13743 mutex_exit(&dtrace_lock);
13720 13744
13721 13745 /*
13722 13746 * If we have a meta provider, remove this helper provider.
13723 13747 */
13724 13748 mutex_enter(&dtrace_meta_lock);
13725 13749 if (dtrace_meta_pid != NULL) {
13726 13750 ASSERT(dtrace_deferred_pid == NULL);
13727 13751 dtrace_helper_provider_remove(&prov->dthp_prov,
13728 13752 p->p_pid);
13729 13753 }
13730 13754 mutex_exit(&dtrace_meta_lock);
13731 13755
13732 13756 dtrace_helper_provider_destroy(prov);
13733 13757
13734 13758 mutex_enter(&dtrace_lock);
13735 13759 }
13736 13760
13737 13761 return (0);
13738 13762 }
13739 13763
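Because dtrace_lock is dropped while each provider is destroyed, the loop above deliberately rescans from the start of the array rather than trusting a saved index, and removal is O(1): the last entry moves into the vacated slot. The control shape, in miniature:

	#include <stdio.h>

	static int gens[] = { 1, 2, 1, 3 };	/* generations (toy data) */
	static int nprovs = 4;

	int
	main(void)
	{
		int gen = 1, i;

		for (;;) {
			for (i = 0; i < nprovs; i++) {
				if (gens[i] == gen)
					break;
			}

			if (i == nprovs)
				break;		/* no matches left */

			gens[i] = gens[--nprovs];	/* last into the hole */
			/* ...drop the lock, destroy, retake the lock... */
		}

		(void) printf("%d providers remain\n", nprovs);
		return (0);
	}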
13740 13764 static int
13741 13765 dtrace_helper_validate(dtrace_helper_action_t *helper)
13742 13766 {
13743 13767 int err = 0, i;
13744 13768 dtrace_difo_t *dp;
13745 13769
13746 13770 if ((dp = helper->dtha_predicate) != NULL)
13747 13771 err += dtrace_difo_validate_helper(dp);
13748 13772
13749 13773 for (i = 0; i < helper->dtha_nactions; i++)
13750 13774 err += dtrace_difo_validate_helper(helper->dtha_actions[i]);
13751 13775
13752 13776 return (err == 0);
13753 13777 }
13754 13778
13755 13779 static int
13756 13780 dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
13757 13781 {
13758 13782 dtrace_helpers_t *help;
13759 13783 dtrace_helper_action_t *helper, *last;
13760 13784 dtrace_actdesc_t *act;
13761 13785 dtrace_vstate_t *vstate;
13762 13786 dtrace_predicate_t *pred;
13763 13787 int count = 0, nactions = 0, i;
13764 13788
13765 13789 if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
13766 13790 return (EINVAL);
13767 13791
13768 13792 help = curproc->p_dtrace_helpers;
13769 13793 last = help->dthps_actions[which];
13770 13794 vstate = &help->dthps_vstate;
13771 13795
13772 13796 for (count = 0; last != NULL; last = last->dtha_next) {
13773 13797 count++;
13774 13798 if (last->dtha_next == NULL)
13775 13799 break;
13776 13800 }
13777 13801
13778 13802 /*
13779 13803 * If we already have dtrace_helper_actions_max helper actions for this
13780 13804 * helper action type, we'll refuse to add a new one.
13781 13805 */
13782 13806 if (count >= dtrace_helper_actions_max)
13783 13807 return (ENOSPC);
13784 13808
13785 13809 helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
13786 13810 helper->dtha_generation = help->dthps_generation;
13787 13811
13788 13812 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
13789 13813 ASSERT(pred->dtp_difo != NULL);
13790 13814 dtrace_difo_hold(pred->dtp_difo);
13791 13815 helper->dtha_predicate = pred->dtp_difo;
13792 13816 }
13793 13817
13794 13818 for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
13795 13819 if (act->dtad_kind != DTRACEACT_DIFEXPR)
13796 13820 goto err;
13797 13821
13798 13822 if (act->dtad_difo == NULL)
13799 13823 goto err;
13800 13824
13801 13825 nactions++;
13802 13826 }
13803 13827
13804 13828 helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
13805 13829 (helper->dtha_nactions = nactions), KM_SLEEP);
13806 13830
13807 13831 for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
13808 13832 dtrace_difo_hold(act->dtad_difo);
13809 13833 helper->dtha_actions[i++] = act->dtad_difo;
13810 13834 }
13811 13835
13812 13836 if (!dtrace_helper_validate(helper))
13813 13837 goto err;
13814 13838
13815 13839 if (last == NULL) {
13816 13840 help->dthps_actions[which] = helper;
13817 13841 } else {
13818 13842 last->dtha_next = helper;
13819 13843 }
13820 13844
13821 13845 if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
13822 13846 dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
13823 13847 dtrace_helptrace_next = 0;
13824 13848 }
13825 13849
13826 13850 return (0);
13827 13851 err:
13828 13852 dtrace_helper_action_destroy(helper, vstate);
13829 13853 return (EINVAL);
13830 13854 }
13831 13855
13832 13856 static void
13833 13857 dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
13834 13858 dof_helper_t *dofhp)
13835 13859 {
13836 13860 ASSERT(MUTEX_NOT_HELD(&dtrace_lock));
13837 13861
13838 13862 mutex_enter(&dtrace_meta_lock);
13839 13863 mutex_enter(&dtrace_lock);
13840 13864
13841 13865 if (!dtrace_attached() || dtrace_meta_pid == NULL) {
13842 13866 /*
13843 13867 * If the dtrace module is loaded but not attached, or if
13844 13868 		 * there isn't a meta provider registered to deal with
13845 13869 * these provider descriptions, we need to postpone creating
13846 13870 * the actual providers until later.
13847 13871 */
13848 13872
13849 13873 if (help->dthps_next == NULL && help->dthps_prev == NULL &&
13850 13874 dtrace_deferred_pid != help) {
13851 13875 help->dthps_deferred = 1;
13852 13876 help->dthps_pid = p->p_pid;
13853 13877 help->dthps_next = dtrace_deferred_pid;
13854 13878 help->dthps_prev = NULL;
13855 13879 if (dtrace_deferred_pid != NULL)
13856 13880 dtrace_deferred_pid->dthps_prev = help;
13857 13881 dtrace_deferred_pid = help;
13858 13882 }
13859 13883
13860 13884 mutex_exit(&dtrace_lock);
13861 13885
13862 13886 } else if (dofhp != NULL) {
13863 13887 /*
13864 13888 * If the dtrace module is loaded and we have a particular
13865 13889 * helper provider description, pass that off to the
13866 13890 * meta provider.
13867 13891 */
13868 13892
13869 13893 mutex_exit(&dtrace_lock);
13870 13894
13871 13895 dtrace_helper_provide(dofhp, p->p_pid);
13872 13896
13873 13897 } else {
13874 13898 /*
13875 13899 * Otherwise, just pass all the helper provider descriptions
13876 13900 * off to the meta provider.
13877 13901 */
13878 13902
13879 13903 int i;
13880 13904 mutex_exit(&dtrace_lock);
13881 13905
13882 13906 for (i = 0; i < help->dthps_nprovs; i++) {
13883 13907 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
13884 13908 p->p_pid);
13885 13909 }
13886 13910 }
13887 13911
13888 13912 mutex_exit(&dtrace_meta_lock);
13889 13913 }
13890 13914
13891 13915 static int
13892 13916 dtrace_helper_provider_add(dof_helper_t *dofhp, int gen)
13893 13917 {
13894 13918 dtrace_helpers_t *help;
13895 13919 dtrace_helper_provider_t *hprov, **tmp_provs;
13896 13920 uint_t tmp_maxprovs, i;
13897 13921
13898 13922 ASSERT(MUTEX_HELD(&dtrace_lock));
13899 13923
13900 13924 help = curproc->p_dtrace_helpers;
13901 13925 ASSERT(help != NULL);
13902 13926
13903 13927 /*
13904 13928 * If we already have dtrace_helper_providers_max helper providers,
13905 13929 	 * we refuse to add a new one.
13906 13930 */
13907 13931 if (help->dthps_nprovs >= dtrace_helper_providers_max)
13908 13932 return (ENOSPC);
13909 13933
13910 13934 /*
13911 13935 * Check to make sure this isn't a duplicate.
13912 13936 */
13913 13937 for (i = 0; i < help->dthps_nprovs; i++) {
13914 13938 if (dofhp->dofhp_addr ==
13915 13939 help->dthps_provs[i]->dthp_prov.dofhp_addr)
13916 13940 return (EALREADY);
13917 13941 }
13918 13942
13919 13943 hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
13920 13944 hprov->dthp_prov = *dofhp;
13921 13945 hprov->dthp_ref = 1;
13922 13946 hprov->dthp_generation = gen;
13923 13947
13924 13948 /*
13925 13949 * Allocate a bigger table for helper providers if it's already full.
13926 13950 */
13927 13951 if (help->dthps_maxprovs == help->dthps_nprovs) {
13928 13952 tmp_maxprovs = help->dthps_maxprovs;
13929 13953 tmp_provs = help->dthps_provs;
13930 13954
13931 13955 if (help->dthps_maxprovs == 0)
13932 13956 help->dthps_maxprovs = 2;
13933 13957 else
13934 13958 help->dthps_maxprovs *= 2;
13935 13959 if (help->dthps_maxprovs > dtrace_helper_providers_max)
13936 13960 help->dthps_maxprovs = dtrace_helper_providers_max;
13937 13961
13938 13962 ASSERT(tmp_maxprovs < help->dthps_maxprovs);
13939 13963
13940 13964 help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
13941 13965 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
13942 13966
13943 13967 if (tmp_provs != NULL) {
13944 13968 bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
13945 13969 sizeof (dtrace_helper_provider_t *));
13946 13970 kmem_free(tmp_provs, tmp_maxprovs *
13947 13971 sizeof (dtrace_helper_provider_t *));
13948 13972 }
13949 13973 }
13950 13974
13951 13975 help->dthps_provs[help->dthps_nprovs] = hprov;
13952 13976 help->dthps_nprovs++;
13953 13977
13954 13978 return (0);
13955 13979 }
13956 13980
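The growth step above is a standard doubling table with a cap: capacity goes 0, 2, 4, and so on up to dtrace_helper_providers_max, and the old pointer table is copied over. A user-space sketch of the same resize logic (the cap value here is illustrative):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define	PROVIDERS_MAX	32	/* stand-in for the real maximum */

	int
	main(void)
	{
		void **provs = NULL;
		size_t maxprovs = 0, nprovs = 0, i;

		for (i = 0; i < 5; i++) {
			if (nprovs == maxprovs) {
				size_t omax = maxprovs;
				void **oprovs = provs;

				maxprovs = (omax == 0) ? 2 : omax * 2;
				if (maxprovs > PROVIDERS_MAX)
					maxprovs = PROVIDERS_MAX;

				provs = calloc(maxprovs, sizeof (void *));
				if (provs == NULL)
					return (1);

				if (oprovs != NULL) {
					(void) memcpy(provs, oprovs,
					    omax * sizeof (void *));
					free(oprovs);
				}
			}

			provs[nprovs++] = (void *)(i + 1);	/* fake entry */
			(void) printf("n=%zu max=%zu\n", nprovs, maxprovs);
		}

		free(provs);
		return (0);
	}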
13957 13981 static void
13958 13982 dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
13959 13983 {
13960 13984 mutex_enter(&dtrace_lock);
13961 13985
13962 13986 if (--hprov->dthp_ref == 0) {
13963 13987 dof_hdr_t *dof;
13964 13988 mutex_exit(&dtrace_lock);
13965 13989 dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
13966 13990 dtrace_dof_destroy(dof);
13967 13991 kmem_free(hprov, sizeof (dtrace_helper_provider_t));
13968 13992 } else {
13969 13993 mutex_exit(&dtrace_lock);
13970 13994 }
13971 13995 }
13972 13996
13973 13997 static int
13974 13998 dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
13975 13999 {
13976 14000 uintptr_t daddr = (uintptr_t)dof;
13977 14001 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
13978 14002 dof_provider_t *provider;
13979 14003 dof_probe_t *probe;
13980 14004 uint8_t *arg;
13981 14005 char *strtab, *typestr;
13982 14006 dof_stridx_t typeidx;
13983 14007 size_t typesz;
13984 14008 uint_t nprobes, j, k;
13985 14009
13986 14010 ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);
13987 14011
13988 14012 if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
13989 14013 dtrace_dof_error(dof, "misaligned section offset");
13990 14014 return (-1);
13991 14015 }
13992 14016
13993 14017 /*
13994 14018 * The section needs to be large enough to contain the DOF provider
13995 14019 * structure appropriate for the given version.
13996 14020 */
13997 14021 if (sec->dofs_size <
13998 14022 ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
13999 14023 offsetof(dof_provider_t, dofpv_prenoffs) :
14000 14024 sizeof (dof_provider_t))) {
14001 14025 dtrace_dof_error(dof, "provider section too small");
14002 14026 return (-1);
14003 14027 }
14004 14028
14005 14029 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
14006 14030 str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
14007 14031 prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
14008 14032 arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
14009 14033 off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);
14010 14034
14011 14035 if (str_sec == NULL || prb_sec == NULL ||
14012 14036 arg_sec == NULL || off_sec == NULL)
14013 14037 return (-1);
14014 14038
14015 14039 enoff_sec = NULL;
14016 14040
14017 14041 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
14018 14042 provider->dofpv_prenoffs != DOF_SECT_NONE &&
14019 14043 (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
14020 14044 provider->dofpv_prenoffs)) == NULL)
14021 14045 return (-1);
14022 14046
14023 14047 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
14024 14048
14025 14049 if (provider->dofpv_name >= str_sec->dofs_size ||
14026 14050 strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
14027 14051 dtrace_dof_error(dof, "invalid provider name");
14028 14052 return (-1);
14029 14053 }
14030 14054
14031 14055 if (prb_sec->dofs_entsize == 0 ||
14032 14056 prb_sec->dofs_entsize > prb_sec->dofs_size) {
14033 14057 dtrace_dof_error(dof, "invalid entry size");
14034 14058 return (-1);
14035 14059 }
14036 14060
14037 14061 if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
14038 14062 dtrace_dof_error(dof, "misaligned entry size");
14039 14063 return (-1);
14040 14064 }
14041 14065
14042 14066 if (off_sec->dofs_entsize != sizeof (uint32_t)) {
14043 14067 dtrace_dof_error(dof, "invalid entry size");
14044 14068 return (-1);
14045 14069 }
14046 14070
14047 14071 if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
14048 14072 dtrace_dof_error(dof, "misaligned section offset");
14049 14073 return (-1);
14050 14074 }
14051 14075
14052 14076 if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
14053 14077 dtrace_dof_error(dof, "invalid entry size");
14054 14078 return (-1);
14055 14079 }
14056 14080
14057 14081 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
14058 14082
14059 14083 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
14060 14084
14061 14085 /*
14062 14086 * Take a pass through the probes to check for errors.
14063 14087 */
14064 14088 for (j = 0; j < nprobes; j++) {
14065 14089 probe = (dof_probe_t *)(uintptr_t)(daddr +
14066 14090 prb_sec->dofs_offset + j * prb_sec->dofs_entsize);
14067 14091
14068 14092 if (probe->dofpr_func >= str_sec->dofs_size) {
14069 14093 dtrace_dof_error(dof, "invalid function name");
14070 14094 return (-1);
14071 14095 }
14072 14096
14073 14097 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
14074 14098 dtrace_dof_error(dof, "function name too long");
14075 14099 return (-1);
14076 14100 }
14077 14101
14078 14102 if (probe->dofpr_name >= str_sec->dofs_size ||
14079 14103 strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
14080 14104 dtrace_dof_error(dof, "invalid probe name");
14081 14105 return (-1);
14082 14106 }
14083 14107
14084 14108 /*
14085 14109 * The offset count must not wrap the index, and the offsets
14086 14110 * must also not overflow the section's data.
14087 14111 */
14088 14112 if (probe->dofpr_offidx + probe->dofpr_noffs <
14089 14113 probe->dofpr_offidx ||
14090 14114 (probe->dofpr_offidx + probe->dofpr_noffs) *
14091 14115 off_sec->dofs_entsize > off_sec->dofs_size) {
14092 14116 dtrace_dof_error(dof, "invalid probe offset");
14093 14117 return (-1);
14094 14118 }
14095 14119
14096 14120 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
14097 14121 /*
14098 14122 * If there's no is-enabled offset section, make sure
14099 14123 * there aren't any is-enabled offsets. Otherwise
14100 14124 * perform the same checks as for probe offsets
14101 14125 * (immediately above).
14102 14126 */
14103 14127 if (enoff_sec == NULL) {
14104 14128 if (probe->dofpr_enoffidx != 0 ||
14105 14129 probe->dofpr_nenoffs != 0) {
14106 14130 dtrace_dof_error(dof, "is-enabled "
14107 14131 "offsets with null section");
14108 14132 return (-1);
14109 14133 }
14110 14134 } else if (probe->dofpr_enoffidx +
14111 14135 probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
14112 14136 (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
14113 14137 enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
14114 14138 dtrace_dof_error(dof, "invalid is-enabled "
14115 14139 "offset");
14116 14140 return (-1);
14117 14141 }
14118 14142
14119 14143 if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
14120 14144 dtrace_dof_error(dof, "zero probe and "
14121 14145 "is-enabled offsets");
14122 14146 return (-1);
14123 14147 }
14124 14148 } else if (probe->dofpr_noffs == 0) {
14125 14149 dtrace_dof_error(dof, "zero probe offsets");
14126 14150 return (-1);
14127 14151 }
14128 14152
14129 14153 if (probe->dofpr_argidx + probe->dofpr_xargc <
14130 14154 probe->dofpr_argidx ||
14131 14155 (probe->dofpr_argidx + probe->dofpr_xargc) *
14132 14156 arg_sec->dofs_entsize > arg_sec->dofs_size) {
14133 14157 dtrace_dof_error(dof, "invalid args");
14134 14158 return (-1);
14135 14159 }
14136 14160
14137 14161 typeidx = probe->dofpr_nargv;
14138 14162 typestr = strtab + probe->dofpr_nargv;
14139 14163 for (k = 0; k < probe->dofpr_nargc; k++) {
14140 14164 if (typeidx >= str_sec->dofs_size) {
14141 14165 dtrace_dof_error(dof, "bad "
14142 14166 "native argument type");
14143 14167 return (-1);
14144 14168 }
14145 14169
14146 14170 typesz = strlen(typestr) + 1;
14147 14171 if (typesz > DTRACE_ARGTYPELEN) {
14148 14172 dtrace_dof_error(dof, "native "
14149 14173 "argument type too long");
14150 14174 return (-1);
14151 14175 }
14152 14176 typeidx += typesz;
14153 14177 typestr += typesz;
14154 14178 }
14155 14179
14156 14180 typeidx = probe->dofpr_xargv;
14157 14181 typestr = strtab + probe->dofpr_xargv;
14158 14182 for (k = 0; k < probe->dofpr_xargc; k++) {
14159 14183 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
14160 14184 dtrace_dof_error(dof, "bad "
14161 14185 "native argument index");
14162 14186 return (-1);
14163 14187 }
14164 14188
14165 14189 if (typeidx >= str_sec->dofs_size) {
14166 14190 dtrace_dof_error(dof, "bad "
14167 14191 "translated argument type");
14168 14192 return (-1);
14169 14193 }
14170 14194
14171 14195 typesz = strlen(typestr) + 1;
14172 14196 if (typesz > DTRACE_ARGTYPELEN) {
14173 14197 dtrace_dof_error(dof, "translated argument "
14174 14198 "type too long");
14175 14199 return (-1);
14176 14200 }
14177 14201
14178 14202 typeidx += typesz;
14179 14203 typestr += typesz;
14180 14204 }
14181 14205 }
14182 14206
14183 14207 return (0);
14184 14208 }
14185 14209
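A recurring idiom in the validation above deserves a note: the test "idx + n < idx" detects unsigned wraparound in the index arithmetic before the scaled extent is compared against the section size. Isolated into a helper (the widened multiply is added here for safety; the kernel code works in the section's native 32-bit types):

	#include <stdint.h>
	#include <stdio.h>

	static int
	range_ok(uint32_t idx, uint32_t n, uint32_t entsize, uint32_t secsize)
	{
		if (idx + n < idx)
			return (0);	/* index arithmetic wrapped */

		if ((uint64_t)(idx + n) * entsize > secsize)
			return (0);	/* extent exceeds the section */

		return (1);
	}

	int
	main(void)
	{
		(void) printf("%d\n", range_ok(0, 4, 4, 64));		/* 1 */
		(void) printf("%d\n", range_ok(0xfffffffe, 4, 4, 64));	/* 0 */
		return (0);
	}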
14186 14210 static int
14187 14211 dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp)
14188 14212 {
14189 14213 dtrace_helpers_t *help;
14190 14214 dtrace_vstate_t *vstate;
14191 14215 dtrace_enabling_t *enab = NULL;
14192 14216 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
14193 14217 uintptr_t daddr = (uintptr_t)dof;
14194 14218
14195 14219 ASSERT(MUTEX_HELD(&dtrace_lock));
14196 14220
14197 14221 if ((help = curproc->p_dtrace_helpers) == NULL)
14198 14222 help = dtrace_helpers_create(curproc);
14199 14223
14200 14224 vstate = &help->dthps_vstate;
14201 14225
14202 14226 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
14203 14227 dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
14204 14228 dtrace_dof_destroy(dof);
14205 14229 return (rv);
14206 14230 }
14207 14231
14208 14232 /*
14209 14233 * Look for helper providers and validate their descriptions.
14210 14234 */
14211 14235 if (dhp != NULL) {
14212 14236 for (i = 0; i < dof->dofh_secnum; i++) {
14213 14237 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
14214 14238 dof->dofh_secoff + i * dof->dofh_secsize);
14215 14239
14216 14240 if (sec->dofs_type != DOF_SECT_PROVIDER)
14217 14241 continue;
14218 14242
14219 14243 if (dtrace_helper_provider_validate(dof, sec) != 0) {
14220 14244 dtrace_enabling_destroy(enab);
14221 14245 dtrace_dof_destroy(dof);
14222 14246 return (-1);
14223 14247 }
14224 14248
14225 14249 nprovs++;
14226 14250 }
14227 14251 }
14228 14252
14229 14253 /*
14230 14254 * Now we need to walk through the ECB descriptions in the enabling.
14231 14255 */
14232 14256 for (i = 0; i < enab->dten_ndesc; i++) {
14233 14257 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
14234 14258 dtrace_probedesc_t *desc = &ep->dted_probe;
14235 14259
14236 14260 if (strcmp(desc->dtpd_provider, "dtrace") != 0)
14237 14261 continue;
14238 14262
14239 14263 if (strcmp(desc->dtpd_mod, "helper") != 0)
14240 14264 continue;
14241 14265
14242 14266 if (strcmp(desc->dtpd_func, "ustack") != 0)
14243 14267 continue;
14244 14268
14245 14269 if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK,
14246 14270 ep)) != 0) {
14247 14271 /*
14248 14272 * Adding this helper action failed -- we are now going
14249 14273 * to rip out the entire generation and return failure.
14250 14274 */
14251 14275 (void) dtrace_helper_destroygen(help->dthps_generation);
14252 14276 dtrace_enabling_destroy(enab);
14253 14277 dtrace_dof_destroy(dof);
14254 14278 return (-1);
14255 14279 }
14256 14280
14257 14281 nhelpers++;
14258 14282 }
14259 14283
14260 14284 if (nhelpers < enab->dten_ndesc)
14261 14285 dtrace_dof_error(dof, "unmatched helpers");
14262 14286
14263 14287 gen = help->dthps_generation++;
14264 14288 dtrace_enabling_destroy(enab);
14265 14289
14266 14290 if (dhp != NULL && nprovs > 0) {
14267 14291 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
14268 14292 if (dtrace_helper_provider_add(dhp, gen) == 0) {
14269 14293 mutex_exit(&dtrace_lock);
14270 14294 dtrace_helper_provider_register(curproc, help, dhp);
14271 14295 mutex_enter(&dtrace_lock);
14272 14296
14273 14297 destroy = 0;
14274 14298 }
14275 14299 }
14276 14300
14277 14301 if (destroy)
14278 14302 dtrace_dof_destroy(dof);
14279 14303
14280 14304 return (gen);
14281 14305 }
14282 14306
14283 14307 static dtrace_helpers_t *
14284 14308 dtrace_helpers_create(proc_t *p)
14285 14309 {
14286 14310 dtrace_helpers_t *help;
14287 14311
14288 14312 ASSERT(MUTEX_HELD(&dtrace_lock));
14289 14313 ASSERT(p->p_dtrace_helpers == NULL);
14290 14314
14291 14315 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
14292 14316 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
14293 14317 DTRACE_NHELPER_ACTIONS, KM_SLEEP);
14294 14318
14295 14319 p->p_dtrace_helpers = help;
14296 14320 dtrace_helpers++;
14297 14321
14298 14322 return (help);
14299 14323 }
14300 14324
14301 14325 static void
14302 14326 dtrace_helpers_destroy(void)
14303 14327 {
14304 14328 dtrace_helpers_t *help;
14305 14329 dtrace_vstate_t *vstate;
14306 14330 proc_t *p = curproc;
14307 14331 int i;
14308 14332
14309 14333 mutex_enter(&dtrace_lock);
14310 14334
14311 14335 ASSERT(p->p_dtrace_helpers != NULL);
14312 14336 ASSERT(dtrace_helpers > 0);
14313 14337
14314 14338 help = p->p_dtrace_helpers;
14315 14339 vstate = &help->dthps_vstate;
14316 14340
14317 14341 /*
14318 14342 * We're now going to lose the help from this process.
14319 14343 */
14320 14344 p->p_dtrace_helpers = NULL;
14321 14345 dtrace_sync();
14322 14346
14323 14347 /*
14324 14348 	 * Destroy the helper actions.
14325 14349 */
14326 14350 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
14327 14351 dtrace_helper_action_t *h, *next;
14328 14352
14329 14353 for (h = help->dthps_actions[i]; h != NULL; h = next) {
14330 14354 next = h->dtha_next;
14331 14355 dtrace_helper_action_destroy(h, vstate);
14332 14356 h = next;
14333 14357 }
14334 14358 }
14335 14359
14336 14360 mutex_exit(&dtrace_lock);
14337 14361
14338 14362 /*
14339 14363 * Destroy the helper providers.
14340 14364 */
14341 14365 if (help->dthps_maxprovs > 0) {
14342 14366 mutex_enter(&dtrace_meta_lock);
14343 14367 if (dtrace_meta_pid != NULL) {
14344 14368 ASSERT(dtrace_deferred_pid == NULL);
14345 14369
14346 14370 for (i = 0; i < help->dthps_nprovs; i++) {
14347 14371 dtrace_helper_provider_remove(
14348 14372 &help->dthps_provs[i]->dthp_prov, p->p_pid);
14349 14373 }
14350 14374 } else {
14351 14375 mutex_enter(&dtrace_lock);
14352 14376 ASSERT(help->dthps_deferred == 0 ||
14353 14377 help->dthps_next != NULL ||
14354 14378 help->dthps_prev != NULL ||
14355 14379 help == dtrace_deferred_pid);
14356 14380
14357 14381 /*
14358 14382 * Remove the helper from the deferred list.
14359 14383 */
14360 14384 if (help->dthps_next != NULL)
14361 14385 help->dthps_next->dthps_prev = help->dthps_prev;
14362 14386 if (help->dthps_prev != NULL)
14363 14387 help->dthps_prev->dthps_next = help->dthps_next;
14364 14388 if (dtrace_deferred_pid == help) {
14365 14389 dtrace_deferred_pid = help->dthps_next;
14366 14390 ASSERT(help->dthps_prev == NULL);
14367 14391 }
14368 14392
14369 14393 mutex_exit(&dtrace_lock);
14370 14394 }
14371 14395
14372 14396 mutex_exit(&dtrace_meta_lock);
14373 14397
14374 14398 for (i = 0; i < help->dthps_nprovs; i++) {
14375 14399 dtrace_helper_provider_destroy(help->dthps_provs[i]);
14376 14400 }
14377 14401
14378 14402 kmem_free(help->dthps_provs, help->dthps_maxprovs *
14379 14403 sizeof (dtrace_helper_provider_t *));
14380 14404 }
14381 14405
14382 14406 mutex_enter(&dtrace_lock);
14383 14407
14384 14408 dtrace_vstate_fini(&help->dthps_vstate);
14385 14409 kmem_free(help->dthps_actions,
14386 14410 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
14387 14411 kmem_free(help, sizeof (dtrace_helpers_t));
14388 14412
14389 14413 --dtrace_helpers;
14390 14414 mutex_exit(&dtrace_lock);
14391 14415 }
14392 14416
14393 14417 static void
14394 14418 dtrace_helpers_duplicate(proc_t *from, proc_t *to)
14395 14419 {
14396 14420 dtrace_helpers_t *help, *newhelp;
14397 14421 dtrace_helper_action_t *helper, *new, *last;
14398 14422 dtrace_difo_t *dp;
14399 14423 dtrace_vstate_t *vstate;
14400 14424 int i, j, sz, hasprovs = 0;
14401 14425
14402 14426 mutex_enter(&dtrace_lock);
14403 14427 ASSERT(from->p_dtrace_helpers != NULL);
14404 14428 ASSERT(dtrace_helpers > 0);
14405 14429
14406 14430 help = from->p_dtrace_helpers;
14407 14431 newhelp = dtrace_helpers_create(to);
14408 14432 ASSERT(to->p_dtrace_helpers != NULL);
14409 14433
14410 14434 newhelp->dthps_generation = help->dthps_generation;
14411 14435 vstate = &newhelp->dthps_vstate;
14412 14436
14413 14437 /*
14414 14438 * Duplicate the helper actions.
14415 14439 */
14416 14440 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
14417 14441 if ((helper = help->dthps_actions[i]) == NULL)
14418 14442 continue;
14419 14443
14420 14444 for (last = NULL; helper != NULL; helper = helper->dtha_next) {
14421 14445 new = kmem_zalloc(sizeof (dtrace_helper_action_t),
14422 14446 KM_SLEEP);
14423 14447 new->dtha_generation = helper->dtha_generation;
14424 14448
14425 14449 if ((dp = helper->dtha_predicate) != NULL) {
14426 14450 dp = dtrace_difo_duplicate(dp, vstate);
14427 14451 new->dtha_predicate = dp;
14428 14452 }
14429 14453
14430 14454 new->dtha_nactions = helper->dtha_nactions;
14431 14455 sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
14432 14456 new->dtha_actions = kmem_alloc(sz, KM_SLEEP);
14433 14457
14434 14458 for (j = 0; j < new->dtha_nactions; j++) {
14435 14459 dtrace_difo_t *dp = helper->dtha_actions[j];
14436 14460
14437 14461 ASSERT(dp != NULL);
14438 14462 dp = dtrace_difo_duplicate(dp, vstate);
14439 14463 new->dtha_actions[j] = dp;
14440 14464 }
14441 14465
14442 14466 if (last != NULL) {
14443 14467 last->dtha_next = new;
14444 14468 } else {
14445 14469 newhelp->dthps_actions[i] = new;
14446 14470 }
14447 14471
14448 14472 last = new;
14449 14473 }
14450 14474 }
14451 14475
14452 14476 /*
14453 14477 * Duplicate the helper providers and register them with the
14454 14478 * DTrace framework.
14455 14479 */
14456 14480 if (help->dthps_nprovs > 0) {
14457 14481 newhelp->dthps_nprovs = help->dthps_nprovs;
14458 14482 newhelp->dthps_maxprovs = help->dthps_nprovs;
14459 14483 newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
14460 14484 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
14461 14485 for (i = 0; i < newhelp->dthps_nprovs; i++) {
14462 14486 newhelp->dthps_provs[i] = help->dthps_provs[i];
14463 14487 newhelp->dthps_provs[i]->dthp_ref++;
14464 14488 }
14465 14489
14466 14490 hasprovs = 1;
14467 14491 }
14468 14492
14469 14493 mutex_exit(&dtrace_lock);
14470 14494
14471 14495 if (hasprovs)
14472 14496 dtrace_helper_provider_register(to, newhelp, NULL);
14473 14497 }
14474 14498
14475 14499 /*
14476 14500 * DTrace Hook Functions
14477 14501 */
14478 14502 static void
14479 14503 dtrace_module_loaded(struct modctl *ctl)
14480 14504 {
14481 14505 dtrace_provider_t *prv;
14482 14506
14483 14507 mutex_enter(&dtrace_provider_lock);
14484 14508 mutex_enter(&mod_lock);
14485 14509
14486 14510 ASSERT(ctl->mod_busy);
14487 14511
14488 14512 /*
14489 14513 	 * We're going to call each provider's per-module provide operation
14490 14514 * specifying only this module.
14491 14515 */
14492 14516 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
14493 14517 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
14494 14518
14495 14519 mutex_exit(&mod_lock);
14496 14520 mutex_exit(&dtrace_provider_lock);
14497 14521
14498 14522 /*
14499 14523 * If we have any retained enablings, we need to match against them.
14500 14524 * Enabling probes requires that cpu_lock be held, and we cannot hold
14501 14525 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
14502 14526 * module. (In particular, this happens when loading scheduling
14503 14527 * classes.) So if we have any retained enablings, we need to dispatch
14504 14528 * our task queue to do the match for us.
14505 14529 */
14506 14530 mutex_enter(&dtrace_lock);
14507 14531
14508 14532 if (dtrace_retained == NULL) {
14509 14533 mutex_exit(&dtrace_lock);
14510 14534 return;
14511 14535 }
14512 14536
14513 14537 (void) taskq_dispatch(dtrace_taskq,
14514 14538 (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);
14515 14539
14516 14540 mutex_exit(&dtrace_lock);
14517 14541
14518 14542 /*
14519 14543 * And now, for a little heuristic sleaze: in general, we want to
14520 14544 * match modules as soon as they load. However, we cannot guarantee
14521 14545 * this, because it would lead us to the lock ordering violation
14522 14546 * outlined above. The common case, of course, is that cpu_lock is
14523 14547 * _not_ held -- so we delay here for a clock tick, hoping that that's
14524 14548 * long enough for the task queue to do its work. If it's not, it's
14525 14549 * not a serious problem -- it just means that the module that we
14526 14550 * just loaded may not be immediately instrumentable.
14527 14551 */
14528 14552 delay(1);
14529 14553 }
14530 14554
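The deferral above sidesteps a lock-ordering hazard: a module load may already hold cpu_lock, and matching enablings needs it too, so the match runs from a task queue instead. A toy version, with a pthread standing in for taskq_dispatch() and a sleep for the delay(1) heuristic:

	#include <pthread.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Stand-in for the taskq'd dtrace_enabling_matchall(). */
	static void *
	matchall(void *arg)
	{
		(void) printf("matching retained enablings\n");
		return (arg);
	}

	int
	main(void)
	{
		pthread_t t;

		if (pthread_create(&t, NULL, matchall, NULL) != 0)
			return (1);

		/* Like delay(1): a hope, not a guarantee, that it ran. */
		(void) usleep(10000);

		(void) pthread_join(t, NULL);
		return (0);
	}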
14531 14555 static void
14532 14556 dtrace_module_unloaded(struct modctl *ctl)
14533 14557 {
14534 14558 dtrace_probe_t template, *probe, *first, *next;
14535 14559 dtrace_provider_t *prov;
14536 14560
14537 14561 template.dtpr_mod = ctl->mod_modname;
14538 14562
14539 14563 mutex_enter(&dtrace_provider_lock);
14540 14564 mutex_enter(&mod_lock);
14541 14565 mutex_enter(&dtrace_lock);
14542 14566
14543 14567 if (dtrace_bymod == NULL) {
14544 14568 /*
14545 14569 * The DTrace module is loaded (obviously) but not attached;
14546 14570 * we don't have any work to do.
14547 14571 */
14548 14572 mutex_exit(&dtrace_provider_lock);
14549 14573 mutex_exit(&mod_lock);
14550 14574 mutex_exit(&dtrace_lock);
14551 14575 return;
14552 14576 }
14553 14577
14554 14578 for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
14555 14579 probe != NULL; probe = probe->dtpr_nextmod) {
14556 14580 if (probe->dtpr_ecb != NULL) {
14557 14581 mutex_exit(&dtrace_provider_lock);
14558 14582 mutex_exit(&mod_lock);
14559 14583 mutex_exit(&dtrace_lock);
14560 14584
14561 14585 /*
14562 14586 * This shouldn't _actually_ be possible -- we're
14563 14587 * unloading a module that has an enabled probe in it.
14564 14588 * (It's normally up to the provider to make sure that
14565 14589 * this can't happen.) However, because dtps_enable()
14566 14590 * doesn't have a failure mode, there can be an
14567 14591 * enable/unload race. Upshot: we don't want to
14568 14592 * assert, but we're not going to disable the
14569 14593 * probe, either.
14570 14594 */
14571 14595 if (dtrace_err_verbose) {
14572 14596 cmn_err(CE_WARN, "unloaded module '%s' had "
14573 14597 "enabled probes", ctl->mod_modname);
14574 14598 }
14575 14599
14576 14600 return;
14577 14601 }
14578 14602 }
14579 14603
14580 14604 probe = first;
14581 14605
14582 14606 for (first = NULL; probe != NULL; probe = next) {
14583 14607 ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);
14584 14608
14585 14609 dtrace_probes[probe->dtpr_id - 1] = NULL;
14586 14610
14587 14611 next = probe->dtpr_nextmod;
14588 14612 dtrace_hash_remove(dtrace_bymod, probe);
14589 14613 dtrace_hash_remove(dtrace_byfunc, probe);
14590 14614 dtrace_hash_remove(dtrace_byname, probe);
14591 14615
14592 14616 if (first == NULL) {
14593 14617 first = probe;
14594 14618 probe->dtpr_nextmod = NULL;
14595 14619 } else {
14596 14620 probe->dtpr_nextmod = first;
14597 14621 first = probe;
14598 14622 }
14599 14623 }
14600 14624
14601 14625 /*
14602 14626 * We've removed all of the module's probes from the hash chains and
14603 14627 * from the probe array. Now issue a dtrace_sync() to be sure that
14604 14628 * everyone has cleared out from any probe array processing.
14605 14629 */
14606 14630 dtrace_sync();
14607 14631
14608 14632 for (probe = first; probe != NULL; probe = first) {
14609 14633 first = probe->dtpr_nextmod;
14610 14634 prov = probe->dtpr_provider;
14611 14635 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
14612 14636 probe->dtpr_arg);
14613 14637 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
14614 14638 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
14615 14639 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
14616 14640 vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
14617 14641 kmem_free(probe, sizeof (dtrace_probe_t));
14618 14642 }
14619 14643
14620 14644 mutex_exit(&dtrace_lock);
14621 14645 mutex_exit(&mod_lock);
14622 14646 mutex_exit(&dtrace_provider_lock);
14623 14647 }
14624 14648
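The teardown above follows an unlink, sync, free discipline: probes are first made unreachable from every lookup structure, a dtrace_sync() then guarantees no CPU still holds a reference from probe context, and only then is memory released. In miniature, with a fence standing in for the sync:

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	static _Atomic(int *) probes[1];

	int
	main(void)
	{
		int *p = malloc(sizeof (*p));

		if (p == NULL)
			return (1);
		atomic_store(&probes[0], p);

		atomic_store(&probes[0], (int *)NULL);	/* 1: unlink */
		atomic_thread_fence(memory_order_seq_cst); /* 2: "sync" */
		free(p);				/* 3: safe to free */

		(void) printf("freed after quiesce\n");
		return (0);
	}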
14625 14649 void
14626 14650 dtrace_suspend(void)
14627 14651 {
14628 14652 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
14629 14653 }
14630 14654
14631 14655 void
14632 14656 dtrace_resume(void)
14633 14657 {
14634 14658 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
14635 14659 }
14636 14660
14637 14661 static int
14638 14662 dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
14639 14663 {
14640 14664 ASSERT(MUTEX_HELD(&cpu_lock));
14641 14665 mutex_enter(&dtrace_lock);
14642 14666
14643 14667 switch (what) {
14644 14668 case CPU_CONFIG: {
14645 14669 dtrace_state_t *state;
14646 14670 dtrace_optval_t *opt, rs, c;
14647 14671
14648 14672 /*
14649 14673 * For now, we only allocate a new buffer for anonymous state.
14650 14674 */
14651 14675 if ((state = dtrace_anon.dta_state) == NULL)
14652 14676 break;
14653 14677
14654 14678 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
14655 14679 break;
14656 14680
14657 14681 opt = state->dts_options;
14658 14682 c = opt[DTRACEOPT_CPU];
14659 14683
14660 14684 if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
14661 14685 break;
14662 14686
14663 14687 /*
14664 14688 * Regardless of what the actual policy is, we're going to
14665 14689 * temporarily set our resize policy to be manual. We're
14666 14690 * also going to temporarily set our CPU option to denote
14667 14691 * the newly configured CPU.
14668 14692 */
14669 14693 rs = opt[DTRACEOPT_BUFRESIZE];
14670 14694 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
14671 14695 opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;
14672 14696
14673 14697 (void) dtrace_state_buffers(state);
14674 14698
14675 14699 opt[DTRACEOPT_BUFRESIZE] = rs;
14676 14700 opt[DTRACEOPT_CPU] = c;
14677 14701
14678 14702 break;
14679 14703 }
14680 14704
14681 14705 case CPU_UNCONFIG:
14682 14706 /*
14683 14707 * We don't free the buffer in the CPU_UNCONFIG case. (The
14684 14708 * buffer will be freed when the consumer exits.)
14685 14709 */
14686 14710 break;
14687 14711
14688 14712 default:
14689 14713 break;
14690 14714 }
14691 14715
14692 14716 mutex_exit(&dtrace_lock);
14693 14717 return (0);
14694 14718 }
14695 14719
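The CPU_CONFIG path uses a save/override/restore dance: the resize policy is forced to "manual" and the CPU option pinned to the new CPU just for the dtrace_state_buffers() call, then both are put back. Schematically (option indices and values here are illustrative):

	#include <stdio.h>

	enum { OPT_BUFRESIZE, OPT_CPU, OPT_MAX };

	static long opt[OPT_MAX] = { 0 /* auto */, -1 /* unset */ };

	/* Stand-in for dtrace_state_buffers(). */
	static void
	alloc_buffers(void)
	{
		(void) printf("allocating for cpu %ld, resize=%ld\n",
		    opt[OPT_CPU], opt[OPT_BUFRESIZE]);
	}

	int
	main(void)
	{
		long rs = opt[OPT_BUFRESIZE], c = opt[OPT_CPU];

		opt[OPT_BUFRESIZE] = 1;		/* "manual", temporarily */
		opt[OPT_CPU] = 3;		/* the newly configured CPU */
		alloc_buffers();
		opt[OPT_BUFRESIZE] = rs;	/* restore the real policy */
		opt[OPT_CPU] = c;

		return (0);
	}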
14696 14720 static void
14697 14721 dtrace_cpu_setup_initial(processorid_t cpu)
14698 14722 {
14699 14723 (void) dtrace_cpu_setup(CPU_CONFIG, cpu);
14700 14724 }
14701 14725
14702 14726 static void
14703 14727 dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
14704 14728 {
14705 14729 if (dtrace_toxranges >= dtrace_toxranges_max) {
14706 14730 int osize, nsize;
14707 14731 dtrace_toxrange_t *range;
14708 14732
14709 14733 osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
14710 14734
14711 14735 if (osize == 0) {
14712 14736 ASSERT(dtrace_toxrange == NULL);
14713 14737 ASSERT(dtrace_toxranges_max == 0);
14714 14738 dtrace_toxranges_max = 1;
14715 14739 } else {
14716 14740 dtrace_toxranges_max <<= 1;
14717 14741 }
14718 14742
14719 14743 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
14720 14744 range = kmem_zalloc(nsize, KM_SLEEP);
14721 14745
14722 14746 if (dtrace_toxrange != NULL) {
14723 14747 ASSERT(osize != 0);
14724 14748 bcopy(dtrace_toxrange, range, osize);
14725 14749 kmem_free(dtrace_toxrange, osize);
14726 14750 }
14727 14751
14728 14752 dtrace_toxrange = range;
14729 14753 }
14730 14754
14731 14755 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
14732 14756 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);
14733 14757
14734 14758 dtrace_toxrange[dtrace_toxranges].dtt_base = base;
14735 14759 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
14736 14760 dtrace_toxranges++;
14737 14761 }
14738 14762
14739 14763 /*
14740 14764 * DTrace Driver Cookbook Functions
14741 14765 */
14742 14766 /*ARGSUSED*/
14743 14767 static int
14744 14768 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
14745 14769 {
14746 14770 dtrace_provider_id_t id;
14747 14771 dtrace_state_t *state = NULL;
14748 14772 dtrace_enabling_t *enab;
14749 14773
14750 14774 mutex_enter(&cpu_lock);
14751 14775 mutex_enter(&dtrace_provider_lock);
14752 14776 mutex_enter(&dtrace_lock);
14753 14777
14754 14778 if (ddi_soft_state_init(&dtrace_softstate,
14755 14779 sizeof (dtrace_state_t), 0) != 0) {
14756 14780 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
14757 14781 mutex_exit(&cpu_lock);
14758 14782 mutex_exit(&dtrace_provider_lock);
14759 14783 mutex_exit(&dtrace_lock);
14760 14784 return (DDI_FAILURE);
14761 14785 }
14762 14786
14763 14787 if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
14764 14788 DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
14765 14789 ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
14766 14790 DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
14767 14791 cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
14768 14792 ddi_remove_minor_node(devi, NULL);
14769 14793 ddi_soft_state_fini(&dtrace_softstate);
14770 14794 mutex_exit(&cpu_lock);
14771 14795 mutex_exit(&dtrace_provider_lock);
14772 14796 mutex_exit(&dtrace_lock);
14773 14797 return (DDI_FAILURE);
14774 14798 }
14775 14799
14776 14800 ddi_report_dev(devi);
14777 14801 dtrace_devi = devi;
14778 14802
14779 14803 dtrace_modload = dtrace_module_loaded;
14780 14804 dtrace_modunload = dtrace_module_unloaded;
14781 14805 dtrace_cpu_init = dtrace_cpu_setup_initial;
14782 14806 dtrace_helpers_cleanup = dtrace_helpers_destroy;
14783 14807 dtrace_helpers_fork = dtrace_helpers_duplicate;
14784 14808 dtrace_cpustart_init = dtrace_suspend;
14785 14809 dtrace_cpustart_fini = dtrace_resume;
14786 14810 dtrace_debugger_init = dtrace_suspend;
14787 14811 dtrace_debugger_fini = dtrace_resume;
14788 14812
14789 14813 register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
14790 14814
14791 14815 ASSERT(MUTEX_HELD(&cpu_lock));
14792 14816
14793 14817 dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
14794 14818 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
14795 14819 dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
14796 14820 UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
14797 14821 VM_SLEEP | VMC_IDENTIFIER);
14798 14822 dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
14799 14823 1, INT_MAX, 0);
14800 14824
14801 14825 dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
14802 14826 sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
14803 14827 NULL, NULL, NULL, NULL, NULL, 0);
14804 14828
14805 14829 ASSERT(MUTEX_HELD(&cpu_lock));
14806 14830 dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
14807 14831 offsetof(dtrace_probe_t, dtpr_nextmod),
14808 14832 offsetof(dtrace_probe_t, dtpr_prevmod));
14809 14833
14810 14834 dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
14811 14835 offsetof(dtrace_probe_t, dtpr_nextfunc),
14812 14836 offsetof(dtrace_probe_t, dtpr_prevfunc));
14813 14837
14814 14838 dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
14815 14839 offsetof(dtrace_probe_t, dtpr_nextname),
14816 14840 offsetof(dtrace_probe_t, dtpr_prevname));
14817 14841
14818 14842 if (dtrace_retain_max < 1) {
14819 14843 cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
14820 14844 "setting to 1", dtrace_retain_max);
14821 14845 dtrace_retain_max = 1;
14822 14846 }
14823 14847
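	/*
	 * A hedged aside: dtrace_retain_max is an ordinary kernel tunable,
	 * so an administrator could raise it at boot via /etc/system, e.g.:
	 *
	 *	set dtrace:dtrace_retain_max = 128
	 *
	 * (illustrative value; any value >= 1 passes the check above).
	 */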
14824 14848 /*
14825 14849 * Now discover our toxic ranges.
14826 14850 */
14827 14851 dtrace_toxic_ranges(dtrace_toxrange_add);
14828 14852
14829 14853 /*
14830 14854 * Before we register ourselves as a provider to our own framework,
14831 14855 * we would like to assert that dtrace_provider is NULL -- but that's
14832 14856 * not true if we were loaded as a dependency of a DTrace provider.
14833 14857 * Once we've registered, we can assert that dtrace_provider is our
14834 14858 * pseudo provider.
14835 14859 */
14836 14860 (void) dtrace_register("dtrace", &dtrace_provider_attr,
14837 14861 DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);
14838 14862
14839 14863 ASSERT(dtrace_provider != NULL);
14840 14864 ASSERT((dtrace_provider_id_t)dtrace_provider == id);
14841 14865
14842 14866 dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
14843 14867 dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
14844 14868 dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
14845 14869 dtrace_provider, NULL, NULL, "END", 0, NULL);
14846 14870 dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
14847 14871 dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
14848 14872
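	/*
	 * These three probes are what consumers see as dtrace:::BEGIN,
	 * dtrace:::END and dtrace:::ERROR.  A minimal consumer-side sketch
	 * (illustrative, not part of this file):
	 *
	 *	# dtrace -n 'dtrace:::BEGIN { trace(timestamp); exit(0); }'
	 */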
14849 14873 dtrace_anon_property();
14850 14874 mutex_exit(&cpu_lock);
14851 14875
14852 14876 /*
14853 14877 * If DTrace helper tracing is enabled, we need to allocate the
14854 14878 * trace buffer and initialize the values.
14855 14879 */
14856 14880 if (dtrace_helptrace_enabled) {
14857 14881 ASSERT(dtrace_helptrace_buffer == NULL);
14858 14882 dtrace_helptrace_buffer =
14859 14883 kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
14860 14884 dtrace_helptrace_next = 0;
14861 14885 }
14862 14886
14863 14887 /*
14864 14888 * If there are already providers, we must ask them to provide their
14865 14889 * probes, and then match any anonymous enabling against them. Note
14866 14890 * that there should be no other retained enablings at this time:
14867 14891	 * the only retained enabling should be the anonymous
14868 14892 * enabling.
14869 14893 */
14870 14894 if (dtrace_anon.dta_enabling != NULL) {
14871 14895 ASSERT(dtrace_retained == dtrace_anon.dta_enabling);
14872 14896
14873 14897 dtrace_enabling_provide(NULL);
14874 14898 state = dtrace_anon.dta_state;
14875 14899
14876 14900 /*
14877 14901 * We couldn't hold cpu_lock across the above call to
14878 14902 * dtrace_enabling_provide(), but we must hold it to actually
14879 14903 * enable the probes. We have to drop all of our locks, pick
14880 14904 * up cpu_lock, and regain our locks before matching the
14881 14905 * retained anonymous enabling.
14882 14906 */
14883 14907 mutex_exit(&dtrace_lock);
14884 14908 mutex_exit(&dtrace_provider_lock);
14885 14909
14886 14910 mutex_enter(&cpu_lock);
14887 14911 mutex_enter(&dtrace_provider_lock);
14888 14912 mutex_enter(&dtrace_lock);
14889 14913
14890 14914 if ((enab = dtrace_anon.dta_enabling) != NULL)
14891 14915 (void) dtrace_enabling_match(enab, NULL);
14892 14916
14893 14917 mutex_exit(&cpu_lock);
14894 14918 }
14895 14919
14896 14920 mutex_exit(&dtrace_lock);
14897 14921 mutex_exit(&dtrace_provider_lock);
14898 14922
14899 14923 if (state != NULL) {
14900 14924 /*
14901 14925 * If we created any anonymous state, set it going now.
14902 14926 */
14903 14927 (void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
14904 14928 }
14905 14929
14906 14930 return (DDI_SUCCESS);
14907 14931 }
14908 14932
14909 14933 /*ARGSUSED*/
14910 14934 static int
14911 14935 dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
14912 14936 {
14913 14937 dtrace_state_t *state;
14914 14938 uint32_t priv;
14915 14939 uid_t uid;
14916 14940 zoneid_t zoneid;
14917 14941
14918 14942 if (getminor(*devp) == DTRACEMNRN_HELPER)
14919 14943 return (0);
14920 14944
14921 14945 /*
14922 14946 * If this wasn't an open with the "helper" minor, then it must be
14923 14947 * the "dtrace" minor.
14924 14948 */
14925 14949 if (getminor(*devp) != DTRACEMNRN_DTRACE)
14926 14950 return (ENXIO);
14927 14951
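	/*
	 * A minimal user-level sketch of reaching this entry point --
	 * roughly what libdtrace does when a consumer starts (illustrative;
	 * error handling elided):
	 *
	 *	int fd = open("/dev/dtrace/dtrace", O_RDWR);
	 *	if (fd == -1)
	 *		err(1, "failed to open dtrace device");
	 */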
14928 14952 /*
14929 14953 * If no DTRACE_PRIV_* bits are set in the credential, then the
14930 14954 * caller lacks sufficient permission to do anything with DTrace.
14931 14955 */
14932 14956 dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
14933 14957 if (priv == DTRACE_PRIV_NONE)
14934 14958 return (EACCES);
14935 14959
14936 14960 /*
14937 14961 * Ask all providers to provide all their probes.
14938 14962 */
14939 14963 mutex_enter(&dtrace_provider_lock);
14940 14964 dtrace_probe_provide(NULL, NULL);
14941 14965 mutex_exit(&dtrace_provider_lock);
14942 14966
14943 14967 mutex_enter(&cpu_lock);
14944 14968 mutex_enter(&dtrace_lock);
14945 14969 dtrace_opens++;
14946 14970 dtrace_membar_producer();
14947 14971
14948 14972 /*
14949 14973 * If the kernel debugger is active (that is, if the kernel debugger
14950 14974 * modified text in some way), we won't allow the open.
14951 14975 */
14952 14976 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
14953 14977 dtrace_opens--;
14954 14978 mutex_exit(&cpu_lock);
14955 14979 mutex_exit(&dtrace_lock);
14956 14980 return (EBUSY);
14957 14981 }
14958 14982
14959 14983 state = dtrace_state_create(devp, cred_p);
14960 14984 mutex_exit(&cpu_lock);
14961 14985
14962 14986 if (state == NULL) {
14963 14987 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
14964 14988 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
14965 14989 mutex_exit(&dtrace_lock);
14966 14990 return (EAGAIN);
14967 14991 }
14968 14992
14969 14993 mutex_exit(&dtrace_lock);
14970 14994
14971 14995 return (0);
14972 14996 }
14973 14997
14974 14998 /*ARGSUSED*/
14975 14999 static int
14976 15000 dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
14977 15001 {
14978 15002 minor_t minor = getminor(dev);
14979 15003 dtrace_state_t *state;
14980 15004
14981 15005 if (minor == DTRACEMNRN_HELPER)
14982 15006 return (0);
14983 15007
14984 15008 state = ddi_get_soft_state(dtrace_softstate, minor);
14985 15009
14986 15010 mutex_enter(&cpu_lock);
14987 15011 mutex_enter(&dtrace_lock);
14988 15012
14989 15013 if (state->dts_anon) {
14990 15014 /*
14991 15015 * There is anonymous state. Destroy that first.
14992 15016 */
14993 15017 ASSERT(dtrace_anon.dta_state == NULL);
14994 15018 dtrace_state_destroy(state->dts_anon);
14995 15019 }
14996 15020
14997 15021 dtrace_state_destroy(state);
14998 15022 ASSERT(dtrace_opens > 0);
14999 15023
15000 15024 /*
15001 15025 * Only relinquish control of the kernel debugger interface when there
15002 15026 * are no consumers and no anonymous enablings.
15003 15027 */
15004 15028 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
15005 15029 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
15006 15030
15007 15031 mutex_exit(&dtrace_lock);
15008 15032 mutex_exit(&cpu_lock);
15009 15033
15010 15034 return (0);
15011 15035 }
15012 15036
15013 15037 /*ARGSUSED*/
15014 15038 static int
15015 15039 dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv)
15016 15040 {
15017 15041 int rval;
15018 15042 dof_helper_t help, *dhp = NULL;
15019 15043
15020 15044 switch (cmd) {
15021 15045 case DTRACEHIOC_ADDDOF:
15022 15046 if (copyin((void *)arg, &help, sizeof (help)) != 0) {
15023 15047 dtrace_dof_error(NULL, "failed to copyin DOF helper");
15024 15048 return (EFAULT);
15025 15049 }
15026 15050
15027 15051 dhp = &help;
15028 15052 arg = (intptr_t)help.dofhp_dof;
15029 15053 /*FALLTHROUGH*/
15030 15054
15031 15055 case DTRACEHIOC_ADD: {
15032 15056 dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval);
15033 15057
15034 15058 if (dof == NULL)
15035 15059 return (rval);
15036 15060
15037 15061 mutex_enter(&dtrace_lock);
15038 15062
15039 15063 /*
15040 15064 * dtrace_helper_slurp() takes responsibility for the dof --
15041 15065 * it may free it now or it may save it and free it later.
15042 15066 */
15043 15067 if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) {
15044 15068 *rv = rval;
15045 15069 rval = 0;
15046 15070 } else {
15047 15071 rval = EINVAL;
15048 15072 }
15049 15073
15050 15074 mutex_exit(&dtrace_lock);
15051 15075 return (rval);
15052 15076 }
15053 15077
15054 15078 case DTRACEHIOC_REMOVE: {
15055 15079 mutex_enter(&dtrace_lock);
15056 15080 rval = dtrace_helper_destroygen(arg);
15057 15081 mutex_exit(&dtrace_lock);
15058 15082
15059 15083 return (rval);
15060 15084 }
15061 15085
15062 15086 default:
15063 15087 break;
15064 15088 }
15065 15089
15066 15090 return (ENOTTY);
15067 15091 }
15068 15092
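/*
 * A hedged sketch of the user-level half of DTRACEHIOC_ADDDOF -- this is
 * approximately what libdtrace's drti.o does when a process with USDT
 * probes starts (details vary; "dof" and "modname" are assumed here to be
 * the process's generated DOF and its object name):
 *
 *	dof_helper_t dh;
 *	int fd = open("/dev/dtrace/helper", O_RDWR);
 *
 *	dh.dofhp_dof = dh.dofhp_addr = (uintptr_t)dof;
 *	(void) strlcpy(dh.dofhp_mod, modname, sizeof (dh.dofhp_mod));
 *	gen = ioctl(fd, DTRACEHIOC_ADDDOF, &dh);
 *
 * The returned generation may later be passed to DTRACEHIOC_REMOVE to
 * retire the helper.
 */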
15069 15093 /*ARGSUSED*/
15070 15094 static int
15071 15095 dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
15072 15096 {
15073 15097 minor_t minor = getminor(dev);
15074 15098 dtrace_state_t *state;
15075 15099 int rval;
15076 15100
15077 15101 if (minor == DTRACEMNRN_HELPER)
15078 15102 return (dtrace_ioctl_helper(cmd, arg, rv));
15079 15103
15080 15104 state = ddi_get_soft_state(dtrace_softstate, minor);
15081 15105
15082 15106 if (state->dts_anon) {
15083 15107 ASSERT(dtrace_anon.dta_state == NULL);
15084 15108 state = state->dts_anon;
15085 15109 }
15086 15110
15087 15111 switch (cmd) {
15088 15112 case DTRACEIOC_PROVIDER: {
15089 15113 dtrace_providerdesc_t pvd;
15090 15114 dtrace_provider_t *pvp;
15091 15115
15092 15116 if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0)
15093 15117 return (EFAULT);
15094 15118
15095 15119 pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
15096 15120 mutex_enter(&dtrace_provider_lock);
15097 15121
15098 15122 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
15099 15123 if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0)
15100 15124 break;
15101 15125 }
15102 15126
15103 15127 mutex_exit(&dtrace_provider_lock);
15104 15128
15105 15129 if (pvp == NULL)
15106 15130 return (ESRCH);
15107 15131
15108 15132 bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
15109 15133 bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
15110 15134 if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0)
15111 15135 return (EFAULT);
15112 15136
15113 15137 return (0);
15114 15138 }
15115 15139
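	/*
	 * Consumer-side sketch of this query (illustrative): fill in the
	 * provider name and the kernel returns its stability attributes
	 * and privileges:
	 *
	 *	dtrace_providerdesc_t pvd;
	 *	bzero(&pvd, sizeof (pvd));
	 *	(void) strlcpy(pvd.dtvd_name, "syscall",
	 *	    sizeof (pvd.dtvd_name));
	 *	if (ioctl(fd, DTRACEIOC_PROVIDER, &pvd) == 0)
	 *		... pvd.dtvd_attr and pvd.dtvd_priv are now valid ...
	 */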
15116 15140 case DTRACEIOC_EPROBE: {
15117 15141 dtrace_eprobedesc_t epdesc;
15118 15142 dtrace_ecb_t *ecb;
15119 15143 dtrace_action_t *act;
15120 15144 void *buf;
15121 15145 size_t size;
15122 15146 uintptr_t dest;
15123 15147 int nrecs;
15124 15148
15125 15149 if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0)
15126 15150 return (EFAULT);
15127 15151
15128 15152 mutex_enter(&dtrace_lock);
15129 15153
15130 15154 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
15131 15155 mutex_exit(&dtrace_lock);
15132 15156 return (EINVAL);
15133 15157 }
15134 15158
15135 15159 if (ecb->dte_probe == NULL) {
15136 15160 mutex_exit(&dtrace_lock);
15137 15161 return (EINVAL);
15138 15162 }
15139 15163
15140 15164 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
15141 15165 epdesc.dtepd_uarg = ecb->dte_uarg;
15142 15166 epdesc.dtepd_size = ecb->dte_size;
15143 15167
15144 15168 nrecs = epdesc.dtepd_nrecs;
15145 15169 epdesc.dtepd_nrecs = 0;
15146 15170 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
15147 15171 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
15148 15172 continue;
15149 15173
15150 15174 epdesc.dtepd_nrecs++;
15151 15175 }
15152 15176
15153 15177 /*
15154 15178 * Now that we have the size, we need to allocate a temporary
15155 15179 * buffer in which to store the complete description. We need
15156 15180 * the temporary buffer to be able to drop dtrace_lock()
15157 15181 * across the copyout(), below.
15158 15182 */
15159 15183 size = sizeof (dtrace_eprobedesc_t) +
15160 15184 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));
15161 15185
15162 15186 buf = kmem_alloc(size, KM_SLEEP);
15163 15187 dest = (uintptr_t)buf;
15164 15188
15165 15189 bcopy(&epdesc, (void *)dest, sizeof (epdesc));
15166 15190 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);
15167 15191
15168 15192 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
15169 15193 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
15170 15194 continue;
15171 15195
15172 15196 if (nrecs-- == 0)
15173 15197 break;
15174 15198
15175 15199 bcopy(&act->dta_rec, (void *)dest,
15176 15200 sizeof (dtrace_recdesc_t));
15177 15201 dest += sizeof (dtrace_recdesc_t);
15178 15202 }
15179 15203
15180 15204 mutex_exit(&dtrace_lock);
15181 15205
15182 15206 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
15183 15207 kmem_free(buf, size);
15184 15208 return (EFAULT);
15185 15209 }
15186 15210
15187 15211 kmem_free(buf, size);
15188 15212 return (0);
15189 15213 }
15190 15214
15191 15215 case DTRACEIOC_AGGDESC: {
15192 15216 dtrace_aggdesc_t aggdesc;
15193 15217 dtrace_action_t *act;
15194 15218 dtrace_aggregation_t *agg;
15195 15219 int nrecs;
15196 15220 uint32_t offs;
15197 15221 dtrace_recdesc_t *lrec;
15198 15222 void *buf;
15199 15223 size_t size;
15200 15224 uintptr_t dest;
15201 15225
15202 15226 if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0)
15203 15227 return (EFAULT);
15204 15228
15205 15229 mutex_enter(&dtrace_lock);
15206 15230
15207 15231 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
15208 15232 mutex_exit(&dtrace_lock);
15209 15233 return (EINVAL);
15210 15234 }
15211 15235
15212 15236 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;
15213 15237
15214 15238 nrecs = aggdesc.dtagd_nrecs;
15215 15239 aggdesc.dtagd_nrecs = 0;
15216 15240
15217 15241 offs = agg->dtag_base;
15218 15242 lrec = &agg->dtag_action.dta_rec;
15219 15243 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;
15220 15244
15221 15245 for (act = agg->dtag_first; ; act = act->dta_next) {
15222 15246 ASSERT(act->dta_intuple ||
15223 15247 DTRACEACT_ISAGG(act->dta_kind));
15224 15248
15225 15249 /*
15226 15250 * If this action has a record size of zero, it
15227 15251 * denotes an argument to the aggregating action.
15228 15252 * Because the presence of this record doesn't (or
15229 15253 * shouldn't) affect the way the data is interpreted,
15230 15254 * we don't copy it out to save user-level the
15231 15255 * confusion of dealing with a zero-length record.
15232 15256 */
15233 15257 if (act->dta_rec.dtrd_size == 0) {
15234 15258 ASSERT(agg->dtag_hasarg);
15235 15259 continue;
15236 15260 }
15237 15261
15238 15262 aggdesc.dtagd_nrecs++;
15239 15263
15240 15264 if (act == &agg->dtag_action)
15241 15265 break;
15242 15266 }
15243 15267
15244 15268 /*
15245 15269 * Now that we have the size, we need to allocate a temporary
15246 15270 * buffer in which to store the complete description. We need
15247 15271 * the temporary buffer to be able to drop dtrace_lock()
15248 15272 * across the copyout(), below.
15249 15273 */
15250 15274 size = sizeof (dtrace_aggdesc_t) +
15251 15275 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));
15252 15276
15253 15277 buf = kmem_alloc(size, KM_SLEEP);
15254 15278 dest = (uintptr_t)buf;
15255 15279
15256 15280 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
15257 15281 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);
15258 15282
15259 15283 for (act = agg->dtag_first; ; act = act->dta_next) {
15260 15284 dtrace_recdesc_t rec = act->dta_rec;
15261 15285
15262 15286 /*
15263 15287 * See the comment in the above loop for why we pass
15264 15288 * over zero-length records.
15265 15289 */
15266 15290 if (rec.dtrd_size == 0) {
15267 15291 ASSERT(agg->dtag_hasarg);
15268 15292 continue;
15269 15293 }
15270 15294
15271 15295 if (nrecs-- == 0)
15272 15296 break;
15273 15297
15274 15298 rec.dtrd_offset -= offs;
15275 15299 bcopy(&rec, (void *)dest, sizeof (rec));
15276 15300 dest += sizeof (dtrace_recdesc_t);
15277 15301
15278 15302 if (act == &agg->dtag_action)
15279 15303 break;
15280 15304 }
15281 15305
15282 15306 mutex_exit(&dtrace_lock);
15283 15307
15284 15308 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
15285 15309 kmem_free(buf, size);
15286 15310 return (EFAULT);
15287 15311 }
15288 15312
15289 15313 kmem_free(buf, size);
15290 15314 return (0);
15291 15315 }
15292 15316
15293 15317 case DTRACEIOC_ENABLE: {
15294 15318 dof_hdr_t *dof;
15295 15319 dtrace_enabling_t *enab = NULL;
15296 15320 dtrace_vstate_t *vstate;
15297 15321 int err = 0;
15298 15322
15299 15323 *rv = 0;
15300 15324
15301 15325 /*
15302 15326 * If a NULL argument has been passed, we take this as our
15303 15327 * cue to reevaluate our enablings.
15304 15328 */
15305 15329 if (arg == NULL) {
15306 15330 dtrace_enabling_matchall();
15307 15331
15308 15332 return (0);
15309 15333 }
15310 15334
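		/*
		 * Consumer-side sketch (illustrative): a NULL argument is
		 * how libdtrace asks for re-evaluation after providers
		 * change, i.e. roughly:
		 *
		 *	(void) ioctl(fd, DTRACEIOC_ENABLE, NULL);
		 *
		 * whereas a non-NULL argument points at DOF describing new
		 * enablings, handled below.
		 */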
15311 15335 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
15312 15336 return (rval);
15313 15337
15314 15338 mutex_enter(&cpu_lock);
15315 15339 mutex_enter(&dtrace_lock);
15316 15340 vstate = &state->dts_vstate;
15317 15341
15318 15342 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
15319 15343 mutex_exit(&dtrace_lock);
15320 15344 mutex_exit(&cpu_lock);
15321 15345 dtrace_dof_destroy(dof);
15322 15346 return (EBUSY);
15323 15347 }
15324 15348
15325 15349 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
15326 15350 mutex_exit(&dtrace_lock);
15327 15351 mutex_exit(&cpu_lock);
15328 15352 dtrace_dof_destroy(dof);
15329 15353 return (EINVAL);
15330 15354 }
15331 15355
15332 15356 if ((rval = dtrace_dof_options(dof, state)) != 0) {
15333 15357 dtrace_enabling_destroy(enab);
15334 15358 mutex_exit(&dtrace_lock);
15335 15359 mutex_exit(&cpu_lock);
15336 15360 dtrace_dof_destroy(dof);
15337 15361 return (rval);
15338 15362 }
15339 15363
15340 15364 if ((err = dtrace_enabling_match(enab, rv)) == 0) {
15341 15365 err = dtrace_enabling_retain(enab);
15342 15366 } else {
15343 15367 dtrace_enabling_destroy(enab);
15344 15368 }
15345 15369
15346 15370 mutex_exit(&cpu_lock);
15347 15371 mutex_exit(&dtrace_lock);
15348 15372 dtrace_dof_destroy(dof);
15349 15373
15350 15374 return (err);
15351 15375 }
15352 15376
15353 15377 case DTRACEIOC_REPLICATE: {
15354 15378 dtrace_repldesc_t desc;
15355 15379 dtrace_probedesc_t *match = &desc.dtrpd_match;
15356 15380 dtrace_probedesc_t *create = &desc.dtrpd_create;
15357 15381 int err;
15358 15382
15359 15383 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15360 15384 return (EFAULT);
15361 15385
15362 15386 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
15363 15387 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
15364 15388 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
15365 15389 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
15366 15390
15367 15391 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
15368 15392 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
15369 15393 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
15370 15394 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
15371 15395
15372 15396 mutex_enter(&dtrace_lock);
15373 15397 err = dtrace_enabling_replicate(state, match, create);
15374 15398 mutex_exit(&dtrace_lock);
15375 15399
15376 15400 return (err);
15377 15401 }
15378 15402
15379 15403 case DTRACEIOC_PROBEMATCH:
15380 15404 case DTRACEIOC_PROBES: {
15381 15405 dtrace_probe_t *probe = NULL;
15382 15406 dtrace_probedesc_t desc;
15383 15407 dtrace_probekey_t pkey;
15384 15408 dtrace_id_t i;
15385 15409 int m = 0;
15386 15410 uint32_t priv;
15387 15411 uid_t uid;
15388 15412 zoneid_t zoneid;
15389 15413
15390 15414 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15391 15415 return (EFAULT);
15392 15416
15393 15417 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
15394 15418 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
15395 15419 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
15396 15420 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';
15397 15421
15398 15422 /*
15399 15423 * Before we attempt to match this probe, we want to give
15400 15424 * all providers the opportunity to provide it.
15401 15425 */
15402 15426 if (desc.dtpd_id == DTRACE_IDNONE) {
15403 15427 mutex_enter(&dtrace_provider_lock);
15404 15428 dtrace_probe_provide(&desc, NULL);
15405 15429 mutex_exit(&dtrace_provider_lock);
15406 15430 desc.dtpd_id++;
15407 15431 }
15408 15432
15409 15433 if (cmd == DTRACEIOC_PROBEMATCH) {
15410 15434 dtrace_probekey(&desc, &pkey);
15411 15435 pkey.dtpk_id = DTRACE_IDNONE;
15412 15436 }
15413 15437
15414 15438 dtrace_cred2priv(cr, &priv, &uid, &zoneid);
15415 15439
15416 15440 mutex_enter(&dtrace_lock);
15417 15441
15418 15442 if (cmd == DTRACEIOC_PROBEMATCH) {
15419 15443 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
15420 15444 if ((probe = dtrace_probes[i - 1]) != NULL &&
15421 15445 (m = dtrace_match_probe(probe, &pkey,
15422 15446 priv, uid, zoneid)) != 0)
15423 15447 break;
15424 15448 }
15425 15449
15426 15450 if (m < 0) {
15427 15451 mutex_exit(&dtrace_lock);
15428 15452 return (EINVAL);
15429 15453 }
15430 15454
15431 15455 } else {
15432 15456 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
15433 15457 if ((probe = dtrace_probes[i - 1]) != NULL &&
15434 15458 dtrace_match_priv(probe, priv, uid, zoneid))
15435 15459 break;
15436 15460 }
15437 15461 }
15438 15462
15439 15463 if (probe == NULL) {
15440 15464 mutex_exit(&dtrace_lock);
15441 15465 return (ESRCH);
15442 15466 }
15443 15467
15444 15468 dtrace_probe_description(probe, &desc);
15445 15469 mutex_exit(&dtrace_lock);
15446 15470
15447 15471 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15448 15472 return (EFAULT);
15449 15473
15450 15474 return (0);
15451 15475 }
15452 15476
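	/*
	 * Sketch of the user-level listing loop that drives the case above
	 * -- approximately what dtrace -l does through libdtrace
	 * (illustrative; error handling elided):
	 *
	 *	dtrace_probedesc_t pd;
	 *	bzero(&pd, sizeof (pd));
	 *	while (ioctl(fd, DTRACEIOC_PROBES, &pd) == 0) {
	 *		... print pd.dtpd_provider:pd.dtpd_mod:
	 *		    pd.dtpd_func:pd.dtpd_name ...
	 *		pd.dtpd_id++;	(resume after the returned probe)
	 *	}
	 *
	 * The loop terminates when the ioctl fails with ESRCH.
	 */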
15453 15477 case DTRACEIOC_PROBEARG: {
15454 15478 dtrace_argdesc_t desc;
15455 15479 dtrace_probe_t *probe;
15456 15480 dtrace_provider_t *prov;
15457 15481
15458 15482 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15459 15483 return (EFAULT);
15460 15484
15461 15485 if (desc.dtargd_id == DTRACE_IDNONE)
15462 15486 return (EINVAL);
15463 15487
15464 15488 if (desc.dtargd_ndx == DTRACE_ARGNONE)
15465 15489 return (EINVAL);
15466 15490
15467 15491 mutex_enter(&dtrace_provider_lock);
15468 15492 mutex_enter(&mod_lock);
15469 15493 mutex_enter(&dtrace_lock);
15470 15494
15471 15495 if (desc.dtargd_id > dtrace_nprobes) {
15472 15496 mutex_exit(&dtrace_lock);
15473 15497 mutex_exit(&mod_lock);
15474 15498 mutex_exit(&dtrace_provider_lock);
15475 15499 return (EINVAL);
15476 15500 }
15477 15501
15478 15502 if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
15479 15503 mutex_exit(&dtrace_lock);
15480 15504 mutex_exit(&mod_lock);
15481 15505 mutex_exit(&dtrace_provider_lock);
15482 15506 return (EINVAL);
15483 15507 }
15484 15508
15485 15509 mutex_exit(&dtrace_lock);
15486 15510
15487 15511 prov = probe->dtpr_provider;
15488 15512
15489 15513 if (prov->dtpv_pops.dtps_getargdesc == NULL) {
15490 15514 /*
15491 15515 * There isn't any typed information for this probe.
15492 15516 * Set the argument number to DTRACE_ARGNONE.
15493 15517 */
15494 15518 desc.dtargd_ndx = DTRACE_ARGNONE;
15495 15519 } else {
15496 15520 desc.dtargd_native[0] = '\0';
15497 15521 desc.dtargd_xlate[0] = '\0';
15498 15522 desc.dtargd_mapping = desc.dtargd_ndx;
15499 15523
15500 15524 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
15501 15525 probe->dtpr_id, probe->dtpr_arg, &desc);
15502 15526 }
15503 15527
15504 15528 mutex_exit(&mod_lock);
15505 15529 mutex_exit(&dtrace_provider_lock);
15506 15530
15507 15531 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15508 15532 return (EFAULT);
15509 15533
15510 15534 return (0);
15511 15535 }
15512 15536
15513 15537 case DTRACEIOC_GO: {
15514 15538 processorid_t cpuid;
15515 15539 rval = dtrace_state_go(state, &cpuid);
15516 15540
15517 15541 if (rval != 0)
15518 15542 return (rval);
15519 15543
15520 15544 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
15521 15545 return (EFAULT);
15522 15546
15523 15547 return (0);
15524 15548 }
15525 15549
15526 15550 case DTRACEIOC_STOP: {
15527 15551 processorid_t cpuid;
15528 15552
15529 15553 mutex_enter(&dtrace_lock);
15530 15554 rval = dtrace_state_stop(state, &cpuid);
15531 15555 mutex_exit(&dtrace_lock);
15532 15556
15533 15557 if (rval != 0)
15534 15558 return (rval);
15535 15559
15536 15560 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
15537 15561 return (EFAULT);
15538 15562
15539 15563 return (0);
15540 15564 }
15541 15565
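	/*
	 * Lifecycle sketch for the two cases above (illustrative): once a
	 * consumer has enabled probes, it brackets tracing with:
	 *
	 *	processorid_t cpu;
	 *	(void) ioctl(fd, DTRACEIOC_GO, &cpu);	(BEGIN fires; cpu
	 *	    reports where)
	 *	... consume buffers ...
	 *	(void) ioctl(fd, DTRACEIOC_STOP, &cpu);	(END fires; cpu
	 *	    reports where)
	 */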
15542 15566 case DTRACEIOC_DOFGET: {
15543 15567 dof_hdr_t hdr, *dof;
15544 15568 uint64_t len;
15545 15569
15546 15570 if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0)
15547 15571 return (EFAULT);
15548 15572
15549 15573 mutex_enter(&dtrace_lock);
15550 15574 dof = dtrace_dof_create(state);
15551 15575 mutex_exit(&dtrace_lock);
15552 15576
15553 15577 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
15554 15578 rval = copyout(dof, (void *)arg, len);
15555 15579 dtrace_dof_destroy(dof);
15556 15580
15557 15581 return (rval == 0 ? 0 : EFAULT);
15558 15582 }
15559 15583
15560 15584 case DTRACEIOC_AGGSNAP:
15561 15585 case DTRACEIOC_BUFSNAP: {
15562 15586 dtrace_bufdesc_t desc;
15563 15587 caddr_t cached;
15564 15588 dtrace_buffer_t *buf;
15565 15589
15566 15590 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15567 15591 return (EFAULT);
15568 15592
15569 15593 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
15570 15594 return (EINVAL);
15571 15595
15572 15596 mutex_enter(&dtrace_lock);
15573 15597
15574 15598 if (cmd == DTRACEIOC_BUFSNAP) {
15575 15599 buf = &state->dts_buffer[desc.dtbd_cpu];
15576 15600 } else {
15577 15601 buf = &state->dts_aggbuffer[desc.dtbd_cpu];
15578 15602 }
15579 15603
15580 15604 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
15581 15605 size_t sz = buf->dtb_offset;
15582 15606
15583 15607 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
15584 15608 mutex_exit(&dtrace_lock);
15585 15609 return (EBUSY);
15586 15610 }
15587 15611
15588 15612 /*
15589 15613 * If this buffer has already been consumed, we're
15590 15614 * going to indicate that there's nothing left here
15591 15615 * to consume.
15592 15616 */
15593 15617 if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
15594 15618 mutex_exit(&dtrace_lock);
15595 15619
15596 15620 desc.dtbd_size = 0;
15597 15621 desc.dtbd_drops = 0;
15598 15622 desc.dtbd_errors = 0;
15599 15623 desc.dtbd_oldest = 0;
15600 15624 sz = sizeof (desc);
15601 15625
15602 15626 if (copyout(&desc, (void *)arg, sz) != 0)
15603 15627 return (EFAULT);
15604 15628
15605 15629 return (0);
15606 15630 }
15607 15631
15608 15632 /*
15609 15633 * If this is a ring buffer that has wrapped, we want
15610 15634 * to copy the whole thing out.
15611 15635 */
15612 15636 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
15613 15637 dtrace_buffer_polish(buf);
15614 15638 sz = buf->dtb_size;
15615 15639 }
15616 15640
15617 15641 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
15618 15642 mutex_exit(&dtrace_lock);
15619 15643 return (EFAULT);
15620 15644 }
15621 15645
15622 15646 desc.dtbd_size = sz;
15623 15647 desc.dtbd_drops = buf->dtb_drops;
15624 15648 desc.dtbd_errors = buf->dtb_errors;
15625 15649 desc.dtbd_oldest = buf->dtb_xamot_offset;
15626 15650
15627 15651 mutex_exit(&dtrace_lock);
15628 15652
15629 15653 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15630 15654 return (EFAULT);
15631 15655
15632 15656 buf->dtb_flags |= DTRACEBUF_CONSUMED;
15633 15657
15634 15658 return (0);
15635 15659 }
15636 15660
15637 15661 if (buf->dtb_tomax == NULL) {
15638 15662 ASSERT(buf->dtb_xamot == NULL);
15639 15663 mutex_exit(&dtrace_lock);
15640 15664 return (ENOENT);
15641 15665 }
15642 15666
15643 15667 cached = buf->dtb_tomax;
15644 15668 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
15645 15669
15646 15670 dtrace_xcall(desc.dtbd_cpu,
15647 15671 (dtrace_xcall_t)dtrace_buffer_switch, buf);
15648 15672
15649 15673 state->dts_errors += buf->dtb_xamot_errors;
15650 15674
15651 15675 /*
15652 15676 * If the buffers did not actually switch, then the cross call
15653 15677 * did not take place -- presumably because the given CPU is
15654 15678 * not in the ready set. If this is the case, we'll return
15655 15679 * ENOENT.
15656 15680 */
15657 15681 if (buf->dtb_tomax == cached) {
15658 15682 ASSERT(buf->dtb_xamot != cached);
15659 15683 mutex_exit(&dtrace_lock);
15660 15684 return (ENOENT);
15661 15685 }
15662 15686
15663 15687 ASSERT(cached == buf->dtb_xamot);
15664 15688
15665 15689 /*
15666 15690 * We have our snapshot; now copy it out.
15667 15691 */
15668 15692 if (copyout(buf->dtb_xamot, desc.dtbd_data,
15669 15693 buf->dtb_xamot_offset) != 0) {
15670 15694 mutex_exit(&dtrace_lock);
15671 15695 return (EFAULT);
15672 15696 }
15673 15697
15674 15698 desc.dtbd_size = buf->dtb_xamot_offset;
15675 15699 desc.dtbd_drops = buf->dtb_xamot_drops;
15676 15700 desc.dtbd_errors = buf->dtb_xamot_errors;
15677 15701 desc.dtbd_oldest = 0;
15678 15702
15679 15703 mutex_exit(&dtrace_lock);
15680 15704
15681 15705 /*
15682 15706 * Finally, copy out the buffer description.
15683 15707 */
15684 15708 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15685 15709 return (EFAULT);
15686 15710
15687 15711 return (0);
15688 15712 }
15689 15713
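	/*
	 * Consumer-side snapshot sketch (illustrative): for each CPU, point
	 * dtbd_data at a buffer of the "bufsize" option's size and ask for
	 * a switch-and-copy:
	 *
	 *	dtrace_bufdesc_t db;
	 *	db.dtbd_cpu = cpu;
	 *	db.dtbd_data = buf;
	 *	if (ioctl(fd, DTRACEIOC_BUFSNAP, &db) == 0)
	 *		... db.dtbd_size bytes of records are now in buf ...
	 *
	 * ENOENT indicates that the CPU had no buffer or that the buffers
	 * did not switch, as described above.
	 */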
15690 15714 case DTRACEIOC_CONF: {
15691 15715 dtrace_conf_t conf;
15692 15716
15693 15717 bzero(&conf, sizeof (conf));
15694 15718 conf.dtc_difversion = DIF_VERSION;
15695 15719 conf.dtc_difintregs = DIF_DIR_NREGS;
15696 15720 conf.dtc_diftupregs = DIF_DTR_NREGS;
15697 15721 conf.dtc_ctfmodel = CTF_MODEL_NATIVE;
15698 15722
15699 15723 if (copyout(&conf, (void *)arg, sizeof (conf)) != 0)
15700 15724 return (EFAULT);
15701 15725
15702 15726 return (0);
15703 15727 }
15704 15728
15705 15729 case DTRACEIOC_STATUS: {
15706 15730 dtrace_status_t stat;
15707 15731 dtrace_dstate_t *dstate;
15708 15732 int i, j;
15709 15733 uint64_t nerrs;
15710 15734
15711 15735 /*
15712 15736 * See the comment in dtrace_state_deadman() for the reason
15713 15737 * for setting dts_laststatus to INT64_MAX before setting
15714 15738 * it to the correct value.
15715 15739 */
15716 15740 state->dts_laststatus = INT64_MAX;
15717 15741 dtrace_membar_producer();
15718 15742 state->dts_laststatus = dtrace_gethrtime();
15719 15743
15720 15744 bzero(&stat, sizeof (stat));
15721 15745
15722 15746 mutex_enter(&dtrace_lock);
15723 15747
15724 15748 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
15725 15749 mutex_exit(&dtrace_lock);
15726 15750 return (ENOENT);
15727 15751 }
15728 15752
15729 15753 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
15730 15754 stat.dtst_exiting = 1;
15731 15755
15732 15756 nerrs = state->dts_errors;
15733 15757 dstate = &state->dts_vstate.dtvs_dynvars;
15734 15758
15735 15759 for (i = 0; i < NCPU; i++) {
15736 15760 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];
15737 15761
15738 15762 stat.dtst_dyndrops += dcpu->dtdsc_drops;
15739 15763 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
15740 15764 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;
15741 15765
15742 15766 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
15743 15767 stat.dtst_filled++;
15744 15768
15745 15769 nerrs += state->dts_buffer[i].dtb_errors;
15746 15770
15747 15771 for (j = 0; j < state->dts_nspeculations; j++) {
15748 15772 dtrace_speculation_t *spec;
15749 15773 dtrace_buffer_t *buf;
15750 15774
15751 15775 spec = &state->dts_speculations[j];
15752 15776 buf = &spec->dtsp_buffer[i];
15753 15777 stat.dtst_specdrops += buf->dtb_xamot_drops;
15754 15778 }
15755 15779 }
15756 15780
15757 15781 stat.dtst_specdrops_busy = state->dts_speculations_busy;
15758 15782 stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
15759 15783 stat.dtst_stkstroverflows = state->dts_stkstroverflows;
15760 15784 stat.dtst_dblerrors = state->dts_dblerrors;
15761 15785 stat.dtst_killed =
15762 15786 (state->dts_activity == DTRACE_ACTIVITY_KILLED);
15763 15787 stat.dtst_errors = nerrs;
15764 15788
15765 15789 mutex_exit(&dtrace_lock);
15766 15790
15767 15791 if (copyout(&stat, (void *)arg, sizeof (stat)) != 0)
15768 15792 return (EFAULT);
15769 15793
15770 15794 return (0);
15771 15795 }
15772 15796
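	/*
	 * Sketch (illustrative): libdtrace polls the status operation above
	 * on the "statusrate" interval via dtrace_status(), chiefly to
	 * notice drops and the exit() action:
	 *
	 *	dtrace_status_t st;
	 *	if (ioctl(fd, DTRACEIOC_STATUS, &st) == 0 && st.dtst_exiting)
	 *		... stop tracing and perform a final consume ...
	 */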
15773 15797 case DTRACEIOC_FORMAT: {
15774 15798 dtrace_fmtdesc_t fmt;
15775 15799 char *str;
15776 15800 int len;
15777 15801
15778 15802 if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0)
15779 15803 return (EFAULT);
15780 15804
15781 15805 mutex_enter(&dtrace_lock);
15782 15806
15783 15807 if (fmt.dtfd_format == 0 ||
15784 15808 fmt.dtfd_format > state->dts_nformats) {
15785 15809 mutex_exit(&dtrace_lock);
15786 15810 return (EINVAL);
15787 15811 }
15788 15812
15789 15813 /*
15790 15814 * Format strings are allocated contiguously and they are
15791 15815 * never freed; if a format index is less than the number
15792 15816 * of formats, we can assert that the format map is non-NULL
15793 15817 * and that the format for the specified index is non-NULL.
15794 15818 */
15795 15819 ASSERT(state->dts_formats != NULL);
15796 15820 str = state->dts_formats[fmt.dtfd_format - 1];
15797 15821 ASSERT(str != NULL);
15798 15822
15799 15823 len = strlen(str) + 1;
15800 15824
15801 15825 if (len > fmt.dtfd_length) {
15802 15826 fmt.dtfd_length = len;
15803 15827
15804 15828 if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) {
15805 15829 mutex_exit(&dtrace_lock);
15806 15830 return (EINVAL);
15807 15831 }
15808 15832 } else {
15809 15833 if (copyout(str, fmt.dtfd_string, len) != 0) {
15810 15834 mutex_exit(&dtrace_lock);
15811 15835 return (EINVAL);
15812 15836 }
15813 15837 }
15814 15838
15815 15839 mutex_exit(&dtrace_lock);
15816 15840 return (0);
15817 15841 }
15818 15842
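	/*
	 * The length handling above implies the usual two-call pattern at
	 * user level (illustrative sketch):
	 *
	 *	dtrace_fmtdesc_t fmt;
	 *	bzero(&fmt, sizeof (fmt));
	 *	fmt.dtfd_format = format;
	 *	(void) ioctl(fd, DTRACEIOC_FORMAT, &fmt);  (learns the length)
	 *	fmt.dtfd_string = malloc(fmt.dtfd_length);
	 *	(void) ioctl(fd, DTRACEIOC_FORMAT, &fmt);  (fetches the string)
	 */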
15819 15843 default:
15820 15844 break;
15821 15845 }
15822 15846
15823 15847 return (ENOTTY);
15824 15848 }
15825 15849
15826 15850 /*ARGSUSED*/
15827 15851 static int
15828 15852 dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
15829 15853 {
15830 15854 dtrace_state_t *state;
15831 15855
15832 15856 switch (cmd) {
15833 15857 case DDI_DETACH:
15834 15858 break;
15835 15859
15836 15860 case DDI_SUSPEND:
15837 15861 return (DDI_SUCCESS);
15838 15862
15839 15863 default:
15840 15864 return (DDI_FAILURE);
15841 15865 }
15842 15866
15843 15867 mutex_enter(&cpu_lock);
15844 15868 mutex_enter(&dtrace_provider_lock);
15845 15869 mutex_enter(&dtrace_lock);
15846 15870
15847 15871 ASSERT(dtrace_opens == 0);
15848 15872
15849 15873 if (dtrace_helpers > 0) {
15850 15874 mutex_exit(&dtrace_provider_lock);
15851 15875 mutex_exit(&dtrace_lock);
15852 15876 mutex_exit(&cpu_lock);
15853 15877 return (DDI_FAILURE);
15854 15878 }
15855 15879
15856 15880 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
15857 15881 mutex_exit(&dtrace_provider_lock);
15858 15882 mutex_exit(&dtrace_lock);
15859 15883 mutex_exit(&cpu_lock);
15860 15884 return (DDI_FAILURE);
15861 15885 }
15862 15886
15863 15887 dtrace_provider = NULL;
15864 15888
15865 15889 if ((state = dtrace_anon_grab()) != NULL) {
15866 15890 /*
15867 15891 * If there were ECBs on this state, the provider should
15868 15892		 * not have been allowed to detach; assert that there are
15869 15893 * none.
15870 15894 */
15871 15895 ASSERT(state->dts_necbs == 0);
15872 15896 dtrace_state_destroy(state);
15873 15897
15874 15898 /*
15875 15899 * If we're being detached with anonymous state, we need to
15876 15900 * indicate to the kernel debugger that DTrace is now inactive.
15877 15901 */
15878 15902 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
15879 15903 }
15880 15904
15881 15905 bzero(&dtrace_anon, sizeof (dtrace_anon_t));
15882 15906 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
15883 15907 dtrace_cpu_init = NULL;
15884 15908 dtrace_helpers_cleanup = NULL;
15885 15909 dtrace_helpers_fork = NULL;
15886 15910 dtrace_cpustart_init = NULL;
15887 15911 dtrace_cpustart_fini = NULL;
15888 15912 dtrace_debugger_init = NULL;
15889 15913 dtrace_debugger_fini = NULL;
15890 15914 dtrace_modload = NULL;
15891 15915 dtrace_modunload = NULL;
15892 15916
15893 15917 mutex_exit(&cpu_lock);
15894 15918
15895 15919 if (dtrace_helptrace_enabled) {
15896 15920 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
15897 15921 dtrace_helptrace_buffer = NULL;
15898 15922 }
15899 15923
15900 15924 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
15901 15925 dtrace_probes = NULL;
15902 15926 dtrace_nprobes = 0;
15903 15927
15904 15928 dtrace_hash_destroy(dtrace_bymod);
15905 15929 dtrace_hash_destroy(dtrace_byfunc);
15906 15930 dtrace_hash_destroy(dtrace_byname);
15907 15931 dtrace_bymod = NULL;
15908 15932 dtrace_byfunc = NULL;
15909 15933 dtrace_byname = NULL;
15910 15934
15911 15935 kmem_cache_destroy(dtrace_state_cache);
15912 15936 vmem_destroy(dtrace_minor);
15913 15937 vmem_destroy(dtrace_arena);
15914 15938
15915 15939 if (dtrace_toxrange != NULL) {
15916 15940 kmem_free(dtrace_toxrange,
15917 15941 dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
15918 15942 dtrace_toxrange = NULL;
15919 15943 dtrace_toxranges = 0;
15920 15944 dtrace_toxranges_max = 0;
15921 15945 }
15922 15946
15923 15947 ddi_remove_minor_node(dtrace_devi, NULL);
15924 15948 dtrace_devi = NULL;
15925 15949
15926 15950 ddi_soft_state_fini(&dtrace_softstate);
15927 15951
15928 15952 ASSERT(dtrace_vtime_references == 0);
15929 15953 ASSERT(dtrace_opens == 0);
15930 15954 ASSERT(dtrace_retained == NULL);
15931 15955
15932 15956 mutex_exit(&dtrace_lock);
15933 15957 mutex_exit(&dtrace_provider_lock);
15934 15958
15935 15959 /*
15936 15960 * We don't destroy the task queue until after we have dropped our
15937 15961 * locks (taskq_destroy() may block on running tasks). To prevent
15938 15962 * attempting to do work after we have effectively detached but before
15939 15963 * the task queue has been destroyed, all tasks dispatched via the
15940 15964 * task queue must check that DTrace is still attached before
15941 15965 * performing any operation.
15942 15966 */
15943 15967 taskq_destroy(dtrace_taskq);
15944 15968 dtrace_taskq = NULL;
15945 15969
15946 15970 return (DDI_SUCCESS);
15947 15971 }
15948 15972
15949 15973 /*ARGSUSED*/
15950 15974 static int
15951 15975 dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
15952 15976 {
15953 15977 int error;
15954 15978
15955 15979 switch (infocmd) {
15956 15980 case DDI_INFO_DEVT2DEVINFO:
15957 15981 *result = (void *)dtrace_devi;
15958 15982 error = DDI_SUCCESS;
15959 15983 break;
15960 15984 case DDI_INFO_DEVT2INSTANCE:
15961 15985 *result = (void *)0;
15962 15986 error = DDI_SUCCESS;
15963 15987 break;
15964 15988 default:
15965 15989 error = DDI_FAILURE;
15966 15990 }
15967 15991 return (error);
15968 15992 }
15969 15993
15970 15994 static struct cb_ops dtrace_cb_ops = {
15971 15995 dtrace_open, /* open */
15972 15996 dtrace_close, /* close */
15973 15997 nulldev, /* strategy */
15974 15998 nulldev, /* print */
15975 15999 nodev, /* dump */
15976 16000 nodev, /* read */
15977 16001 nodev, /* write */
15978 16002 dtrace_ioctl, /* ioctl */
15979 16003 nodev, /* devmap */
15980 16004 nodev, /* mmap */
15981 16005 nodev, /* segmap */
15982 16006 nochpoll, /* poll */
15983 16007 ddi_prop_op, /* cb_prop_op */
15984 16008 0, /* streamtab */
15985 16009 D_NEW | D_MP /* Driver compatibility flag */
15986 16010 };
15987 16011
15988 16012 static struct dev_ops dtrace_ops = {
15989 16013 DEVO_REV, /* devo_rev */
15990 16014 0, /* refcnt */
15991 16015 dtrace_info, /* get_dev_info */
15992 16016 nulldev, /* identify */
15993 16017 nulldev, /* probe */
15994 16018 dtrace_attach, /* attach */
15995 16019 dtrace_detach, /* detach */
15996 16020 nodev, /* reset */
15997 16021 &dtrace_cb_ops, /* driver operations */
15998 16022 NULL, /* bus operations */
15999 16023 nodev, /* dev power */
16000 16024 ddi_quiesce_not_needed, /* quiesce */
16001 16025 };
16002 16026
16003 16027 static struct modldrv modldrv = {
16004 16028 &mod_driverops, /* module type (this is a pseudo driver) */
16005 16029 "Dynamic Tracing", /* name of module */
16006 16030 &dtrace_ops, /* driver ops */
16007 16031 };
16008 16032
16009 16033 static struct modlinkage modlinkage = {
16010 16034 MODREV_1,
16011 16035 (void *)&modldrv,
16012 16036 NULL
16013 16037 };
16014 16038
16015 16039 int
16016 16040 _init(void)
16017 16041 {
16018 16042 return (mod_install(&modlinkage));
16019 16043 }
16020 16044
16021 16045 int
16022 16046 _info(struct modinfo *modinfop)
16023 16047 {
16024 16048 return (mod_info(&modlinkage, modinfop));
16025 16049 }
16026 16050
16027 16051 int
16028 16052 _fini(void)
16029 16053 {
16030 16054 return (mod_remove(&modlinkage));
16031 16055 }