2915 DTrace in a zone should see "cpu", "curpsinfo", et al
2916 DTrace in a zone should be able to access fds[]
2917 DTrace in a zone should have limited provider access
Reviewed by: Joshua M. Clulow <josh@sysmgr.org>
Reviewed by: Adam Leventhal <ahl@delphix.com>
--- old/usr/src/uts/common/dtrace/dtrace.c
+++ new/usr/src/uts/common/dtrace/dtrace.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25 25 * Copyright (c) 2012 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * DTrace - Dynamic Tracing for Solaris
30 30 *
31 31 * This is the implementation of the Solaris Dynamic Tracing framework
32 32 * (DTrace). The user-visible interface to DTrace is described at length in
33 33 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace
34 34 * library, the in-kernel DTrace framework, and the DTrace providers are
35 35 * described in the block comments in the <sys/dtrace.h> header file. The
36 36 * internal architecture of DTrace is described in the block comments in the
37 37 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace
38 38 * implementation very much assume mastery of all of these sources; if one has
39 39 * an unanswered question about the implementation, one should consult them
40 40 * first.
41 41 *
42 42 * The functions here are ordered roughly as follows:
43 43 *
44 44 * - Probe context functions
45 45 * - Probe hashing functions
46 46 * - Non-probe context utility functions
47 47 * - Matching functions
48 48 * - Provider-to-Framework API functions
49 49 * - Probe management functions
50 50 * - DIF object functions
51 51 * - Format functions
52 52 * - Predicate functions
53 53 * - ECB functions
54 54 * - Buffer functions
55 55 * - Enabling functions
56 56 * - DOF functions
57 57 * - Anonymous enabling functions
58 58 * - Consumer state functions
59 59 * - Helper functions
60 60 * - Hook functions
61 61 * - Driver cookbook functions
62 62 *
63 63 * Each group of functions begins with a block comment labelled the "DTrace
64 64 * [Group] Functions", allowing one to find each block by searching forward
65 65 * on capital-f functions.
66 66 */
67 67 #include <sys/errno.h>
68 68 #include <sys/stat.h>
69 69 #include <sys/modctl.h>
70 70 #include <sys/conf.h>
71 71 #include <sys/systm.h>
72 72 #include <sys/ddi.h>
73 73 #include <sys/sunddi.h>
74 74 #include <sys/cpuvar.h>
75 75 #include <sys/kmem.h>
76 76 #include <sys/strsubr.h>
77 77 #include <sys/sysmacros.h>
78 78 #include <sys/dtrace_impl.h>
79 79 #include <sys/atomic.h>
80 80 #include <sys/cmn_err.h>
81 81 #include <sys/mutex_impl.h>
82 82 #include <sys/rwlock_impl.h>
83 83 #include <sys/ctf_api.h>
84 84 #include <sys/panic.h>
85 85 #include <sys/priv_impl.h>
86 86 #include <sys/policy.h>
87 87 #include <sys/cred_impl.h>
88 88 #include <sys/procfs_isa.h>
89 89 #include <sys/taskq.h>
90 90 #include <sys/mkdev.h>
91 91 #include <sys/kdi.h>
92 92 #include <sys/zone.h>
93 93 #include <sys/socket.h>
94 94 #include <netinet/in.h>
95 95
96 96 /*
97 97 * DTrace Tunable Variables
98 98 *
99 99 * The following variables may be tuned by adding a line to /etc/system that
100 100 * includes both the name of the DTrace module ("dtrace") and the name of the
101 101 * variable. For example:
102 102 *
103 103 * set dtrace:dtrace_destructive_disallow = 1
104 104 *
105 105 * In general, the only variables that one should be tuning this way are those
106 106 * that affect system-wide DTrace behavior, and for which the default behavior
107 107 * is undesirable. Most of these variables are tunable on a per-consumer
108 108 * basis using DTrace options, and need not be tuned on a system-wide basis.
109 109 * When tuning these variables, avoid pathological values; while some attempt
110 110 * is made to verify the integrity of these variables, they are not considered
111 111 * part of the supported interface to DTrace, and they are therefore not
112 112 * checked comprehensively. Further, these variables should not be tuned
113 113 * dynamically via "mdb -kw" or other means; they should only be tuned via
114 114 * /etc/system.
115 115 */
116 116 int dtrace_destructive_disallow = 0;
117 117 dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
118 118 size_t dtrace_difo_maxsize = (256 * 1024);
119 119 dtrace_optval_t dtrace_dof_maxsize = (256 * 1024);
120 120 size_t dtrace_global_maxsize = (16 * 1024);
121 121 size_t dtrace_actions_max = (16 * 1024);
122 122 size_t dtrace_retain_max = 1024;
123 123 dtrace_optval_t dtrace_helper_actions_max = 1024;
124 124 dtrace_optval_t dtrace_helper_providers_max = 32;
125 125 dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
126 126 size_t dtrace_strsize_default = 256;
127 127 dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */
128 128 dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */
129 129 dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */
130 130 dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */
131 131 dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */
132 132 dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */
133 133 dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */
134 134 dtrace_optval_t dtrace_nspec_default = 1;
135 135 dtrace_optval_t dtrace_specsize_default = 32 * 1024;
136 136 dtrace_optval_t dtrace_stackframes_default = 20;
137 137 dtrace_optval_t dtrace_ustackframes_default = 20;
138 138 dtrace_optval_t dtrace_jstackframes_default = 50;
139 139 dtrace_optval_t dtrace_jstackstrsize_default = 512;
140 140 int dtrace_msgdsize_max = 128;
141 141 hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */
142 142 hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */
143 143 int dtrace_devdepth_max = 32;
144 144 int dtrace_err_verbose;
145 145 hrtime_t dtrace_deadman_interval = NANOSEC;
146 146 hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
147 147 hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
148 148 hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;
149 149
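By way of illustration (the values below are arbitrary examples, not recommendations), an /etc/system stanza tuning two of the variables above might read:

    set dtrace:dtrace_destructive_disallow = 1
    set dtrace:dtrace_dof_maxsize = 0x80000

As noted above, such settings take effect at boot and should not be applied to a live system via "mdb -kw".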
150 150 /*
151 151 * DTrace External Variables
152 152 *
153 153 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
154 154 * available to DTrace consumers via the backtick (`) syntax. One of these,
155 155 * dtrace_zero, is made deliberately so: it is provided as a source of
156 156 * well-known, zero-filled memory. While this variable is not documented,
157 157 * it is used by some translators as an implementation detail.
158 158 */
159 159 const char dtrace_zero[256] = { 0 }; /* zero-filled memory */
160 160
161 161 /*
162 162 * DTrace Internal Variables
163 163 */
164 164 static dev_info_t *dtrace_devi; /* device info */
165 165 static vmem_t *dtrace_arena; /* probe ID arena */
166 166 static vmem_t *dtrace_minor; /* minor number arena */
167 167 static taskq_t *dtrace_taskq; /* task queue */
168 168 static dtrace_probe_t **dtrace_probes; /* array of all probes */
169 169 static int dtrace_nprobes; /* number of probes */
170 170 static dtrace_provider_t *dtrace_provider; /* provider list */
171 171 static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
172 172 static int dtrace_opens; /* number of opens */
173 173 static int dtrace_helpers; /* number of helpers */
174 +static int dtrace_getf; /* number of unpriv getf()s */
174 175 static void *dtrace_softstate; /* softstate pointer */
175 176 static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
176 177 static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
177 178 static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
178 179 static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */
179 180 static int dtrace_toxranges; /* number of toxic ranges */
180 181 static int dtrace_toxranges_max; /* size of toxic range array */
181 182 static dtrace_anon_t dtrace_anon; /* anonymous enabling */
182 183 static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */
183 184 static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */
184 185 static kthread_t *dtrace_panicked; /* panicking thread */
185 186 static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
186 187 static dtrace_genid_t dtrace_probegen; /* current probe generation */
187 188 static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
188 189 static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
189 190 static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
190 191 static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
191 192 static int dtrace_dynvar_failclean; /* dynvars failed to clean */
192 193
193 194 /*
194 195 * DTrace Locking
195 196 * DTrace is protected by three (relatively coarse-grained) locks:
196 197 *
197 198 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
198 199 * including enabling state, probes, ECBs, consumer state, helper state,
199 200 * etc. Importantly, dtrace_lock is _not_ required when in probe context;
200 201 * probe context is lock-free -- synchronization is handled via the
201 202 * dtrace_sync() cross call mechanism.
202 203 *
203 204 * (2) dtrace_provider_lock is required when manipulating provider state, or
204 205 * when provider state must be held constant.
205 206 *
206 207 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
207 208 * when meta provider state must be held constant.
208 209 *
209 210 * The lock ordering between these three locks is dtrace_meta_lock before
210 211 * dtrace_provider_lock before dtrace_lock. (In particular, there are
211 212 * several places where dtrace_provider_lock is held by the framework as it
212 213 * calls into the providers -- which then call back into the framework,
213 214 * grabbing dtrace_lock.)
214 215 *
215 216 * There are two other locks in the mix: mod_lock and cpu_lock. With respect
216 217 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
217 218 * role as a coarse-grained lock; it is acquired before both of these locks.
218 219 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
219 220 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
220 221 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
221 222 * acquired _between_ dtrace_provider_lock and dtrace_lock.
222 223 */
223 224 static kmutex_t dtrace_lock; /* probe state lock */
224 225 static kmutex_t dtrace_provider_lock; /* provider state lock */
225 226 static kmutex_t dtrace_meta_lock; /* meta-provider state lock */
226 227
227 228 /*
228 229 * DTrace Provider Variables
229 230 *
230 231 * These are the variables relating to DTrace as a provider (that is, the
231 232 * provider of the BEGIN, END, and ERROR probes).
232 233 */
233 234 static dtrace_pattr_t dtrace_provider_attr = {
234 235 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
235 236 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
236 237 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
237 238 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
238 239 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
239 240 };
240 241
241 242 static void
242 243 dtrace_nullop(void)
243 244 {}
244 245
245 246 static int
246 247 dtrace_enable_nullop(void)
247 248 {
248 249 return (0);
249 250 }
250 251
251 252 static dtrace_pops_t dtrace_provider_ops = {
252 253 (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
253 254 (void (*)(void *, struct modctl *))dtrace_nullop,
254 255 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
255 256 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
256 257 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
257 258 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
258 259 NULL,
259 260 NULL,
260 261 NULL,
261 262 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop
262 263 };
263 264
264 265 static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */
265 266 static dtrace_id_t dtrace_probeid_end; /* special END probe */
266 267 dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
267 268
268 269 /*
269 270 * DTrace Helper Tracing Variables
270 271 */
271 272 uint32_t dtrace_helptrace_next = 0;
272 273 uint32_t dtrace_helptrace_nlocals;
273 274 char *dtrace_helptrace_buffer;
274 275 int dtrace_helptrace_bufsize = 512 * 1024;
275 276
276 277 #ifdef DEBUG
277 278 int dtrace_helptrace_enabled = 1;
278 279 #else
279 280 int dtrace_helptrace_enabled = 0;
280 281 #endif
281 282
282 283 /*
283 284 * DTrace Error Hashing
284 285 *
285 286 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
286 287 * table. This is very useful for checking coverage of tests that are
287 288 * expected to induce DIF or DOF processing errors, and may be useful for
288 289 * debugging problems in the DIF code generator or in DOF generation. The
289 290 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
290 291 */
291 292 #ifdef DEBUG
292 293 static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ];
293 294 static const char *dtrace_errlast;
294 295 static kthread_t *dtrace_errthread;
295 296 static kmutex_t dtrace_errlock;
296 297 #endif
297 298
298 299 /*
299 300 * DTrace Macros and Constants
300 301 *
301 302 * These are various macros that are useful in various spots in the
302 303 * implementation, along with a few random constants that have no meaning
303 304 * outside of the implementation. There is no real structure to this cpp
304 305 * mishmash -- but is there ever?
305 306 */
306 307 #define DTRACE_HASHSTR(hash, probe) \
307 308 dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))
308 309
309 310 #define DTRACE_HASHNEXT(hash, probe) \
310 311 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)
311 312
312 313 #define DTRACE_HASHPREV(hash, probe) \
313 314 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)
314 315
315 316 #define DTRACE_HASHEQ(hash, lhs, rhs) \
316 317 (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
317 318 *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)
318 319
319 320 #define DTRACE_AGGHASHSIZE_SLEW 17
320 321
321 322 #define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3)
322 323
323 324 /*
324 325 * The key for a thread-local variable consists of the lower 61 bits of the
325 326 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
326 327 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
327 328 * equal to a variable identifier. This is necessary (but not sufficient) to
328 329 * assure that global associative arrays never collide with thread-local
329 330 * variables. To guarantee that they cannot collide, we must also define the
330 331 * order for keying dynamic variables. That order is:
331 332 *
332 333 * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
333 334 *
334 335 * Because the variable-key and the tls-key are in orthogonal spaces, there is
335 336 * no way for a global variable key signature to match a thread-local key
336 337 * signature.
337 338 */
338 339 #define DTRACE_TLS_THRKEY(where) { \
339 340 uint_t intr = 0; \
340 341 uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
341 342 for (; actv; actv >>= 1) \
342 343 intr++; \
343 344 ASSERT(intr < (1 << 3)); \
344 345 (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
345 346 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
346 347 }
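As a hedged worked example of the packing performed by DTRACE_TLS_THRKEY (the numbers are invented): with curthread->t_did = 42 and the interrupt-activity scan yielding intr = 2, the key becomes ((42 + DIF_VARIABLE_MAX) & ((1ULL << 61) - 1)) | (2ULL << 61); the interrupt depth lands in the top 3 bits and the biased thread ID in the low 61, which is what keeps any tls-key distinct from a variable-key.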
347 348
348 349 #define DT_BSWAP_8(x) ((x) & 0xff)
349 350 #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
350 351 #define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
351 352 #define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
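A minimal user-space sketch (illustrative only, not driver code) confirming the byte-swap macros on known values, with the 8/16/32-bit macro bodies copied verbatim from above:

    #include <assert.h>

    #define DT_BSWAP_8(x)   ((x) & 0xff)
    #define DT_BSWAP_16(x)  ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
    #define DT_BSWAP_32(x)  ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))

    int
    main(void)
    {
            /* Each level reverses the byte order of the next-wider type. */
            assert(DT_BSWAP_16(0x1234) == 0x3412);
            assert(DT_BSWAP_32(0x12345678) == 0x78563412);
            return (0);
    }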
352 353
353 354 #define DT_MASK_LO 0x00000000FFFFFFFFULL
354 355
355 356 #define DTRACE_STORE(type, tomax, offset, what) \
356 357 *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
357 358
358 359 #ifndef __x86
359 360 #define DTRACE_ALIGNCHECK(addr, size, flags) \
360 361 if (addr & (size - 1)) { \
361 362 *flags |= CPU_DTRACE_BADALIGN; \
362 363 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
363 364 return (0); \
364 365 }
365 366 #else
366 367 #define DTRACE_ALIGNCHECK(addr, size, flags)
367 368 #endif
368 369
369 370 /*
370 371 * Test whether a range of memory starting at testaddr of size testsz falls
371 372 * within the range of memory described by addr, sz. We take care to avoid
372 373 * problems with overflow and underflow of the unsigned quantities, and
373 374 * disallow all negative sizes. Ranges of size 0 are allowed.
374 375 */
375 376 #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
376 - ((testaddr) - (baseaddr) < (basesz) && \
377 - (testaddr) + (testsz) - (baseaddr) <= (basesz) && \
377 + ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
378 + (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
378 379 (testaddr) + (testsz) >= (testaddr))
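A hedged user-space sketch of why the subtraction form above is overflow-safe: in unsigned arithmetic, (testaddr) - (baseaddr) wraps to a huge value for any address below the base, so a single comparison rejects both sides of the range without ever computing baseaddr + basesz, and the final term catches testaddr + testsz itself wrapping:

    #include <assert.h>
    #include <stdint.h>

    /* Copied from the new version of the macro above. */
    #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz)          \
            ((testaddr) - (uintptr_t)(baseaddr) < (basesz) &&           \
            (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
            (testaddr) + (testsz) >= (testaddr))

    int
    main(void)
    {
            uintptr_t base = 0x1000;
            size_t size = 0x100;

            assert(DTRACE_INRANGE((uintptr_t)0x1000, 0x10, base, size)); /* inside */
            assert(!DTRACE_INRANGE((uintptr_t)0xff8, 0x10, base, size)); /* below base: wraps */
            assert(!DTRACE_INRANGE((uintptr_t)0x10f8, 0x10, base, size)); /* straddles the end */
            /* testaddr + testsz wraps; caught only by the final term. */
            assert(!DTRACE_INRANGE((uintptr_t)-2, 0x10, (uintptr_t)0, (size_t)-1));
            return (0);
    }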
379 380
380 381 /*
381 382 * Test whether alloc_sz bytes will fit in the scratch region. We isolate
382 383 * alloc_sz on the righthand side of the comparison in order to avoid overflow
383 384 * or underflow in the comparison with it. This is simpler than the INRANGE
384 385 * check above, because we know that the dtms_scratch_ptr is valid in the
385 386 * range. Allocations of size zero are allowed.
386 387 */
387 388 #define DTRACE_INSCRATCH(mstate, alloc_sz) \
388 389 ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
389 390 (mstate)->dtms_scratch_ptr >= (alloc_sz))
390 391
391 392 #define DTRACE_LOADFUNC(bits) \
392 393 /*CSTYLED*/ \
393 394 uint##bits##_t \
394 395 dtrace_load##bits(uintptr_t addr) \
395 396 { \
396 397 size_t size = bits / NBBY; \
397 398 /*CSTYLED*/ \
398 399 uint##bits##_t rval; \
399 400 int i; \
400 401 volatile uint16_t *flags = (volatile uint16_t *) \
401 402 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
402 403 \
403 404 DTRACE_ALIGNCHECK(addr, size, flags); \
404 405 \
405 406 for (i = 0; i < dtrace_toxranges; i++) { \
406 407 if (addr >= dtrace_toxrange[i].dtt_limit) \
407 408 continue; \
408 409 \
409 410 if (addr + size <= dtrace_toxrange[i].dtt_base) \
410 411 continue; \
411 412 \
412 413 /* \
413 414 * This address falls within a toxic region; return 0. \
414 415 */ \
415 416 *flags |= CPU_DTRACE_BADADDR; \
416 417 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
417 418 return (0); \
418 419 } \
419 420 \
420 421 *flags |= CPU_DTRACE_NOFAULT; \
421 422 /*CSTYLED*/ \
422 423 rval = *((volatile uint##bits##_t *)addr); \
423 424 *flags &= ~CPU_DTRACE_NOFAULT; \
424 425 \
425 426 return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \
426 427 }
427 428
428 429 #ifdef _LP64
429 430 #define dtrace_loadptr dtrace_load64
430 431 #else
431 432 #define dtrace_loadptr dtrace_load32
432 433 #endif
433 434
434 435 #define DTRACE_DYNHASH_FREE 0
435 436 #define DTRACE_DYNHASH_SINK 1
436 437 #define DTRACE_DYNHASH_VALID 2
437 438
438 439 #define DTRACE_MATCH_FAIL -1
439 440 #define DTRACE_MATCH_NEXT 0
440 441 #define DTRACE_MATCH_DONE 1
441 442 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
442 443 #define DTRACE_STATE_ALIGN 64
443 444
444 445 #define DTRACE_FLAGS2FLT(flags) \
445 446 (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \
446 447 ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \
447 448 ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \
448 449 ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \
449 450 ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \
450 451 ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \
451 452 ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \
452 453 ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \
453 454 ((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK : \
454 455 DTRACEFLT_UNKNOWN)
455 456
456 457 #define DTRACEACT_ISSTRING(act) \
457 458 ((act)->dta_kind == DTRACEACT_DIFEXPR && \
458 459 (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
459 460
460 461 static size_t dtrace_strlen(const char *, size_t);
461 462 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
462 463 static void dtrace_enabling_provide(dtrace_provider_t *);
463 464 static int dtrace_enabling_match(dtrace_enabling_t *, int *);
464 465 static void dtrace_enabling_matchall(void);
465 466 static void dtrace_enabling_reap(void);
466 467 static dtrace_state_t *dtrace_anon_grab(void);
467 468 static uint64_t dtrace_helper(int, dtrace_mstate_t *,
468 469 dtrace_state_t *, uint64_t, uint64_t);
469 470 static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
470 471 static void dtrace_buffer_drop(dtrace_buffer_t *);
471 472 static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
472 473 static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
473 474 dtrace_state_t *, dtrace_mstate_t *);
474 475 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
475 476 dtrace_optval_t);
476 477 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
477 478 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
479 +static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
480 +static void dtrace_getf_barrier(void);
478 481
479 482 /*
480 483 * DTrace Probe Context Functions
481 484 *
482 485 * These functions are called from probe context. Because probe context is
483 486 * any context in which C may be called, arbitrarily locks may be held,
484 487 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
485 488 * As a result, functions called from probe context may only call other DTrace
486 489 * support functions -- they may not interact at all with the system at large.
487 490 * (Note that the ASSERT macro is made probe-context safe by redefining it in
488 491 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
489 492 * loads are to be performed from probe context, they _must_ be in terms of
490 493 * the safe dtrace_load*() variants.
491 494 *
492 495 * Some functions in this block are not actually called from probe context;
493 496 * for these functions, there will be a comment above the function reading
494 497 * "Note: not called from probe context."
495 498 */
496 499 void
497 500 dtrace_panic(const char *format, ...)
498 501 {
499 502 va_list alist;
500 503
501 504 va_start(alist, format);
502 505 dtrace_vpanic(format, alist);
503 506 va_end(alist);
504 507 }
505 508
506 509 int
507 510 dtrace_assfail(const char *a, const char *f, int l)
508 511 {
509 512 dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);
510 513
511 514 /*
512 515 * We just need something here that even the most clever compiler
513 516 * cannot optimize away.
514 517 */
515 518 return (a[(uintptr_t)f]);
516 519 }
517 520
518 521 /*
519 522 * Atomically increment a specified error counter from probe context.
520 523 */
521 524 static void
522 525 dtrace_error(uint32_t *counter)
523 526 {
524 527 /*
525 528 * Most counters stored to in probe context are per-CPU counters.
526 529 * However, there are some error conditions that are sufficiently
527 530 * arcane that they don't merit per-CPU storage. If these counters
528 531 * are incremented concurrently on different CPUs, scalability will be
529 532 * adversely affected -- but we don't expect them to be white-hot in a
530 533 * correctly constructed enabling...
531 534 */
532 535 uint32_t oval, nval;
533 536
534 537 do {
535 538 oval = *counter;
536 539
537 540 if ((nval = oval + 1) == 0) {
538 541 /*
539 542 * If the counter would wrap, set it to 1 -- assuring
540 543 * that the counter is never zero when we have seen
541 544 * errors. (The counter must be 32-bits because we
542 545 * aren't guaranteed a 64-bit compare&swap operation.)
543 546 * To save this code both the infamy of being fingered
544 547 * by a priggish news story and the indignity of being
545 548 * the target of a neo-puritan witch trial, we're
546 549 * carefully avoiding any colorful description of the
547 550 * likelihood of this condition -- but suffice it to
548 551 * say that it is only slightly more likely than the
549 552 * overflow of predicate cache IDs, as discussed in
550 553 * dtrace_predicate_create().
551 554 */
552 555 nval = 1;
553 556 }
554 557 } while (dtrace_cas32(counter, oval, nval) != oval);
555 558 }
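A hedged user-space analogue of this wrap-avoiding increment, using C11 atomics where the driver uses dtrace_cas32(); the invariant is the same, namely that a counter which has ever been incremented can never read zero again:

    #include <stdatomic.h>
    #include <stdint.h>

    static void
    error_count(_Atomic uint32_t *counter)
    {
            uint32_t oval, nval;

            do {
                    oval = atomic_load(counter);
                    /* On wrap, pin to 1 so evidence of errors is never lost. */
                    nval = (oval + 1 == 0) ? 1 : oval + 1;
            } while (!atomic_compare_exchange_weak(counter, &oval, nval));
    }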
556 559
557 560 /*
558 561 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
559 562 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
560 563 */
561 564 DTRACE_LOADFUNC(8)
562 565 DTRACE_LOADFUNC(16)
563 566 DTRACE_LOADFUNC(32)
564 567 DTRACE_LOADFUNC(64)
565 568
566 569 static int
567 570 dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
568 571 {
569 572 if (dest < mstate->dtms_scratch_base)
570 573 return (0);
571 574
572 575 if (dest + size < dest)
573 576 return (0);
574 577
575 578 if (dest + size > mstate->dtms_scratch_ptr)
576 579 return (0);
577 580
578 581 return (1);
579 582 }
580 583
581 584 static int
582 585 dtrace_canstore_statvar(uint64_t addr, size_t sz,
583 586 dtrace_statvar_t **svars, int nsvars)
584 587 {
585 588 int i;
586 589
587 590 for (i = 0; i < nsvars; i++) {
588 591 dtrace_statvar_t *svar = svars[i];
589 592
590 593 if (svar == NULL || svar->dtsv_size == 0)
591 594 continue;
592 595
593 596 if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size))
594 597 return (1);
595 598 }
596 599
597 600 return (0);
598 601 }
599 602
600 603 /*
601 604 * Check to see if the address is within a memory region to which a store may
602 605 * be issued. This includes the DTrace scratch areas, and any DTrace variable
603 606 * region. The caller of dtrace_canstore() is responsible for performing any
604 607 * alignment checks that are needed before stores are actually executed.
605 608 */
606 609 static int
607 610 dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
608 611 dtrace_vstate_t *vstate)
609 612 {
610 613 /*
611 614 * First, check to see if the address is in scratch space...
612 615 */
613 616 if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
614 617 mstate->dtms_scratch_size))
615 618 return (1);
616 619
617 620 /*
618 621 * Now check to see if it's a dynamic variable. This check will pick
619 622 * up both thread-local variables and any global dynamically-allocated
620 623 * variables.
621 624 */
622 - if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
625 + if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
623 626 vstate->dtvs_dynvars.dtds_size)) {
624 627 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
625 628 uintptr_t base = (uintptr_t)dstate->dtds_base +
626 629 (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
627 630 uintptr_t chunkoffs;
628 631
629 632 /*
630 633 * Before we assume that we can store here, we need to make
631 634 * sure that it isn't in our metadata -- storing to our
632 635 * dynamic variable metadata would corrupt our state. For
633 636 * the range to not include any dynamic variable metadata,
634 637 * it must:
635 638 *
636 639 * (1) Start above the hash table that is at the base of
637 640 * the dynamic variable space
638 641 *
639 642 * (2) Have a starting chunk offset that is beyond the
640 643 * dtrace_dynvar_t that is at the base of every chunk
641 644 *
642 645 * (3) Not span a chunk boundary
643 646 *
644 647 */
645 648 if (addr < base)
646 649 return (0);
647 650
648 651 chunkoffs = (addr - base) % dstate->dtds_chunksize;
649 652
650 653 if (chunkoffs < sizeof (dtrace_dynvar_t))
651 654 return (0);
652 655
653 656 if (chunkoffs + sz > dstate->dtds_chunksize)
654 657 return (0);
655 658
656 659 return (1);
657 660 }
658 661
659 662 /*
660 663 * Finally, check the static local and global variables. These checks
661 664 * take the longest, so we perform them last.
662 665 */
663 666 if (dtrace_canstore_statvar(addr, sz,
664 667 vstate->dtvs_locals, vstate->dtvs_nlocals))
665 668 return (1);
666 669
667 670 if (dtrace_canstore_statvar(addr, sz,
668 671 vstate->dtvs_globals, vstate->dtvs_nglobals))
669 672 return (1);
670 673
671 674 return (0);
672 675 }
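As a hedged worked example of the chunk checks above (all sizes invented): if dtds_base is 0x1000, the hash table occupies 0x200 bytes (so base is 0x1200), dtds_chunksize is 0x100 and the per-chunk dtrace_dynvar_t header is 0x40 bytes, then an 8-byte store at 0x1250 has chunkoffs = (0x1250 - 0x1200) % 0x100 = 0x50: past the header and short of the chunk boundary, so it is allowed. A store at 0x1220 (chunkoffs 0x20, inside the header) or an 0xc0-byte store at 0x1250 (0x50 + 0xc0 > 0x100, spanning into the next chunk) is refused.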
673 676
674 677
675 678 /*
676 679 * Convenience routine to check to see if the address is within a memory
677 680 * region in which a load may be issued given the user's privilege level;
678 681 * if not, it sets the appropriate error flags and loads 'addr' into the
679 682 * illegal value slot.
680 683 *
681 684 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
682 685 * appropriate memory access protection.
683 686 */
684 687 static int
685 688 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
686 689 dtrace_vstate_t *vstate)
687 690 {
688 691 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
692 + file_t *fp;
689 693
690 694 /*
691 695 * If we hold the privilege to read from kernel memory, then
692 696 * everything is readable.
693 697 */
694 698 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
695 699 return (1);
696 700
697 701 /*
698 702 * You can obviously read that which you can store.
699 703 */
700 704 if (dtrace_canstore(addr, sz, mstate, vstate))
701 705 return (1);
702 706
703 707 /*
704 708 * We're allowed to read from our own string table.
705 709 */
706 - if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
710 + if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
707 711 mstate->dtms_difo->dtdo_strlen))
708 712 return (1);
709 713
714 + if (vstate->dtvs_state != NULL &&
715 + dtrace_priv_proc(vstate->dtvs_state, mstate)) {
716 + proc_t *p;
717 +
718 + /*
719 + * When we have privileges to the current process, there are
720 + * several context-related kernel structures that are safe to
721 + * read, even absent the privilege to read from kernel memory.
722 + * These reads are safe because these structures contain only
723 + * state that (1) we're permitted to read, (2) is harmless or
724 + * (3) contains pointers to additional kernel state that we're
725 + * not permitted to read (and as such, do not present an
726 + * opportunity for privilege escalation). Finally (and
727 + * critically), because of the nature of their relation with
728 + * the current thread context, the memory associated with these
729 + * structures cannot change over the duration of probe context,
730 + * and it is therefore impossible for this memory to be
731 + * deallocated and reallocated as something else while it's
732 + * being operated upon.
733 + */
734 + if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
735 + return (1);
736 +
737 + if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
738 + sz, curthread->t_procp, sizeof (proc_t))) {
739 + return (1);
740 + }
741 +
742 + if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
743 + curthread->t_cred, sizeof (cred_t))) {
744 + return (1);
745 + }
746 +
747 + if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
748 + &(p->p_pidp->pid_id), sizeof (pid_t))) {
749 + return (1);
750 + }
751 +
752 + if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
753 + curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
754 + return (1);
755 + }
756 + }
757 +
758 + if ((fp = mstate->dtms_getf) != NULL) {
759 + uintptr_t psz = sizeof (void *);
760 + vnode_t *vp;
761 + vnodeops_t *op;
762 +
763 + /*
764 + * When getf() returns a file_t, the enabling is implicitly
765 + * granted the (transient) right to read the returned file_t
766 + * as well as the v_path and v_op->vnop_name of the underlying
767 + * vnode. These accesses are allowed after a successful
768 + * getf() because the members that they refer to cannot change
769 + * once set -- and the barrier logic in the kernel's closef()
770 + * path assures that the file_t and its referenced vnode_t
771 + * cannot themselves be stale (that is, it is impossible for
772 + * either dtms_getf itself or its f_vnode member to reference
773 + * freed memory).
774 + */
775 + if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
776 + return (1);
777 +
778 + if ((vp = fp->f_vnode) != NULL) {
779 + if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
780 + return (1);
781 +
782 + if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
783 + vp->v_path, strlen(vp->v_path) + 1)) {
784 + return (1);
785 + }
786 +
787 + if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
788 + return (1);
789 +
790 + if ((op = vp->v_op) != NULL &&
791 + DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
792 + return (1);
793 + }
794 +
795 + if (op != NULL && op->vnop_name != NULL &&
796 + DTRACE_INRANGE(addr, sz, op->vnop_name,
797 + strlen(op->vnop_name) + 1)) {
798 + return (1);
799 + }
800 + }
801 + }
802 +
710 803 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
711 804 *illval = addr;
712 805 return (0);
713 806 }
714 807
715 808 /*
716 809 * Convenience routine to check to see if a given string is within a memory
717 810 * region in which a load may be issued given the user's privilege level;
718 811 * this exists so that we don't need to issue unnecessary dtrace_strlen()
719 812 * calls in the event that the user has all privileges.
720 813 */
721 814 static int
722 815 dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
723 816 dtrace_vstate_t *vstate)
724 817 {
725 818 size_t strsz;
726 819
727 820 /*
728 821 * If we hold the privilege to read from kernel memory, then
729 822 * everything is readable.
730 823 */
731 824 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
732 825 return (1);
733 826
734 827 strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz);
735 828 if (dtrace_canload(addr, strsz, mstate, vstate))
736 829 return (1);
737 830
738 831 return (0);
739 832 }
740 833
741 834 /*
742 835 * Convenience routine to check to see if a given variable is within a memory
743 836 * region in which a load may be issued given the user's privilege level.
744 837 */
745 838 static int
746 839 dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
747 840 dtrace_vstate_t *vstate)
748 841 {
749 842 size_t sz;
750 843 ASSERT(type->dtdt_flags & DIF_TF_BYREF);
751 844
752 845 /*
753 846 * If we hold the privilege to read from kernel memory, then
754 847 * everything is readable.
755 848 */
756 849 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
757 850 return (1);
758 851
759 852 if (type->dtdt_kind == DIF_TYPE_STRING)
760 853 sz = dtrace_strlen(src,
761 854 vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
762 855 else
763 856 sz = type->dtdt_size;
764 857
765 858 return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
766 859 }
767 860
768 861 /*
769 862 * Compare two strings using safe loads.
770 863 */
771 864 static int
772 865 dtrace_strncmp(char *s1, char *s2, size_t limit)
773 866 {
774 867 uint8_t c1, c2;
775 868 volatile uint16_t *flags;
776 869
777 870 if (s1 == s2 || limit == 0)
778 871 return (0);
779 872
780 873 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
781 874
782 875 do {
783 876 if (s1 == NULL) {
784 877 c1 = '\0';
785 878 } else {
786 879 c1 = dtrace_load8((uintptr_t)s1++);
787 880 }
788 881
789 882 if (s2 == NULL) {
790 883 c2 = '\0';
791 884 } else {
792 885 c2 = dtrace_load8((uintptr_t)s2++);
793 886 }
794 887
795 888 if (c1 != c2)
796 889 return (c1 - c2);
797 890 } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));
798 891
799 892 return (0);
800 893 }
801 894
802 895 /*
803 896 * Compute strlen(s) for a string using safe memory accesses. The additional
804 897 * lim parameter is used to specify a maximum length to ensure completion.
805 898 */
806 899 static size_t
807 900 dtrace_strlen(const char *s, size_t lim)
808 901 {
809 902 uint_t len;
810 903
811 904 for (len = 0; len != lim; len++) {
812 905 if (dtrace_load8((uintptr_t)s++) == '\0')
813 906 break;
814 907 }
815 908
816 909 return (len);
817 910 }
818 911
819 912 /*
820 913 * Check if an address falls within a toxic region.
821 914 */
822 915 static int
823 916 dtrace_istoxic(uintptr_t kaddr, size_t size)
824 917 {
825 918 uintptr_t taddr, tsize;
826 919 int i;
827 920
828 921 for (i = 0; i < dtrace_toxranges; i++) {
829 922 taddr = dtrace_toxrange[i].dtt_base;
830 923 tsize = dtrace_toxrange[i].dtt_limit - taddr;
831 924
832 925 if (kaddr - taddr < tsize) {
833 926 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
834 927 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
835 928 return (1);
836 929 }
837 930
838 931 if (taddr - kaddr < size) {
839 932 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
840 933 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
841 934 return (1);
842 935 }
843 936 }
844 937
845 938 return (0);
846 939 }
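In hedged numeric terms (an invented example): with a toxic range [0x100, 0x200) -- taddr = 0x100, tsize = 0x100 -- an 8-byte load at kaddr = 0x1f8 is flagged by the first test (0x1f8 - 0x100 = 0xf8 < 0x100), while a 0x10-byte load at kaddr = 0xf8 is flagged by the second (0x100 - 0xf8 = 8 < 0x10); unsigned wraparound makes both comparisons safe without ever computing kaddr + size.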
847 940
848 941 /*
849 942 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe
850 943 * memory specified by the DIF program. The dst is assumed to be safe memory
851 944 * that we can store to directly because it is managed by DTrace. As with
852 945 * standard bcopy, overlapping copies are handled properly.
853 946 */
854 947 static void
855 948 dtrace_bcopy(const void *src, void *dst, size_t len)
856 949 {
857 950 if (len != 0) {
858 951 uint8_t *s1 = dst;
859 952 const uint8_t *s2 = src;
860 953
861 954 if (s1 <= s2) {
862 955 do {
863 956 *s1++ = dtrace_load8((uintptr_t)s2++);
864 957 } while (--len != 0);
865 958 } else {
866 959 s2 += len;
867 960 s1 += len;
868 961
869 962 do {
870 963 *--s1 = dtrace_load8((uintptr_t)--s2);
871 964 } while (--len != 0);
872 965 }
873 966 }
874 967 }
875 968
876 969 /*
877 970 * Copy src to dst using safe memory accesses, up to either the specified
878 971 * length, or the point that a nul byte is encountered. The src is assumed to
879 972 * be unsafe memory specified by the DIF program. The dst is assumed to be
880 973 * safe memory that we can store to directly because it is managed by DTrace.
881 974 * Unlike dtrace_bcopy(), overlapping regions are not handled.
882 975 */
883 976 static void
884 977 dtrace_strcpy(const void *src, void *dst, size_t len)
885 978 {
886 979 if (len != 0) {
887 980 uint8_t *s1 = dst, c;
888 981 const uint8_t *s2 = src;
889 982
890 983 do {
891 984 *s1++ = c = dtrace_load8((uintptr_t)s2++);
892 985 } while (--len != 0 && c != '\0');
893 986 }
894 987 }
895 988
896 989 /*
897 990 * Copy src to dst, deriving the size and type from the specified (BYREF)
898 991 * variable type. The src is assumed to be unsafe memory specified by the DIF
899 992 * program. The dst is assumed to be DTrace variable memory that is of the
900 993 * specified type; we assume that we can store to it directly.
901 994 */
902 995 static void
903 996 dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
904 997 {
905 998 ASSERT(type->dtdt_flags & DIF_TF_BYREF);
906 999
907 1000 if (type->dtdt_kind == DIF_TYPE_STRING) {
908 1001 dtrace_strcpy(src, dst, type->dtdt_size);
909 1002 } else {
910 1003 dtrace_bcopy(src, dst, type->dtdt_size);
911 1004 }
912 1005 }
913 1006
914 1007 /*
915 1008 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
916 1009 * unsafe memory specified by the DIF program. The s2 data is assumed to be
917 1010 * safe memory that we can access directly because it is managed by DTrace.
918 1011 */
919 1012 static int
920 1013 dtrace_bcmp(const void *s1, const void *s2, size_t len)
921 1014 {
922 1015 volatile uint16_t *flags;
923 1016
924 1017 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
925 1018
926 1019 if (s1 == s2)
927 1020 return (0);
928 1021
929 1022 if (s1 == NULL || s2 == NULL)
930 1023 return (1);
931 1024
932 1025 if (s1 != s2 && len != 0) {
933 1026 const uint8_t *ps1 = s1;
934 1027 const uint8_t *ps2 = s2;
935 1028
936 1029 do {
937 1030 if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
938 1031 return (1);
939 1032 } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
940 1033 }
941 1034 return (0);
942 1035 }
943 1036
944 1037 /*
945 1038 * Zero the specified region using a simple byte-by-byte loop. Note that this
946 1039 * is for safe DTrace-managed memory only.
947 1040 */
948 1041 static void
949 1042 dtrace_bzero(void *dst, size_t len)
950 1043 {
951 1044 uchar_t *cp;
952 1045
953 1046 for (cp = dst; len != 0; len--)
954 1047 *cp++ = 0;
955 1048 }
956 1049
957 1050 static void
958 1051 dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
959 1052 {
960 1053 uint64_t result[2];
961 1054
962 1055 result[0] = addend1[0] + addend2[0];
963 1056 result[1] = addend1[1] + addend2[1] +
964 1057 (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);
965 1058
966 1059 sum[0] = result[0];
967 1060 sum[1] = result[1];
968 1061 }
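A hedged user-space check of the carry detection above (result[0] can only be less than an addend's low word if the low-word addition wrapped); dtrace_add_128() is copied verbatim, and gcc/clang's nonstandard __int128 serves purely as a reference oracle:

    #include <assert.h>
    #include <stdint.h>

    static void
    dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
    {
            uint64_t result[2];

            result[0] = addend1[0] + addend2[0];
            result[1] = addend1[1] + addend2[1] +
                (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);

            sum[0] = result[0];
            sum[1] = result[1];
    }

    int
    main(void)
    {
            /* Low words wrap: (2^64 - 1) + 2 must carry into the high word. */
            uint64_t a[2] = { UINT64_MAX, 1 };
            uint64_t b[2] = { 2, 0 };
            uint64_t sum[2];

            dtrace_add_128(a, b, sum);

            unsigned __int128 ref =
                (((unsigned __int128)a[1] << 64) | a[0]) +
                (((unsigned __int128)b[1] << 64) | b[0]);
            assert(sum[0] == (uint64_t)ref);
            assert(sum[1] == (uint64_t)(ref >> 64));
            return (0);
    }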
969 1062
970 1063 /*
971 1064 * Shift the 128-bit value in a by b. If b is positive, shift left.
972 1065 * If b is negative, shift right.
973 1066 */
974 1067 static void
975 1068 dtrace_shift_128(uint64_t *a, int b)
976 1069 {
977 1070 uint64_t mask;
978 1071
979 1072 if (b == 0)
980 1073 return;
981 1074
982 1075 if (b < 0) {
983 1076 b = -b;
984 1077 if (b >= 64) {
985 1078 a[0] = a[1] >> (b - 64);
986 1079 a[1] = 0;
987 1080 } else {
988 1081 a[0] >>= b;
989 1082 mask = 1LL << (64 - b);
990 1083 mask -= 1;
991 1084 a[0] |= ((a[1] & mask) << (64 - b));
992 1085 a[1] >>= b;
993 1086 }
994 1087 } else {
995 1088 if (b >= 64) {
996 1089 a[1] = a[0] << (b - 64);
997 1090 a[0] = 0;
998 1091 } else {
999 1092 a[1] <<= b;
1000 1093 mask = a[0] >> (64 - b);
1001 1094 a[1] |= mask;
1002 1095 a[0] <<= b;
1003 1096 }
1004 1097 }
1005 1098 }
1006 1099
1007 1100 /*
1008 1101 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
1009 1102 * use native multiplication on those, and then re-combine into the
1010 1103 * resulting 128-bit value.
1011 1104 *
1012 1105 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
1013 1106 * hi1 * hi2 << 64 +
1014 1107 * hi1 * lo2 << 32 +
1015 1108 * hi2 * lo1 << 32 +
1016 1109 * lo1 * lo2
1017 1110 */
1018 1111 static void
1019 1112 dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
1020 1113 {
1021 1114 uint64_t hi1, hi2, lo1, lo2;
1022 1115 uint64_t tmp[2];
1023 1116
1024 1117 hi1 = factor1 >> 32;
1025 1118 hi2 = factor2 >> 32;
1026 1119
1027 1120 lo1 = factor1 & DT_MASK_LO;
1028 1121 lo2 = factor2 & DT_MASK_LO;
1029 1122
1030 1123 product[0] = lo1 * lo2;
1031 1124 product[1] = hi1 * hi2;
1032 1125
1033 1126 tmp[0] = hi1 * lo2;
1034 1127 tmp[1] = 0;
1035 1128 dtrace_shift_128(tmp, 32);
1036 1129 dtrace_add_128(product, tmp, product);
1037 1130
1038 1131 tmp[0] = hi2 * lo1;
1039 1132 tmp[1] = 0;
1040 1133 dtrace_shift_128(tmp, 32);
1041 1134 dtrace_add_128(product, tmp, product);
1042 1135 }
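The decomposition can be cross-checked the same way; a hedged harness assuming dtrace_add_128(), dtrace_shift_128() and dtrace_multiply_128() are copied verbatim from above:

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
            uint64_t f1 = 0xdeadbeefcafef00dULL;
            uint64_t f2 = 0x0123456789abcdefULL;
            uint64_t prod[2];

            dtrace_multiply_128(f1, f2, prod);

            /* The 4-term 32-bit decomposition must agree with native 128-bit. */
            unsigned __int128 ref = (unsigned __int128)f1 * f2;
            assert(prod[0] == (uint64_t)ref);
            assert(prod[1] == (uint64_t)(ref >> 64));
            return (0);
    }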
1043 1136
1044 1137 /*
1045 1138 * This privilege check should be used by actions and subroutines to
1046 1139 * verify that the user credentials of the process that enabled the
1047 1140 * invoking ECB match the target credentials
1048 1141 */
1049 1142 static int
1050 1143 dtrace_priv_proc_common_user(dtrace_state_t *state)
1051 1144 {
1052 1145 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1053 1146
1054 1147 /*
1055 1148 * We should always have a non-NULL state cred here, since if cred
1056 1149 * is null (anonymous tracing), we fast-path bypass this routine.
1057 1150 */
1058 1151 ASSERT(s_cr != NULL);
1059 1152
1060 1153 if ((cr = CRED()) != NULL &&
1061 1154 s_cr->cr_uid == cr->cr_uid &&
1062 1155 s_cr->cr_uid == cr->cr_ruid &&
1063 1156 s_cr->cr_uid == cr->cr_suid &&
1064 1157 s_cr->cr_gid == cr->cr_gid &&
1065 1158 s_cr->cr_gid == cr->cr_rgid &&
1066 1159 s_cr->cr_gid == cr->cr_sgid)
1067 1160 return (1);
1068 1161
1069 1162 return (0);
1070 1163 }
1071 1164
1072 1165 /*
1073 1166 * This privilege check should be used by actions and subroutines to
1074 1167 * verify that the zone of the process that enabled the invoking ECB
1075 1168 * matches the target credentials
1076 1169 */
1077 1170 static int
1078 1171 dtrace_priv_proc_common_zone(dtrace_state_t *state)
1079 1172 {
1080 1173 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1081 1174
1082 1175 /*
1083 1176 * We should always have a non-NULL state cred here, since if cred
1084 1177 * is null (anonymous tracing), we fast-path bypass this routine.
1085 1178 */
1086 1179 ASSERT(s_cr != NULL);
1087 1180
1088 - if ((cr = CRED()) != NULL &&
1089 - s_cr->cr_zone == cr->cr_zone)
1181 + if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
1090 1182 return (1);
1091 1183
1092 1184 return (0);
1093 1185 }
1094 1186
1095 1187 /*
1096 1188 * This privilege check should be used by actions and subroutines to
1097 1189 * verify that the process has not setuid or changed credentials.
1098 1190 */
1099 1191 static int
1100 1192 dtrace_priv_proc_common_nocd()
1101 1193 {
1102 1194 proc_t *proc;
1103 1195
1104 1196 if ((proc = ttoproc(curthread)) != NULL &&
1105 1197 !(proc->p_flag & SNOCD))
1106 1198 return (1);
1107 1199
1108 1200 return (0);
1109 1201 }
1110 1202
1111 1203 static int
1112 1204 dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)
1113 1205 {
1114 1206 int action = state->dts_cred.dcr_action;
1115 1207
1116 1208 if (!(mstate->dtms_access & DTRACE_ACCESS_PROC))
1117 1209 goto bad;
1118 1210
1119 1211 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
1120 1212 dtrace_priv_proc_common_zone(state) == 0)
1121 1213 goto bad;
1122 1214
1123 1215 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
1124 1216 dtrace_priv_proc_common_user(state) == 0)
1125 1217 goto bad;
1126 1218
1127 1219 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
1128 1220 dtrace_priv_proc_common_nocd() == 0)
1129 1221 goto bad;
1130 1222
1131 1223 return (1);
1132 1224
1133 1225 bad:
1134 1226 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1135 1227
1136 1228 return (0);
1137 1229 }
1138 1230
1139 1231 static int
1140 1232 dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
1141 1233 {
1142 1234 if (mstate->dtms_access & DTRACE_ACCESS_PROC) {
1143 1235 if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
1144 1236 return (1);
1145 1237
1146 1238 if (dtrace_priv_proc_common_zone(state) &&
1147 1239 dtrace_priv_proc_common_user(state) &&
1148 1240 dtrace_priv_proc_common_nocd())
1149 1241 return (1);
1150 1242 }
1151 1243
1152 1244 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1153 1245
1154 1246 return (0);
1155 1247 }
1156 1248
1157 1249 static int
1158 1250 dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)
1159 1251 {
1160 1252 if ((mstate->dtms_access & DTRACE_ACCESS_PROC) &&
1161 1253 (state->dts_cred.dcr_action & DTRACE_CRA_PROC))
1162 1254 return (1);
1163 1255
1164 1256 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1165 1257
1166 1258 return (0);
1167 1259 }
1168 1260
1169 1261 static int
1170 1262 dtrace_priv_kernel(dtrace_state_t *state)
1171 1263 {
1172 1264 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
1173 1265 return (1);
1174 1266
1175 1267 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1176 1268
1177 1269 return (0);
1178 1270 }
1179 1271
1180 1272 static int
1181 1273 dtrace_priv_kernel_destructive(dtrace_state_t *state)
1182 1274 {
1183 1275 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
1184 1276 return (1);
1185 1277
1186 1278 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1187 1279
1188 1280 return (0);
1189 1281 }
1190 1282
1191 1283 /*
1192 1284 * Determine if the dte_cond of the specified ECB allows for processing of
1193 1285 * the current probe to continue. Note that this routine may allow continued
1194 1286 * processing, but with access(es) stripped from the mstate's dtms_access
1195 1287 * field.
1196 1288 */
1197 1289 static int
1198 1290 dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
1199 1291 dtrace_ecb_t *ecb)
1200 1292 {
1201 1293 dtrace_probe_t *probe = ecb->dte_probe;
1202 1294 dtrace_provider_t *prov = probe->dtpr_provider;
1203 1295 dtrace_pops_t *pops = &prov->dtpv_pops;
1204 1296 int mode = DTRACE_MODE_NOPRIV_DROP;
1205 1297
1206 1298 ASSERT(ecb->dte_cond);
1207 1299
1208 1300 if (pops->dtps_mode != NULL) {
1209 1301 mode = pops->dtps_mode(prov->dtpv_arg,
1210 1302 probe->dtpr_id, probe->dtpr_arg);
1211 1303
1212 - ASSERT((mode & DTRACE_MODE_USER) ||
1213 - (mode & DTRACE_MODE_KERNEL));
1214 - ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
1215 - (mode & DTRACE_MODE_NOPRIV_DROP));
1304 + ASSERT(mode & (DTRACE_MODE_USER | DTRACE_MODE_KERNEL));
1305 + ASSERT(mode & (DTRACE_MODE_NOPRIV_RESTRICT |
1306 + DTRACE_MODE_NOPRIV_DROP));
1216 1307 }
1217 1308
1218 1309 /*
1219 1310 * If the dte_cond bits indicate that this consumer is only allowed to
1220 - * see user-mode firings of this probe, call the provider's dtps_mode()
1221 - * entry point to check that the probe was fired while in a user
1222 - * context. If that's not the case, use the policy specified by the
1223 - * provider to determine if we drop the probe or merely restrict
1224 - * operation.
1311 + * see user-mode firings of this probe, check that the probe was fired
1312 + * while in a user context. If that's not the case, use the policy
1313 + * specified by the provider to determine if we drop the probe or
1314 + * merely restrict operation.
1225 1315 */
1226 1316 if (ecb->dte_cond & DTRACE_COND_USERMODE) {
1227 1317 ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
1228 1318
1229 1319 if (!(mode & DTRACE_MODE_USER)) {
1230 1320 if (mode & DTRACE_MODE_NOPRIV_DROP)
1231 1321 return (0);
1232 1322
1233 1323 mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1234 1324 }
1235 1325 }
1236 1326
1237 1327 /*
1238 1328 * This is more subtle than it looks. We have to be absolutely certain
1239 1329 * that CRED() isn't going to change out from under us so it's only
1240 1330 * legit to examine that structure if we're in constrained situations.
1241 1331 * Currently, the only time we'll do this check is if a non-super-user
1242 1332 * has enabled the profile or syscall providers -- providers that
1243 1333 * allow visibility of all processes. For the profile case, the check
1244 1334 * above will ensure that we're examining a user context.
1245 1335 */
1246 1336 if (ecb->dte_cond & DTRACE_COND_OWNER) {
1247 1337 cred_t *cr;
1248 1338 cred_t *s_cr = state->dts_cred.dcr_cred;
1249 1339 proc_t *proc;
1250 1340
1251 1341 ASSERT(s_cr != NULL);
1252 1342
1253 1343 if ((cr = CRED()) == NULL ||
1254 1344 s_cr->cr_uid != cr->cr_uid ||
1255 1345 s_cr->cr_uid != cr->cr_ruid ||
1256 1346 s_cr->cr_uid != cr->cr_suid ||
1257 1347 s_cr->cr_gid != cr->cr_gid ||
1258 1348 s_cr->cr_gid != cr->cr_rgid ||
1259 1349 s_cr->cr_gid != cr->cr_sgid ||
1260 1350 (proc = ttoproc(curthread)) == NULL ||
1261 1351 (proc->p_flag & SNOCD)) {
1262 1352 if (mode & DTRACE_MODE_NOPRIV_DROP)
1263 1353 return (0);
1264 1354
1265 1355 mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
1266 1356 }
1267 1357 }
1268 1358
1269 1359 /*
1270 1360 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
1271 1361 * in our zone, check to see if our mode policy is to restrict rather
1272 1362 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
1273 1363 * and DTRACE_ACCESS_ARGS
1274 1364 */
1275 1365 if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
1276 1366 cred_t *cr;
1277 1367 cred_t *s_cr = state->dts_cred.dcr_cred;
1278 1368
1279 1369 ASSERT(s_cr != NULL);
1280 1370
1281 1371 if ((cr = CRED()) == NULL ||
1282 1372 s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
1283 1373 if (mode & DTRACE_MODE_NOPRIV_DROP)
1284 1374 return (0);
1285 1375
1286 1376 mstate->dtms_access &=
1287 1377 ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
1288 1378 }
1289 1379 }
1290 1380
1381 + /*
1382 + * By merits of being in this code path at all, we have limited
1383 + * privileges. If the provider has indicated that limited privileges
1384 + * are to denote restricted operation, strip off the ability to access
1385 + * arguments.
1386 + */
1387 + if (mode & DTRACE_MODE_LIMITEDPRIV_RESTRICT)
1388 + mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1389 +
1291 1390 return (1);
1292 1391 }
1293 1392
1294 1393 /*
1295 1394 * Note: not called from probe context. This function is called
1296 1395 * asynchronously (and at a regular interval) from outside of probe context to
1297 1396 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
1298 1397 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1299 1398 */
1300 1399 void
1301 1400 dtrace_dynvar_clean(dtrace_dstate_t *dstate)
1302 1401 {
1303 1402 dtrace_dynvar_t *dirty;
1304 1403 dtrace_dstate_percpu_t *dcpu;
1305 1404 dtrace_dynvar_t **rinsep;
1306 1405 int i, j, work = 0;
1307 1406
1308 1407 for (i = 0; i < NCPU; i++) {
1309 1408 dcpu = &dstate->dtds_percpu[i];
1310 1409 rinsep = &dcpu->dtdsc_rinsing;
1311 1410
1312 1411 /*
1313 1412 * If the dirty list is NULL, there is no dirty work to do.
1314 1413 */
1315 1414 if (dcpu->dtdsc_dirty == NULL)
1316 1415 continue;
1317 1416
1318 1417 if (dcpu->dtdsc_rinsing != NULL) {
1319 1418 /*
1320 1419 * If the rinsing list is non-NULL, then it is because
1321 1420 * this CPU was selected to accept another CPU's
1322 1421 * dirty list -- and since that time, dirty buffers
1323 1422 * have accumulated. This is a highly unlikely
1324 1423 * condition, but we choose to ignore the dirty
1325 1424 * buffers -- they'll be picked up a future cleanse.
1326 1425 */
1327 1426 continue;
1328 1427 }
1329 1428
1330 1429 if (dcpu->dtdsc_clean != NULL) {
1331 1430 /*
1332 1431 * If the clean list is non-NULL, then we're in a
1333 1432 * situation where a CPU has done deallocations (we
1334 1433 * have a non-NULL dirty list) but no allocations (we
1335 1434 * also have a non-NULL clean list). We can't simply
1336 1435 * move the dirty list into the clean list on this
1337 1436 * CPU, yet we also don't want to allow this condition
1338 1437 * to persist, lest a short clean list prevent a
1339 1438 * massive dirty list from being cleaned (which in
1340 1439 * turn could lead to otherwise avoidable dynamic
1341 1440 * drops). To deal with this, we look for some CPU
1342 1441 * with a NULL clean list, NULL dirty list, and NULL
1343 1442 * rinsing list -- and then we borrow this CPU to
1344 1443 * rinse our dirty list.
1345 1444 */
1346 1445 for (j = 0; j < NCPU; j++) {
1347 1446 dtrace_dstate_percpu_t *rinser;
1348 1447
1349 1448 rinser = &dstate->dtds_percpu[j];
1350 1449
1351 1450 if (rinser->dtdsc_rinsing != NULL)
1352 1451 continue;
1353 1452
1354 1453 if (rinser->dtdsc_dirty != NULL)
1355 1454 continue;
1356 1455
1357 1456 if (rinser->dtdsc_clean != NULL)
1358 1457 continue;
1359 1458
1360 1459 rinsep = &rinser->dtdsc_rinsing;
1361 1460 break;
1362 1461 }
1363 1462
1364 1463 if (j == NCPU) {
1365 1464 /*
1366 1465 * We were unable to find another CPU that
1367 1466 * could accept this dirty list -- we are
1368 1467 * therefore unable to clean it now.
1369 1468 */
1370 1469 dtrace_dynvar_failclean++;
1371 1470 continue;
1372 1471 }
1373 1472 }
1374 1473
1375 1474 work = 1;
1376 1475
1377 1476 /*
1378 1477 * Atomically move the dirty list aside.
1379 1478 */
1380 1479 do {
1381 1480 dirty = dcpu->dtdsc_dirty;
1382 1481
1383 1482 /*
1384 1483 * Before we zap the dirty list, set the rinsing list.
1385 1484 * (This allows for a potential assertion in
1386 1485 * dtrace_dynvar(): if a free dynamic variable appears
1387 1486 * on a hash chain, either the dirty list or the
1388 1487 * rinsing list for some CPU must be non-NULL.)
1389 1488 */
1390 1489 *rinsep = dirty;
1391 1490 dtrace_membar_producer();
1392 1491 } while (dtrace_casptr(&dcpu->dtdsc_dirty,
1393 1492 dirty, NULL) != dirty);
1394 1493 }
1395 1494
1396 1495 if (!work) {
1397 1496 /*
1398 1497 * We have no work to do; we can simply return.
1399 1498 */
1400 1499 return;
1401 1500 }
1402 1501
1403 1502 dtrace_sync();
1404 1503
1405 1504 for (i = 0; i < NCPU; i++) {
1406 1505 dcpu = &dstate->dtds_percpu[i];
1407 1506
1408 1507 if (dcpu->dtdsc_rinsing == NULL)
1409 1508 continue;
1410 1509
1411 1510 /*
1412 1511 * We are now guaranteed that no hash chain contains a pointer
1413 1512 * into this dirty list; we can make it clean.
1414 1513 */
1415 1514 ASSERT(dcpu->dtdsc_clean == NULL);
1416 1515 dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
1417 1516 dcpu->dtdsc_rinsing = NULL;
1418 1517 }
1419 1518
1420 1519 /*
1421 1520 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1422 1521 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1423 1522 * This prevents a race whereby a CPU incorrectly decides that
1424 1523 * the state should be something other than DTRACE_DSTATE_CLEAN
1425 1524 * after dtrace_dynvar_clean() has completed.
1426 1525 */
1427 1526 dtrace_sync();
1428 1527
1429 1528 dstate->dtds_state = DTRACE_DSTATE_CLEAN;
1430 1529 }
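A minimal standalone sketch of the detach idiom used above -- publish the
list through a second pointer, then compare-and-swap the head to NULL,
retrying if a producer pushed in the meantime. C11 atomics stand in for
dtrace_casptr() and dtrace_membar_producer(), with memory ordering
simplified; the names are illustrative, not part of DTrace:

    #include <stdatomic.h>
    #include <stddef.h>

    struct node {
            struct node *next;
    };

    /*
     * Atomically steal the entire list at *headp, mirroring the
     * dirty-list detach in dtrace_dynvar_clean(): set the rinsing
     * pointer first, then swing the dirty head to NULL.
     */
    static struct node *
    list_steal(_Atomic(struct node *) *headp, struct node **publish)
    {
            struct node *head;

            do {
                    head = atomic_load(headp);
                    *publish = head;        /* cf. "*rinsep = dirty" */
            } while (!atomic_compare_exchange_strong(headp, &head, NULL));

            return (head);
    }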
1431 1530
1432 1531 /*
1433 1532 * Depending on the value of the op parameter, this function looks-up,
1434 1533 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
1435 1534 * allocation is requested, this function will return a pointer to a
1436 1535 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
1437 1536 * variable can be allocated. If NULL is returned, the appropriate counter
1438 1537 * will be incremented.
1439 1538 */
1440 1539 dtrace_dynvar_t *
1441 1540 dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
1442 1541 dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
1443 1542 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
1444 1543 {
1445 1544 uint64_t hashval = DTRACE_DYNHASH_VALID;
1446 1545 dtrace_dynhash_t *hash = dstate->dtds_hash;
1447 1546 dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
1448 1547 processorid_t me = CPU->cpu_id, cpu = me;
1449 1548 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
1450 1549 size_t bucket, ksize;
1451 1550 size_t chunksize = dstate->dtds_chunksize;
1452 1551 uintptr_t kdata, lock, nstate;
1453 1552 uint_t i;
1454 1553
1455 1554 ASSERT(nkeys != 0);
1456 1555
1457 1556 /*
1458 1557 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
1459 1558 * algorithm. For the by-value portions, we perform the algorithm in
1460 1559 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
1461 1560 * bit, and seems to have only a minute effect on distribution. For
1462 1561 * the by-reference data, we perform "One-at-a-time" iterating (safely)
1463 1562 * over each referenced byte. It's painful to do this, but it's much
1464 1563 * better than pathological hash distribution. The efficacy of the
1465 1564 * hashing algorithm (and a comparison with other algorithms) may be
1466 1565 * found by running the ::dtrace_dynstat MDB dcmd.
1467 1566 */
1468 1567 for (i = 0; i < nkeys; i++) {
1469 1568 if (key[i].dttk_size == 0) {
1470 1569 uint64_t val = key[i].dttk_value;
1471 1570
1472 1571 hashval += (val >> 48) & 0xffff;
1473 1572 hashval += (hashval << 10);
1474 1573 hashval ^= (hashval >> 6);
1475 1574
1476 1575 hashval += (val >> 32) & 0xffff;
1477 1576 hashval += (hashval << 10);
1478 1577 hashval ^= (hashval >> 6);
1479 1578
1480 1579 hashval += (val >> 16) & 0xffff;
1481 1580 hashval += (hashval << 10);
1482 1581 hashval ^= (hashval >> 6);
1483 1582
1484 1583 hashval += val & 0xffff;
1485 1584 hashval += (hashval << 10);
1486 1585 hashval ^= (hashval >> 6);
1487 1586 } else {
1488 1587 /*
1489 1588 * This is incredibly painful, but it beats the hell
1490 1589 * out of the alternative.
1491 1590 */
1492 1591 uint64_t j, size = key[i].dttk_size;
1493 1592 uintptr_t base = (uintptr_t)key[i].dttk_value;
1494 1593
1495 1594 if (!dtrace_canload(base, size, mstate, vstate))
1496 1595 break;
1497 1596
1498 1597 for (j = 0; j < size; j++) {
1499 1598 hashval += dtrace_load8(base + j);
1500 1599 hashval += (hashval << 10);
1501 1600 hashval ^= (hashval >> 6);
1502 1601 }
1503 1602 }
1504 1603 }
1505 1604
1506 1605 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
1507 1606 return (NULL);
1508 1607
1509 1608 hashval += (hashval << 3);
1510 1609 hashval ^= (hashval >> 11);
1511 1610 hashval += (hashval << 15);
1512 1611
1513 1612 /*
1514 1613 * There is a remote chance (ideally, 1 in 2^31) that our hashval
1515 1614 * comes out to be one of our two sentinel hash values. If this
1516 1615 * actually happens, we set the hashval to be a value known to be a
1517 1616 * non-sentinel value.
1518 1617 */
1519 1618 if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
1520 1619 hashval = DTRACE_DYNHASH_VALID;
1521 1620
1522 1621 /*
1523 1622 * Yes, it's painful to do a divide here. If the cycle count becomes
1524 1623 * important here, tricks can be pulled to reduce it. (However, it's
1525 1624 * critical that hash collisions be kept to an absolute minimum;
1526 1625 * they're much more painful than a divide.) It's better to have a
1527 1626 * solution that generates few collisions and still keeps things
1528 1627 * relatively simple.
1529 1628 */
1530 1629 bucket = hashval % dstate->dtds_hashsize;
1531 1630
1532 1631 if (op == DTRACE_DYNVAR_DEALLOC) {
1533 1632 volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;
1534 1633
1535 1634 for (;;) {
1536 1635 while ((lock = *lockp) & 1)
1537 1636 continue;
1538 1637
1539 1638 if (dtrace_casptr((void *)lockp,
1540 1639 (void *)lock, (void *)(lock + 1)) == (void *)lock)
1541 1640 break;
1542 1641 }
1543 1642
1544 1643 dtrace_membar_producer();
1545 1644 }
1546 1645
1547 1646 top:
1548 1647 prev = NULL;
1549 1648 lock = hash[bucket].dtdh_lock;
1550 1649
1551 1650 dtrace_membar_consumer();
1552 1651
1553 1652 start = hash[bucket].dtdh_chain;
1554 1653 ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
1555 1654 start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
1556 1655 op != DTRACE_DYNVAR_DEALLOC));
1557 1656
1558 1657 for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
1559 1658 dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
1560 1659 dtrace_key_t *dkey = &dtuple->dtt_key[0];
1561 1660
1562 1661 if (dvar->dtdv_hashval != hashval) {
1563 1662 if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
1564 1663 /*
1565 1664 * We've reached the sink, and therefore the
1566 1665 * end of the hash chain; we can kick out of
1567 1666 * the loop knowing that we have seen a valid
1568 1667 * snapshot of state.
1569 1668 */
1570 1669 ASSERT(dvar->dtdv_next == NULL);
1571 1670 ASSERT(dvar == &dtrace_dynhash_sink);
1572 1671 break;
1573 1672 }
1574 1673
1575 1674 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
1576 1675 /*
1577 1676 * We've gone off the rails: somewhere along
1578 1677 * the line, one of the members of this hash
1579 1678 * chain was deleted. Note that we could also
1580 1679 * detect this by simply letting this loop run
1581 1680 * to completion, as we would eventually hit
1582 1681 * the end of the dirty list. However, we
1583 1682 * want to avoid running the length of the
1584 1683 * dirty list unnecessarily (it might be quite
1585 1684 * long), so we catch this as early as
1586 1685 * possible by detecting the hash marker. In
1587 1686 * this case, we simply set dvar to NULL and
1588 1687 * break; the conditional after the loop will
1589 1688 * send us back to top.
1590 1689 */
1591 1690 dvar = NULL;
1592 1691 break;
1593 1692 }
1594 1693
1595 1694 goto next;
1596 1695 }
1597 1696
1598 1697 if (dtuple->dtt_nkeys != nkeys)
1599 1698 goto next;
1600 1699
1601 1700 for (i = 0; i < nkeys; i++, dkey++) {
1602 1701 if (dkey->dttk_size != key[i].dttk_size)
1603 1702 goto next; /* size or type mismatch */
1604 1703
1605 1704 if (dkey->dttk_size != 0) {
1606 1705 if (dtrace_bcmp(
1607 1706 (void *)(uintptr_t)key[i].dttk_value,
1608 1707 (void *)(uintptr_t)dkey->dttk_value,
1609 1708 dkey->dttk_size))
1610 1709 goto next;
1611 1710 } else {
1612 1711 if (dkey->dttk_value != key[i].dttk_value)
1613 1712 goto next;
1614 1713 }
1615 1714 }
1616 1715
1617 1716 if (op != DTRACE_DYNVAR_DEALLOC)
1618 1717 return (dvar);
1619 1718
1620 1719 ASSERT(dvar->dtdv_next == NULL ||
1621 1720 dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);
1622 1721
1623 1722 if (prev != NULL) {
1624 1723 ASSERT(hash[bucket].dtdh_chain != dvar);
1625 1724 ASSERT(start != dvar);
1626 1725 ASSERT(prev->dtdv_next == dvar);
1627 1726 prev->dtdv_next = dvar->dtdv_next;
1628 1727 } else {
1629 1728 if (dtrace_casptr(&hash[bucket].dtdh_chain,
1630 1729 start, dvar->dtdv_next) != start) {
1631 1730 /*
1632 1731 * We have failed to atomically swing the
1633 1732 * hash table head pointer, presumably because
1634 1733 * of a conflicting allocation on another CPU.
1635 1734 * We need to reread the hash chain and try
1636 1735 * again.
1637 1736 */
1638 1737 goto top;
1639 1738 }
1640 1739 }
1641 1740
1642 1741 dtrace_membar_producer();
1643 1742
1644 1743 /*
1645 1744 * Now set the hash value to indicate that it's free.
1646 1745 */
1647 1746 ASSERT(hash[bucket].dtdh_chain != dvar);
1648 1747 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
1649 1748
1650 1749 dtrace_membar_producer();
1651 1750
1652 1751 /*
1653 1752 * Set the next pointer to point at the dirty list, and
1654 1753 * atomically swing the dirty pointer to the newly freed dvar.
1655 1754 */
1656 1755 do {
1657 1756 next = dcpu->dtdsc_dirty;
1658 1757 dvar->dtdv_next = next;
1659 1758 } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);
1660 1759
1661 1760 /*
1662 1761 * Finally, unlock this hash bucket.
1663 1762 */
1664 1763 ASSERT(hash[bucket].dtdh_lock == lock);
1665 1764 ASSERT(lock & 1);
1666 1765 hash[bucket].dtdh_lock++;
1667 1766
1668 1767 return (NULL);
1669 1768 next:
1670 1769 prev = dvar;
1671 1770 continue;
1672 1771 }
1673 1772
1674 1773 if (dvar == NULL) {
1675 1774 /*
1676 1775 * If dvar is NULL, it is because we went off the rails:
1677 1776 * one of the elements that we traversed in the hash chain
1678 1777 * was deleted while we were traversing it. In this case,
1679 1778 * we assert that we aren't doing a dealloc (deallocs lock
1680 1779 * the hash bucket to prevent themselves from racing with
1681 1780 * one another), and retry the hash chain traversal.
1682 1781 */
1683 1782 ASSERT(op != DTRACE_DYNVAR_DEALLOC);
1684 1783 goto top;
1685 1784 }
1686 1785
1687 1786 if (op != DTRACE_DYNVAR_ALLOC) {
1688 1787 /*
1689 1788 * If we are not to allocate a new variable, we want to
1690 1789 * return NULL now. Before we return, check that the value
1691 1790 * of the lock word hasn't changed. If it has, we may have
1692 1791 * seen an inconsistent snapshot.
1693 1792 */
1694 1793 if (op == DTRACE_DYNVAR_NOALLOC) {
1695 1794 if (hash[bucket].dtdh_lock != lock)
1696 1795 goto top;
1697 1796 } else {
1698 1797 ASSERT(op == DTRACE_DYNVAR_DEALLOC);
1699 1798 ASSERT(hash[bucket].dtdh_lock == lock);
1700 1799 ASSERT(lock & 1);
1701 1800 hash[bucket].dtdh_lock++;
1702 1801 }
1703 1802
1704 1803 return (NULL);
1705 1804 }
1706 1805
1707 1806 /*
1708 1807 * We need to allocate a new dynamic variable. The size we need is the
1709 1808 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
1710 1809 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
1711 1810 * the size of any referred-to data (dsize). We then round the final
1712 1811 * size up to the chunksize for allocation.
1713 1812 */
1714 1813 for (ksize = 0, i = 0; i < nkeys; i++)
1715 1814 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
1716 1815
1717 1816 /*
1718 1817 * This should be pretty much impossible, but could happen if, say,
1719 1818 * strange DIF specified the tuple. Ideally, this should be an
1720 1819 * assertion and not an error condition -- but that requires that the
1721 1820 * chunksize calculation in dtrace_difo_chunksize() be absolutely
1722 1821 * bullet-proof. (That is, it must not be able to be fooled by
1723 1822 * malicious DIF.) Given the lack of backwards branches in DIF,
1724 1823 * solving this would presumably not amount to solving the Halting
1725 1824 * Problem -- but it still seems awfully hard.
1726 1825 */
1727 1826 if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
1728 1827 ksize + dsize > chunksize) {
1729 1828 dcpu->dtdsc_drops++;
1730 1829 return (NULL);
1731 1830 }
1732 1831
1733 1832 nstate = DTRACE_DSTATE_EMPTY;
1734 1833
1735 1834 do {
1736 1835 retry:
1737 1836 free = dcpu->dtdsc_free;
1738 1837
1739 1838 if (free == NULL) {
1740 1839 dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
1741 1840 void *rval;
1742 1841
1743 1842 if (clean == NULL) {
1744 1843 /*
1745 1844 * We're out of dynamic variable space on
1746 1845 * this CPU. Unless we have tried all CPUs,
1747 1846 * we'll try to allocate from a different
1748 1847 * CPU.
1749 1848 */
1750 1849 switch (dstate->dtds_state) {
1751 1850 case DTRACE_DSTATE_CLEAN: {
1752 1851 void *sp = &dstate->dtds_state;
1753 1852
1754 1853 if (++cpu >= NCPU)
1755 1854 cpu = 0;
1756 1855
1757 1856 if (dcpu->dtdsc_dirty != NULL &&
1758 1857 nstate == DTRACE_DSTATE_EMPTY)
1759 1858 nstate = DTRACE_DSTATE_DIRTY;
1760 1859
1761 1860 if (dcpu->dtdsc_rinsing != NULL)
1762 1861 nstate = DTRACE_DSTATE_RINSING;
1763 1862
1764 1863 dcpu = &dstate->dtds_percpu[cpu];
1765 1864
1766 1865 if (cpu != me)
1767 1866 goto retry;
1768 1867
1769 1868 (void) dtrace_cas32(sp,
1770 1869 DTRACE_DSTATE_CLEAN, nstate);
1771 1870
1772 1871 /*
1773 1872 * To increment the correct bean
1774 1873 * counter, take another lap.
1775 1874 */
1776 1875 goto retry;
1777 1876 }
1778 1877
1779 1878 case DTRACE_DSTATE_DIRTY:
1780 1879 dcpu->dtdsc_dirty_drops++;
1781 1880 break;
1782 1881
1783 1882 case DTRACE_DSTATE_RINSING:
1784 1883 dcpu->dtdsc_rinsing_drops++;
1785 1884 break;
1786 1885
1787 1886 case DTRACE_DSTATE_EMPTY:
1788 1887 dcpu->dtdsc_drops++;
1789 1888 break;
1790 1889 }
1791 1890
1792 1891 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
1793 1892 return (NULL);
1794 1893 }
1795 1894
1796 1895 /*
1797 1896 * The clean list appears to be non-empty. We want to
1798 1897 * move the clean list to the free list; we start by
1799 1898 * moving the clean pointer aside.
1800 1899 */
1801 1900 if (dtrace_casptr(&dcpu->dtdsc_clean,
1802 1901 clean, NULL) != clean) {
1803 1902 /*
1804 1903 * We are in one of two situations:
1805 1904 *
1806 1905 * (a) The clean list was switched to the
1807 1906 * free list by another CPU.
1808 1907 *
1809 1908 * (b) The clean list was added to by the
1810 1909 * cleansing cyclic.
1811 1910 *
1812 1911 * In either of these situations, we can
1813 1912 * just reattempt the free list allocation.
1814 1913 */
1815 1914 goto retry;
1816 1915 }
1817 1916
1818 1917 ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);
1819 1918
1820 1919 /*
1821 1920 * Now we'll move the clean list to our free list.
1822 1921 * It's impossible for this to fail: the only way
1823 1922 * the free list can be updated is through this
1824 1923 * code path, and only one CPU can own the clean list.
1825 1924 * Thus, it would only be possible for this to fail if
1826 1925 * this code were racing with dtrace_dynvar_clean().
1827 1926 * (That is, if dtrace_dynvar_clean() updated the clean
1828 1927 * list, and we ended up racing to update the free
1829 1928 * list.) This race is prevented by the dtrace_sync()
1830 1929 * in dtrace_dynvar_clean() -- which flushes the
1831 1930 * owners of the clean lists out before resetting
1832 1931 * the clean lists.
1833 1932 */
1834 1933 dcpu = &dstate->dtds_percpu[me];
1835 1934 rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
1836 1935 ASSERT(rval == NULL);
1837 1936 goto retry;
1838 1937 }
1839 1938
1840 1939 dvar = free;
1841 1940 new_free = dvar->dtdv_next;
1842 1941 } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);
1843 1942
1844 1943 /*
1845 1944 * We have now allocated a new chunk. We copy the tuple keys into the
1846 1945 * tuple array and copy any referenced key data into the data space
1847 1946 * following the tuple array. As we do this, we relocate dttk_value
1848 1947 * in the final tuple to point to the key data address in the chunk.
1849 1948 */
1850 1949 kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
1851 1950 dvar->dtdv_data = (void *)(kdata + ksize);
1852 1951 dvar->dtdv_tuple.dtt_nkeys = nkeys;
1853 1952
1854 1953 for (i = 0; i < nkeys; i++) {
1855 1954 dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
1856 1955 size_t kesize = key[i].dttk_size;
1857 1956
1858 1957 if (kesize != 0) {
1859 1958 dtrace_bcopy(
1860 1959 (const void *)(uintptr_t)key[i].dttk_value,
1861 1960 (void *)kdata, kesize);
1862 1961 dkey->dttk_value = kdata;
1863 1962 kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
1864 1963 } else {
1865 1964 dkey->dttk_value = key[i].dttk_value;
1866 1965 }
1867 1966
1868 1967 dkey->dttk_size = kesize;
1869 1968 }
1870 1969
1871 1970 ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
1872 1971 dvar->dtdv_hashval = hashval;
1873 1972 dvar->dtdv_next = start;
1874 1973
1875 1974 if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
1876 1975 return (dvar);
1877 1976
1878 1977 /*
1879 1978 * The cas has failed. Either another CPU is adding an element to
1880 1979 * this hash chain, or another CPU is deleting an element from this
1881 1980 * hash chain. The simplest way to deal with both of these cases
1882 1981 * (though not necessarily the most efficient) is to free our
1883 1982 * allocated block and tail-call ourselves. Note that the free is
1884 1983 * to the dirty list and _not_ to the free list. This is to prevent
1885 1984 * races with allocators, above.
1886 1985 */
1887 1986 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
1888 1987
1889 1988 dtrace_membar_producer();
1890 1989
1891 1990 do {
1892 1991 free = dcpu->dtdsc_dirty;
1893 1992 dvar->dtdv_next = free;
1894 1993 } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);
1895 1994
1896 1995 return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
1897 1996 }
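Both the dynamic-variable code above and the aggregation code below hash
with Bob Jenkins' "one-at-a-time" function. A self-contained rendering of
the canonical byte-at-a-time form (the kernel additionally runs a
16-bit-chunk variant over by-value keys and keeps the running value in
64 bits):

    #include <stdint.h>
    #include <stddef.h>

    static uint32_t
    jenkins_one_at_a_time(const uint8_t *data, size_t len)
    {
            uint32_t hash = 0;
            size_t i;

            /* Mix in each input byte... */
            for (i = 0; i < len; i++) {
                    hash += data[i];
                    hash += (hash << 10);
                    hash ^= (hash >> 6);
            }

            /* ...then the final avalanche, as after the key loops above. */
            hash += (hash << 3);
            hash ^= (hash >> 11);
            hash += (hash << 15);

            return (hash);
    }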
1898 1997
1899 1998 /*ARGSUSED*/
1900 1999 static void
1901 2000 dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
1902 2001 {
1903 2002 if ((int64_t)nval < (int64_t)*oval)
1904 2003 *oval = nval;
1905 2004 }
1906 2005
1907 2006 /*ARGSUSED*/
1908 2007 static void
1909 2008 dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
1910 2009 {
1911 2010 if ((int64_t)nval > (int64_t)*oval)
1912 2011 *oval = nval;
1913 2012 }
1914 2013
1915 2014 static void
1916 2015 dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
1917 2016 {
1918 2017 int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
1919 2018 int64_t val = (int64_t)nval;
1920 2019
1921 2020 if (val < 0) {
1922 2021 for (i = 0; i < zero; i++) {
1923 2022 if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
1924 2023 quanta[i] += incr;
1925 2024 return;
1926 2025 }
1927 2026 }
1928 2027 } else {
1929 2028 for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
1930 2029 if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
1931 2030 quanta[i - 1] += incr;
1932 2031 return;
1933 2032 }
1934 2033 }
1935 2034
1936 2035 quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
1937 2036 return;
1938 2037 }
1939 2038
1940 2039 ASSERT(0);
1941 2040 }
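quantize() buckets are powers of two mirrored around a zero bucket, with
the boundaries supplied by DTRACE_QUANTIZE_BUCKETVAL(). For the positive
half, the scan above is equivalent to taking the floor of log2; a sketch
under that assumption (val >= 1 -- zero and negative values need the full
boundary walk):

    #include <stdint.h>

    /* Index of the power-of-two bucket holding val: [2^n, 2^(n+1)). */
    static int
    quantize_pos_bucket(uint64_t val)
    {
            int n = 0;

            while ((val >>= 1) != 0)
                    n++;

            return (n);
    }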
1942 2041
1943 2042 static void
1944 2043 dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
1945 2044 {
1946 2045 uint64_t arg = *lquanta++;
1947 2046 int32_t base = DTRACE_LQUANTIZE_BASE(arg);
1948 2047 uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
1949 2048 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
1950 2049 int32_t val = (int32_t)nval, level;
1951 2050
1952 2051 ASSERT(step != 0);
1953 2052 ASSERT(levels != 0);
1954 2053
1955 2054 if (val < base) {
1956 2055 /*
1957 2056 * This is an underflow.
1958 2057 */
1959 2058 lquanta[0] += incr;
1960 2059 return;
1961 2060 }
1962 2061
1963 2062 level = (val - base) / step;
1964 2063
1965 2064 if (level < levels) {
1966 2065 lquanta[level + 1] += incr;
1967 2066 return;
1968 2067 }
1969 2068
1970 2069 /*
1971 2070 * This is an overflow.
1972 2071 */
1973 2072 lquanta[levels + 1] += incr;
1974 2073 }
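A worked example of the indexing above: with base = 0, step = 100 and
levels = 10, a value of 250 gives level = (250 - 0) / 100 = 2; since
2 < 10, the increment lands in lquanta[3]. Slot 0 is the underflow bucket
and slot levels + 1 the overflow bucket, so the array holds levels + 2
counters in all.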
1975 2074
1976 2075 static int
1977 2076 dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
1978 2077 uint16_t high, uint16_t nsteps, int64_t value)
1979 2078 {
1980 2079 int64_t this = 1, last, next;
1981 2080 int base = 1, order;
1982 2081
1983 2082 ASSERT(factor <= nsteps);
1984 2083 ASSERT(nsteps % factor == 0);
1985 2084
1986 2085 for (order = 0; order < low; order++)
1987 2086 this *= factor;
1988 2087
1989 2088 /*
1990 2089 * If our value is less than our factor taken to the power of the
1991 2090 * low order of magnitude, it goes into the zeroth bucket.
1992 2091 */
1993 2092 if (value < (last = this))
1994 2093 return (0);
1995 2094
1996 2095 for (this *= factor; order <= high; order++) {
1997 2096 int nbuckets = this > nsteps ? nsteps : this;
1998 2097
1999 2098 if ((next = this * factor) < this) {
2000 2099 /*
2001 2100 * We should not generally get log/linear quantizations
2002 2101 			 * with a high magnitude that allows a 64-bit value to
2003 2102 * overflow, but we nonetheless protect against this
2004 2103 * by explicitly checking for overflow, and clamping
2005 2104 * our value accordingly.
2006 2105 */
2007 2106 value = this - 1;
2008 2107 }
2009 2108
2010 2109 if (value < this) {
2011 2110 /*
2012 2111 * If our value lies within this order of magnitude,
2013 2112 * determine its position by taking the offset within
2014 2113 * the order of magnitude, dividing by the bucket
2015 2114 * width, and adding to our (accumulated) base.
2016 2115 */
2017 2116 return (base + (value - last) / (this / nbuckets));
2018 2117 }
2019 2118
2020 2119 base += nbuckets - (nbuckets / factor);
2021 2120 last = this;
2022 2121 this = next;
2023 2122 }
2024 2123
2025 2124 /*
2026 2125 * Our value is greater than or equal to our factor taken to the
2027 2126 * power of one plus the high magnitude -- return the top bucket.
2028 2127 */
2029 2128 return (base);
2030 2129 }
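Tracing the arithmetic above with factor = 10, low = 0, high = 2,
nsteps = 10 and value = 42: values below 10^0 = 1 fall into bucket 0; the
order [1, 10) consumes nbuckets - nbuckets / factor = 9 buckets (indices
1 through 9), leaving base = 10; 42 then lies in [10, 100), whose bucket
width is this / nbuckets = 100 / 10 = 10, so the function returns
10 + (42 - 10) / 10 = bucket 13.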
2031 2130
2032 2131 static void
2033 2132 dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
2034 2133 {
2035 2134 uint64_t arg = *llquanta++;
2036 2135 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
2037 2136 uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
2038 2137 uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
2039 2138 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
2040 2139
2041 2140 llquanta[dtrace_aggregate_llquantize_bucket(factor,
2042 2141 low, high, nsteps, nval)] += incr;
2043 2142 }
2044 2143
2045 2144 /*ARGSUSED*/
2046 2145 static void
2047 2146 dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
2048 2147 {
2049 2148 data[0]++;
2050 2149 data[1] += nval;
2051 2150 }
2052 2151
2053 2152 /*ARGSUSED*/
2054 2153 static void
2055 2154 dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
2056 2155 {
2057 2156 int64_t snval = (int64_t)nval;
2058 2157 uint64_t tmp[2];
2059 2158
2060 2159 data[0]++;
2061 2160 data[1] += nval;
2062 2161
2063 2162 /*
2064 2163 * What we want to say here is:
2065 2164 *
2066 2165 * data[2] += nval * nval;
2067 2166 *
2068 2167 * But given that nval is 64-bit, we could easily overflow, so
2069 2168 * we do this as 128-bit arithmetic.
2070 2169 */
2071 2170 if (snval < 0)
2072 2171 snval = -snval;
2073 2172
2074 2173 dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
2075 2174 dtrace_add_128(data + 2, tmp, data + 2);
2076 2175 }
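These three accumulators -- count, sum, and a 128-bit sum of squares --
are all a consumer needs to derive the standard deviation afterward. A
sketch of that final step, simplified to 64 bits (a real consumer must
treat data[2..3] as a single 128-bit quantity):

    #include <stdint.h>
    #include <math.h>

    /* sigma = sqrt(E[x^2] - E[x]^2), from the stddev accumulators. */
    static double
    stddev_from_counters(const uint64_t *data)
    {
            double n = (double)data[0];
            double mean = (double)data[1] / n;

            return (sqrt((double)data[2] / n - mean * mean));
    }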
2077 2176
2078 2177 /*ARGSUSED*/
2079 2178 static void
2080 2179 dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
2081 2180 {
2082 2181 *oval = *oval + 1;
2083 2182 }
2084 2183
2085 2184 /*ARGSUSED*/
2086 2185 static void
2087 2186 dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
2088 2187 {
2089 2188 *oval += nval;
2090 2189 }
2091 2190
2092 2191 /*
2093 2192 * Aggregate given the tuple in the principal data buffer, and the aggregating
2094 2193 * action denoted by the specified dtrace_aggregation_t. The aggregation
2095 2194 * buffer is specified as the buf parameter. This routine does not return
2096 2195 * failure; if there is no space in the aggregation buffer, the data will be
2097 2196 * dropped, and a corresponding counter incremented.
2098 2197 */
2099 2198 static void
2100 2199 dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
2101 2200 intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
2102 2201 {
2103 2202 dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
2104 2203 uint32_t i, ndx, size, fsize;
2105 2204 uint32_t align = sizeof (uint64_t) - 1;
2106 2205 dtrace_aggbuffer_t *agb;
2107 2206 dtrace_aggkey_t *key;
2108 2207 uint32_t hashval = 0, limit, isstr;
2109 2208 caddr_t tomax, data, kdata;
2110 2209 dtrace_actkind_t action;
2111 2210 dtrace_action_t *act;
2112 2211 uintptr_t offs;
2113 2212
2114 2213 if (buf == NULL)
2115 2214 return;
2116 2215
2117 2216 if (!agg->dtag_hasarg) {
2118 2217 /*
2119 2218 		 * Currently, only quantize(), lquantize() and llquantize() take
2120 2219 		 * additional arguments, and they have the same semantics: an
2121 2220 		 * increment value that defaults to 1 when not present. If additional
2122 2221 * aggregating actions take arguments, the setting of the
2123 2222 * default argument value will presumably have to become more
2124 2223 * sophisticated...
2125 2224 */
2126 2225 arg = 1;
2127 2226 }
2128 2227
2129 2228 action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
2130 2229 size = rec->dtrd_offset - agg->dtag_base;
2131 2230 fsize = size + rec->dtrd_size;
2132 2231
2133 2232 ASSERT(dbuf->dtb_tomax != NULL);
2134 2233 data = dbuf->dtb_tomax + offset + agg->dtag_base;
2135 2234
2136 2235 if ((tomax = buf->dtb_tomax) == NULL) {
2137 2236 dtrace_buffer_drop(buf);
2138 2237 return;
2139 2238 }
2140 2239
2141 2240 /*
2142 2241 * The metastructure is always at the bottom of the buffer.
2143 2242 */
2144 2243 agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
2145 2244 sizeof (dtrace_aggbuffer_t));
2146 2245
2147 2246 if (buf->dtb_offset == 0) {
2148 2247 /*
2149 2248 * We just kludge up approximately 1/8th of the size to be
2150 2249 * buckets. If this guess ends up being routinely
2151 2250 * off-the-mark, we may need to dynamically readjust this
2152 2251 * based on past performance.
2153 2252 */
2154 2253 uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);
2155 2254
2156 2255 if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
2157 2256 (uintptr_t)tomax || hashsize == 0) {
2158 2257 /*
2159 2258 * We've been given a ludicrously small buffer;
2160 2259 * increment our drop count and leave.
2161 2260 */
2162 2261 dtrace_buffer_drop(buf);
2163 2262 return;
2164 2263 }
2165 2264
2166 2265 /*
2167 2266 		 * And now, a pathetic attempt to try to get an odd (or
2168 2267 * perchance, a prime) hash size for better hash distribution.
2169 2268 */
2170 2269 if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
2171 2270 hashsize -= DTRACE_AGGHASHSIZE_SLEW;
2172 2271
2173 2272 agb->dtagb_hashsize = hashsize;
2174 2273 agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
2175 2274 agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
2176 2275 agb->dtagb_free = (uintptr_t)agb->dtagb_hash;
2177 2276
2178 2277 for (i = 0; i < agb->dtagb_hashsize; i++)
2179 2278 agb->dtagb_hash[i] = NULL;
2180 2279 }
2181 2280
2182 2281 ASSERT(agg->dtag_first != NULL);
2183 2282 ASSERT(agg->dtag_first->dta_intuple);
2184 2283
2185 2284 /*
2186 2285 * Calculate the hash value based on the key. Note that we _don't_
2187 2286 * include the aggid in the hashing (but we will store it as part of
2188 2287 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time"
2189 2288 * algorithm: a simple, quick algorithm that has no known funnels, and
2190 2289 * gets good distribution in practice. The efficacy of the hashing
2191 2290 * algorithm (and a comparison with other algorithms) may be found by
2192 2291 * running the ::dtrace_aggstat MDB dcmd.
2193 2292 */
2194 2293 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
2195 2294 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2196 2295 limit = i + act->dta_rec.dtrd_size;
2197 2296 ASSERT(limit <= size);
2198 2297 isstr = DTRACEACT_ISSTRING(act);
2199 2298
2200 2299 for (; i < limit; i++) {
2201 2300 hashval += data[i];
2202 2301 hashval += (hashval << 10);
2203 2302 hashval ^= (hashval >> 6);
2204 2303
2205 2304 if (isstr && data[i] == '\0')
2206 2305 break;
2207 2306 }
2208 2307 }
2209 2308
2210 2309 hashval += (hashval << 3);
2211 2310 hashval ^= (hashval >> 11);
2212 2311 hashval += (hashval << 15);
2213 2312
2214 2313 /*
2215 2314 * Yes, the divide here is expensive -- but it's generally the least
2216 2315 * of the performance issues given the amount of data that we iterate
2217 2316 * over to compute hash values, compare data, etc.
2218 2317 */
2219 2318 ndx = hashval % agb->dtagb_hashsize;
2220 2319
2221 2320 for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
2222 2321 ASSERT((caddr_t)key >= tomax);
2223 2322 ASSERT((caddr_t)key < tomax + buf->dtb_size);
2224 2323
2225 2324 if (hashval != key->dtak_hashval || key->dtak_size != size)
2226 2325 continue;
2227 2326
2228 2327 kdata = key->dtak_data;
2229 2328 ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);
2230 2329
2231 2330 for (act = agg->dtag_first; act->dta_intuple;
2232 2331 act = act->dta_next) {
2233 2332 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2234 2333 limit = i + act->dta_rec.dtrd_size;
2235 2334 ASSERT(limit <= size);
2236 2335 isstr = DTRACEACT_ISSTRING(act);
2237 2336
2238 2337 for (; i < limit; i++) {
2239 2338 if (kdata[i] != data[i])
2240 2339 goto next;
2241 2340
2242 2341 if (isstr && data[i] == '\0')
2243 2342 break;
2244 2343 }
2245 2344 }
2246 2345
2247 2346 if (action != key->dtak_action) {
2248 2347 /*
2249 2348 * We are aggregating on the same value in the same
2250 2349 * aggregation with two different aggregating actions.
2251 2350 * (This should have been picked up in the compiler,
2252 2351 * so we may be dealing with errant or devious DIF.)
2253 2352 * This is an error condition; we indicate as much,
2254 2353 * and return.
2255 2354 */
2256 2355 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
2257 2356 return;
2258 2357 }
2259 2358
2260 2359 /*
2261 2360 * This is a hit: we need to apply the aggregator to
2262 2361 * the value at this key.
2263 2362 */
2264 2363 agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
2265 2364 return;
2266 2365 next:
2267 2366 continue;
2268 2367 }
2269 2368
2270 2369 /*
2271 2370 * We didn't find it. We need to allocate some zero-filled space,
2272 2371 * link it into the hash table appropriately, and apply the aggregator
2273 2372 * to the (zero-filled) value.
2274 2373 */
2275 2374 offs = buf->dtb_offset;
2276 2375 while (offs & (align - 1))
2277 2376 offs += sizeof (uint32_t);
2278 2377
2279 2378 /*
2280 2379 * If we don't have enough room to both allocate a new key _and_
2281 2380 * its associated data, increment the drop count and return.
2282 2381 */
2283 2382 if ((uintptr_t)tomax + offs + fsize >
2284 2383 agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
2285 2384 dtrace_buffer_drop(buf);
2286 2385 return;
2287 2386 }
2288 2387
2289 2388 /*CONSTCOND*/
2290 2389 ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
2291 2390 key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
2292 2391 agb->dtagb_free -= sizeof (dtrace_aggkey_t);
2293 2392
2294 2393 key->dtak_data = kdata = tomax + offs;
2295 2394 buf->dtb_offset = offs + fsize;
2296 2395
2297 2396 /*
2298 2397 * Now copy the data across.
2299 2398 */
2300 2399 *((dtrace_aggid_t *)kdata) = agg->dtag_id;
2301 2400
2302 2401 for (i = sizeof (dtrace_aggid_t); i < size; i++)
2303 2402 kdata[i] = data[i];
2304 2403
2305 2404 /*
2306 2405 * Because strings are not zeroed out by default, we need to iterate
2307 2406 * looking for actions that store strings, and we need to explicitly
2308 2407 * pad these strings out with zeroes.
2309 2408 */
2310 2409 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
2311 2410 int nul;
2312 2411
2313 2412 if (!DTRACEACT_ISSTRING(act))
2314 2413 continue;
2315 2414
2316 2415 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2317 2416 limit = i + act->dta_rec.dtrd_size;
2318 2417 ASSERT(limit <= size);
2319 2418
2320 2419 for (nul = 0; i < limit; i++) {
2321 2420 if (nul) {
2322 2421 kdata[i] = '\0';
2323 2422 continue;
2324 2423 }
2325 2424
2326 2425 if (data[i] != '\0')
2327 2426 continue;
2328 2427
2329 2428 nul = 1;
2330 2429 }
2331 2430 }
2332 2431
2333 2432 for (i = size; i < fsize; i++)
2334 2433 kdata[i] = 0;
2335 2434
2336 2435 key->dtak_hashval = hashval;
2337 2436 key->dtak_size = size;
2338 2437 key->dtak_action = action;
2339 2438 key->dtak_next = agb->dtagb_hash[ndx];
2340 2439 agb->dtagb_hash[ndx] = key;
2341 2440
2342 2441 /*
2343 2442 * Finally, apply the aggregator.
2344 2443 */
2345 2444 *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
2346 2445 agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
2347 2446 }
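The buffer layout that dtrace_aggregate() maintains, as implied by the
pointer arithmetic above (widths not to scale): key data records are
allocated upward from the record area, while dtrace_aggkey_t structures
are carved downward from dtagb_free, beneath the hash-pointer array and
the dtrace_aggbuffer_t metastructure at the very top.

    tomax                                          tomax + dtb_size
    +------------------+......+-------------------+------------+-----+
    | key data records | free | dtrace_aggkey_t's | dtagb_hash | agb |
    | (grow upward)    |      | (grow downward)   | pointers   |     |
    +------------------+......+-------------------+------------+-----+
                              ^ dtagb_free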
2348 2447
2349 2448 /*
2350 2449 * Given consumer state, this routine finds a speculation in the INACTIVE
2351 2450 * state and transitions it into the ACTIVE state. If there is no speculation
2352 2451 * in the INACTIVE state, 0 is returned. In this case, no error counter is
2353 2452 * incremented -- it is up to the caller to take appropriate action.
2354 2453 */
2355 2454 static int
2356 2455 dtrace_speculation(dtrace_state_t *state)
2357 2456 {
2358 2457 int i = 0;
2359 2458 dtrace_speculation_state_t current;
2360 2459 uint32_t *stat = &state->dts_speculations_unavail, count;
2361 2460
2362 2461 while (i < state->dts_nspeculations) {
2363 2462 dtrace_speculation_t *spec = &state->dts_speculations[i];
2364 2463
2365 2464 current = spec->dtsp_state;
2366 2465
2367 2466 if (current != DTRACESPEC_INACTIVE) {
2368 2467 if (current == DTRACESPEC_COMMITTINGMANY ||
2369 2468 current == DTRACESPEC_COMMITTING ||
2370 2469 current == DTRACESPEC_DISCARDING)
2371 2470 stat = &state->dts_speculations_busy;
2372 2471 i++;
2373 2472 continue;
2374 2473 }
2375 2474
2376 2475 if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2377 2476 current, DTRACESPEC_ACTIVE) == current)
2378 2477 return (i + 1);
2379 2478 }
2380 2479
2381 2480 /*
2382 2481 * We couldn't find a speculation. If we found as much as a single
2383 2482 * busy speculation buffer, we'll attribute this failure as "busy"
2384 2483 * instead of "unavail".
2385 2484 */
2386 2485 do {
2387 2486 count = *stat;
2388 2487 } while (dtrace_cas32(stat, count, count + 1) != count);
2389 2488
2390 2489 return (0);
2391 2490 }
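The failure path above bumps its counter with a compare-and-swap loop
rather than a locked increment, since probe context cannot block on
locks. A standalone equivalent, with the GCC __sync builtin standing in
for dtrace_cas32():

    #include <stdint.h>

    /* Lock-free increment, as in the busy/unavail accounting above. */
    static void
    cas32_inc(volatile uint32_t *stat)
    {
            uint32_t count;

            do {
                    count = *stat;
            } while (__sync_val_compare_and_swap(stat, count,
                count + 1) != count);
    }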
2392 2491
2393 2492 /*
2394 2493 * This routine commits an active speculation. If the specified speculation
2395 2494 * is not in a valid state to perform a commit(), this routine will silently do
2396 2495 * nothing. The state of the specified speculation is transitioned according
2397 2496  * to the state transition diagram outlined in <sys/dtrace_impl.h>.
2398 2497 */
2399 2498 static void
2400 2499 dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
2401 2500 dtrace_specid_t which)
2402 2501 {
2403 2502 dtrace_speculation_t *spec;
2404 2503 dtrace_buffer_t *src, *dest;
2405 2504 uintptr_t daddr, saddr, dlimit, slimit;
2406 2505 dtrace_speculation_state_t current, new;
2407 2506 intptr_t offs;
2408 2507 uint64_t timestamp;
2409 2508
2410 2509 if (which == 0)
2411 2510 return;
2412 2511
2413 2512 if (which > state->dts_nspeculations) {
2414 2513 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2415 2514 return;
2416 2515 }
2417 2516
2418 2517 spec = &state->dts_speculations[which - 1];
2419 2518 src = &spec->dtsp_buffer[cpu];
2420 2519 dest = &state->dts_buffer[cpu];
2421 2520
2422 2521 do {
2423 2522 current = spec->dtsp_state;
2424 2523
2425 2524 if (current == DTRACESPEC_COMMITTINGMANY)
2426 2525 break;
2427 2526
2428 2527 switch (current) {
2429 2528 case DTRACESPEC_INACTIVE:
2430 2529 case DTRACESPEC_DISCARDING:
2431 2530 return;
2432 2531
2433 2532 case DTRACESPEC_COMMITTING:
2434 2533 /*
2435 2534 * This is only possible if we are (a) commit()'ing
2436 2535 * without having done a prior speculate() on this CPU
2437 2536 * and (b) racing with another commit() on a different
2438 2537 * CPU. There's nothing to do -- we just assert that
2439 2538 * our offset is 0.
2440 2539 */
2441 2540 ASSERT(src->dtb_offset == 0);
2442 2541 return;
2443 2542
2444 2543 case DTRACESPEC_ACTIVE:
2445 2544 new = DTRACESPEC_COMMITTING;
2446 2545 break;
2447 2546
2448 2547 case DTRACESPEC_ACTIVEONE:
2449 2548 /*
2450 2549 * This speculation is active on one CPU. If our
2451 2550 * buffer offset is non-zero, we know that the one CPU
2452 2551 * must be us. Otherwise, we are committing on a
2453 2552 * different CPU from the speculate(), and we must
2454 2553 * rely on being asynchronously cleaned.
2455 2554 */
2456 2555 if (src->dtb_offset != 0) {
2457 2556 new = DTRACESPEC_COMMITTING;
2458 2557 break;
2459 2558 }
2460 2559 /*FALLTHROUGH*/
2461 2560
2462 2561 case DTRACESPEC_ACTIVEMANY:
2463 2562 new = DTRACESPEC_COMMITTINGMANY;
2464 2563 break;
2465 2564
2466 2565 default:
2467 2566 ASSERT(0);
2468 2567 }
2469 2568 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2470 2569 current, new) != current);
2471 2570
2472 2571 /*
2473 2572 * We have set the state to indicate that we are committing this
2474 2573 * speculation. Now reserve the necessary space in the destination
2475 2574 * buffer.
2476 2575 */
2477 2576 if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
2478 2577 sizeof (uint64_t), state, NULL)) < 0) {
2479 2578 dtrace_buffer_drop(dest);
2480 2579 goto out;
2481 2580 }
2482 2581
2483 2582 /*
2484 2583 * We have sufficient space to copy the speculative buffer into the
2485 2584 * primary buffer. First, modify the speculative buffer, filling
2486 2585 * in the timestamp of all entries with the current time. The data
2487 2586 * must have the commit() time rather than the time it was traced,
2488 2587 * so that all entries in the primary buffer are in timestamp order.
2489 2588 */
2490 2589 timestamp = dtrace_gethrtime();
2491 2590 saddr = (uintptr_t)src->dtb_tomax;
2492 2591 slimit = saddr + src->dtb_offset;
2493 2592 while (saddr < slimit) {
2494 2593 size_t size;
2495 2594 dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;
2496 2595
2497 2596 if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
2498 2597 saddr += sizeof (dtrace_epid_t);
2499 2598 continue;
2500 2599 }
2501 2600 ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs);
2502 2601 size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;
2503 2602
2504 2603 ASSERT3U(saddr + size, <=, slimit);
2505 2604 ASSERT3U(size, >=, sizeof (dtrace_rechdr_t));
2506 2605 ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX);
2507 2606
2508 2607 DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);
2509 2608
2510 2609 saddr += size;
2511 2610 }
2512 2611
2513 2612 /*
2514 2613 * Copy the buffer across. (Note that this is a
2515 2614  * highly suboptimal bcopy(); in the unlikely event that this becomes
2516 2615 * a serious performance issue, a high-performance DTrace-specific
2517 2616 * bcopy() should obviously be invented.)
2518 2617 */
2519 2618 daddr = (uintptr_t)dest->dtb_tomax + offs;
2520 2619 dlimit = daddr + src->dtb_offset;
2521 2620 saddr = (uintptr_t)src->dtb_tomax;
2522 2621
2523 2622 /*
2524 2623 * First, the aligned portion.
2525 2624 */
2526 2625 while (dlimit - daddr >= sizeof (uint64_t)) {
2527 2626 *((uint64_t *)daddr) = *((uint64_t *)saddr);
2528 2627
2529 2628 daddr += sizeof (uint64_t);
2530 2629 saddr += sizeof (uint64_t);
2531 2630 }
2532 2631
2533 2632 /*
2534 2633 * Now any left-over bit...
2535 2634 */
2536 2635 while (dlimit - daddr)
2537 2636 *((uint8_t *)daddr++) = *((uint8_t *)saddr++);
2538 2637
2539 2638 /*
2540 2639 * Finally, commit the reserved space in the destination buffer.
2541 2640 */
2542 2641 dest->dtb_offset = offs + src->dtb_offset;
2543 2642
2544 2643 out:
2545 2644 /*
2546 2645 * If we're lucky enough to be the only active CPU on this speculation
2547 2646 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
2548 2647 */
2549 2648 if (current == DTRACESPEC_ACTIVE ||
2550 2649 (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
2551 2650 uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
2552 2651 DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);
2553 2652
2554 2653 ASSERT(rval == DTRACESPEC_COMMITTING);
2555 2654 }
2556 2655
2557 2656 src->dtb_offset = 0;
2558 2657 src->dtb_xamot_drops += src->dtb_drops;
2559 2658 src->dtb_drops = 0;
2560 2659 }
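The copy loop above is a plain word-at-a-time copy with a byte tail. A
freestanding equivalent, assuming both pointers are 8-byte aligned, as
the buffer reservation above arranges:

    #include <stdint.h>
    #include <stddef.h>

    static void
    copy_words_then_bytes(void *dst, const void *src, size_t len)
    {
            uintptr_t daddr = (uintptr_t)dst;
            uintptr_t saddr = (uintptr_t)src;
            uintptr_t dlimit = daddr + len;

            /* First, the aligned portion... */
            while (dlimit - daddr >= sizeof (uint64_t)) {
                    *(uint64_t *)daddr = *(const uint64_t *)saddr;
                    daddr += sizeof (uint64_t);
                    saddr += sizeof (uint64_t);
            }

            /* ...then any left-over bytes. */
            while (dlimit - daddr)
                    *(uint8_t *)daddr++ = *(const uint8_t *)saddr++;
    }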
2561 2660
2562 2661 /*
2563 2662 * This routine discards an active speculation. If the specified speculation
2564 2663 * is not in a valid state to perform a discard(), this routine will silently
2565 2664 * do nothing. The state of the specified speculation is transitioned
2566 2665  * according to the state transition diagram outlined in <sys/dtrace_impl.h>.
2567 2666 */
2568 2667 static void
2569 2668 dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
2570 2669 dtrace_specid_t which)
2571 2670 {
2572 2671 dtrace_speculation_t *spec;
2573 2672 dtrace_speculation_state_t current, new;
2574 2673 dtrace_buffer_t *buf;
2575 2674
2576 2675 if (which == 0)
2577 2676 return;
2578 2677
2579 2678 if (which > state->dts_nspeculations) {
2580 2679 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2581 2680 return;
2582 2681 }
2583 2682
2584 2683 spec = &state->dts_speculations[which - 1];
2585 2684 buf = &spec->dtsp_buffer[cpu];
2586 2685
2587 2686 do {
2588 2687 current = spec->dtsp_state;
2589 2688
2590 2689 switch (current) {
2591 2690 case DTRACESPEC_INACTIVE:
2592 2691 case DTRACESPEC_COMMITTINGMANY:
2593 2692 case DTRACESPEC_COMMITTING:
2594 2693 case DTRACESPEC_DISCARDING:
2595 2694 return;
2596 2695
2597 2696 case DTRACESPEC_ACTIVE:
2598 2697 case DTRACESPEC_ACTIVEMANY:
2599 2698 new = DTRACESPEC_DISCARDING;
2600 2699 break;
2601 2700
2602 2701 case DTRACESPEC_ACTIVEONE:
2603 2702 if (buf->dtb_offset != 0) {
2604 2703 new = DTRACESPEC_INACTIVE;
2605 2704 } else {
2606 2705 new = DTRACESPEC_DISCARDING;
2607 2706 }
2608 2707 break;
2609 2708
2610 2709 default:
2611 2710 ASSERT(0);
2612 2711 }
2613 2712 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2614 2713 current, new) != current);
2615 2714
2616 2715 buf->dtb_offset = 0;
2617 2716 buf->dtb_drops = 0;
2618 2717 }
2619 2718
2620 2719 /*
2621 2720 * Note: not called from probe context. This function is called
2622 2721 * asynchronously from cross call context to clean any speculations that are
2623 2722 * in the COMMITTINGMANY or DISCARDING states. These speculations may not be
2624 2723 * transitioned back to the INACTIVE state until all CPUs have cleaned the
2625 2724 * speculation.
2626 2725 */
2627 2726 static void
2628 2727 dtrace_speculation_clean_here(dtrace_state_t *state)
2629 2728 {
2630 2729 dtrace_icookie_t cookie;
2631 2730 processorid_t cpu = CPU->cpu_id;
2632 2731 dtrace_buffer_t *dest = &state->dts_buffer[cpu];
2633 2732 dtrace_specid_t i;
2634 2733
2635 2734 cookie = dtrace_interrupt_disable();
2636 2735
2637 2736 if (dest->dtb_tomax == NULL) {
2638 2737 dtrace_interrupt_enable(cookie);
2639 2738 return;
2640 2739 }
2641 2740
2642 2741 for (i = 0; i < state->dts_nspeculations; i++) {
2643 2742 dtrace_speculation_t *spec = &state->dts_speculations[i];
2644 2743 dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];
2645 2744
2646 2745 if (src->dtb_tomax == NULL)
2647 2746 continue;
2648 2747
2649 2748 if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
2650 2749 src->dtb_offset = 0;
2651 2750 continue;
2652 2751 }
2653 2752
2654 2753 if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
2655 2754 continue;
2656 2755
2657 2756 if (src->dtb_offset == 0)
2658 2757 continue;
2659 2758
2660 2759 dtrace_speculation_commit(state, cpu, i + 1);
2661 2760 }
2662 2761
2663 2762 dtrace_interrupt_enable(cookie);
2664 2763 }
2665 2764
2666 2765 /*
2667 2766 * Note: not called from probe context. This function is called
2668 2767 * asynchronously (and at a regular interval) to clean any speculations that
2669 2768 * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there
2670 2769 * is work to be done, it cross calls all CPUs to perform that work;
2671 2770  * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to the
2672 2771 * INACTIVE state until they have been cleaned by all CPUs.
2673 2772 */
2674 2773 static void
2675 2774 dtrace_speculation_clean(dtrace_state_t *state)
2676 2775 {
2677 2776 int work = 0, rv;
2678 2777 dtrace_specid_t i;
2679 2778
2680 2779 for (i = 0; i < state->dts_nspeculations; i++) {
2681 2780 dtrace_speculation_t *spec = &state->dts_speculations[i];
2682 2781
2683 2782 ASSERT(!spec->dtsp_cleaning);
2684 2783
2685 2784 if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
2686 2785 spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
2687 2786 continue;
2688 2787
2689 2788 work++;
2690 2789 spec->dtsp_cleaning = 1;
2691 2790 }
2692 2791
2693 2792 if (!work)
2694 2793 return;
2695 2794
2696 2795 dtrace_xcall(DTRACE_CPUALL,
2697 2796 (dtrace_xcall_t)dtrace_speculation_clean_here, state);
2698 2797
2699 2798 /*
2700 2799 * We now know that all CPUs have committed or discarded their
2701 2800 * speculation buffers, as appropriate. We can now set the state
2702 2801 * to inactive.
2703 2802 */
2704 2803 for (i = 0; i < state->dts_nspeculations; i++) {
2705 2804 dtrace_speculation_t *spec = &state->dts_speculations[i];
2706 2805 dtrace_speculation_state_t current, new;
2707 2806
2708 2807 if (!spec->dtsp_cleaning)
2709 2808 continue;
2710 2809
2711 2810 current = spec->dtsp_state;
2712 2811 ASSERT(current == DTRACESPEC_DISCARDING ||
2713 2812 current == DTRACESPEC_COMMITTINGMANY);
2714 2813
2715 2814 new = DTRACESPEC_INACTIVE;
2716 2815
2717 2816 rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
2718 2817 ASSERT(rv == current);
2719 2818 spec->dtsp_cleaning = 0;
2720 2819 }
2721 2820 }
2722 2821
2723 2822 /*
2724 2823 * Called as part of a speculate() to get the speculative buffer associated
2725 2824 * with a given speculation. Returns NULL if the specified speculation is not
2726 2825 * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and
2727 2826 * the active CPU is not the specified CPU -- the speculation will be
2728 2827 * atomically transitioned into the ACTIVEMANY state.
2729 2828 */
2730 2829 static dtrace_buffer_t *
2731 2830 dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
2732 2831 dtrace_specid_t which)
2733 2832 {
2734 2833 dtrace_speculation_t *spec;
2735 2834 dtrace_speculation_state_t current, new;
2736 2835 dtrace_buffer_t *buf;
2737 2836
2738 2837 if (which == 0)
2739 2838 return (NULL);
2740 2839
2741 2840 if (which > state->dts_nspeculations) {
2742 2841 cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2743 2842 return (NULL);
2744 2843 }
2745 2844
2746 2845 spec = &state->dts_speculations[which - 1];
2747 2846 buf = &spec->dtsp_buffer[cpuid];
2748 2847
2749 2848 do {
2750 2849 current = spec->dtsp_state;
2751 2850
2752 2851 switch (current) {
2753 2852 case DTRACESPEC_INACTIVE:
2754 2853 case DTRACESPEC_COMMITTINGMANY:
2755 2854 case DTRACESPEC_DISCARDING:
2756 2855 return (NULL);
2757 2856
2758 2857 case DTRACESPEC_COMMITTING:
2759 2858 ASSERT(buf->dtb_offset == 0);
2760 2859 return (NULL);
2761 2860
2762 2861 case DTRACESPEC_ACTIVEONE:
2763 2862 /*
2764 2863 * This speculation is currently active on one CPU.
2765 2864 * Check the offset in the buffer; if it's non-zero,
2766 2865 * that CPU must be us (and we leave the state alone).
2767 2866 * If it's zero, assume that we're starting on a new
2768 2867 * CPU -- and change the state to indicate that the
2769 2868 * speculation is active on more than one CPU.
2770 2869 */
2771 2870 if (buf->dtb_offset != 0)
2772 2871 return (buf);
2773 2872
2774 2873 new = DTRACESPEC_ACTIVEMANY;
2775 2874 break;
2776 2875
2777 2876 case DTRACESPEC_ACTIVEMANY:
2778 2877 return (buf);
2779 2878
2780 2879 case DTRACESPEC_ACTIVE:
2781 2880 new = DTRACESPEC_ACTIVEONE;
2782 2881 break;
2783 2882
2784 2883 default:
2785 2884 ASSERT(0);
2786 2885 }
2787 2886 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2788 2887 current, new) != current);
2789 2888
2790 2889 ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
2791 2890 return (buf);
2792 2891 }
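Taken together, dtrace_speculation(), dtrace_speculation_commit(),
dtrace_speculation_discard() and dtrace_speculation_buffer() implement
the lifecycle below (buffer offsets are as seen by the acting CPU; the
authoritative diagram is in <sys/dtrace_impl.h>):

    INACTIVE  --speculation()-->  ACTIVE
    ACTIVE    --buffer()------->  ACTIVEONE  --2nd CPU-->  ACTIVEMANY
    ACTIVE, ACTIVEONE (non-zero offset)  --commit()-->   COMMITTING
    ACTIVEONE (zero offset), ACTIVEMANY  --commit()-->   COMMITTINGMANY
    ACTIVE, ACTIVEONE (zero offset), ACTIVEMANY  --discard()--> DISCARDING
    ACTIVEONE (non-zero offset)  --discard()-->  INACTIVE
    COMMITTING  -->  INACTIVE  (sole CPU, at the end of commit)
    COMMITTINGMANY, DISCARDING  --speculation_clean()-->  INACTIVE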
2793 2892
2794 2893 /*
2795 2894 * Return a string. In the event that the user lacks the privilege to access
2796 2895 * arbitrary kernel memory, we copy the string out to scratch memory so that we
2797 2896 * don't fail access checking.
2798 2897 *
2799 2898 * dtrace_dif_variable() uses this routine as a helper for various
2800 2899 * builtin values such as 'execname' and 'probefunc.'
2801 2900 */
2802 2901 uintptr_t
2803 2902 dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
2804 2903 dtrace_mstate_t *mstate)
2805 2904 {
2806 2905 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
2807 2906 uintptr_t ret;
2808 2907 size_t strsz;
2809 2908
2810 2909 /*
2811 2910 * The easy case: this probe is allowed to read all of memory, so
2812 2911 * we can just return this as a vanilla pointer.
2813 2912 */
2814 2913 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
2815 2914 return (addr);
2816 2915
2817 2916 /*
2818 2917 * This is the tougher case: we copy the string in question from
2819 2918 * kernel memory into scratch memory and return it that way: this
2820 2919 * ensures that we won't trip up when access checking tests the
2821 2920 * BYREF return value.
2822 2921 */
2823 2922 strsz = dtrace_strlen((char *)addr, size) + 1;
2824 2923
2825 2924 if (mstate->dtms_scratch_ptr + strsz >
2826 2925 mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
2827 2926 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
2828 2927 return (NULL);
2829 2928 }
2830 2929
2831 2930 dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
2832 2931 strsz);
2833 2932 ret = mstate->dtms_scratch_ptr;
2834 2933 mstate->dtms_scratch_ptr += strsz;
2835 2934 return (ret);
2836 2935 }
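The scratch region consumed above is a simple per-probe bump allocator:
check that the requested size fits below the bound, hand out the current
pointer, advance it. A minimal sketch of that reserve-and-advance pattern
(names illustrative):

    #include <stdint.h>
    #include <stddef.h>

    /*
     * Reserve sz bytes of scratch, or return 0 on exhaustion -- the
     * shape of the bounds check in dtrace_dif_varstr(); the kernel
     * caller sets CPU_DTRACE_NOSCRATCH on failure.
     */
    static uintptr_t
    scratch_alloc(uintptr_t *ptrp, uintptr_t base, size_t size, size_t sz)
    {
            uintptr_t ret;

            if (*ptrp + sz > base + size)
                    return (0);

            ret = *ptrp;
            *ptrp += sz;
            return (ret);
    }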
2837 2936
2838 2937 /*
2839 2938 * This function implements the DIF emulator's variable lookups. The emulator
2840 2939 * passes a reserved variable identifier and optional built-in array index.
2841 2940 */
2842 2941 static uint64_t
2843 2942 dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
2844 2943 uint64_t ndx)
2845 2944 {
2846 2945 /*
2847 2946 * If we're accessing one of the uncached arguments, we'll turn this
2848 2947 * into a reference in the args array.
2849 2948 */
2850 2949 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
2851 2950 ndx = v - DIF_VAR_ARG0;
2852 2951 v = DIF_VAR_ARGS;
2853 2952 }
2854 2953
2855 2954 switch (v) {
2856 2955 case DIF_VAR_ARGS:
2857 2956 if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) {
2858 2957 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |=
2859 2958 CPU_DTRACE_KPRIV;
2860 2959 return (0);
2861 2960 }
2862 2961
2863 2962 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
2864 2963 if (ndx >= sizeof (mstate->dtms_arg) /
2865 2964 sizeof (mstate->dtms_arg[0])) {
2866 2965 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
2867 2966 dtrace_provider_t *pv;
2868 2967 uint64_t val;
2869 2968
2870 2969 pv = mstate->dtms_probe->dtpr_provider;
2871 2970 if (pv->dtpv_pops.dtps_getargval != NULL)
2872 2971 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
2873 2972 mstate->dtms_probe->dtpr_id,
2874 2973 mstate->dtms_probe->dtpr_arg, ndx, aframes);
2875 2974 else
2876 2975 val = dtrace_getarg(ndx, aframes);
2877 2976
2878 2977 /*
2879 2978 * This is regrettably required to keep the compiler
2880 2979 * from tail-optimizing the call to dtrace_getarg().
2881 2980 * The condition always evaluates to true, but the
2882 2981 * compiler has no way of figuring that out a priori.
2883 2982 * (None of this would be necessary if the compiler
2884 2983 * could be relied upon to _always_ tail-optimize
2885 2984 * the call to dtrace_getarg() -- but it can't.)
2886 2985 */
2887 2986 if (mstate->dtms_probe != NULL)
2888 2987 return (val);
2889 2988
2890 2989 ASSERT(0);
2891 2990 }
2892 2991
2893 2992 return (mstate->dtms_arg[ndx]);
2894 2993
2895 2994 case DIF_VAR_UREGS: {
2896 2995 klwp_t *lwp;
2897 2996
2898 2997 if (!dtrace_priv_proc(state, mstate))
2899 2998 return (0);
2900 2999
2901 3000 if ((lwp = curthread->t_lwp) == NULL) {
2902 3001 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
2903 3002 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL;
2904 3003 return (0);
2905 3004 }
2906 3005
2907 3006 return (dtrace_getreg(lwp->lwp_regs, ndx));
2908 3007 }
2909 3008
2910 3009 case DIF_VAR_VMREGS: {
2911 3010 uint64_t rval;
2912 3011
2913 3012 if (!dtrace_priv_kernel(state))
2914 3013 return (0);
2915 3014
2916 3015 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
2917 3016
2918 3017 rval = dtrace_getvmreg(ndx,
2919 3018 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);
2920 3019
2921 3020 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
2922 3021
2923 3022 return (rval);
2924 3023 }
2925 3024
2926 3025 case DIF_VAR_CURTHREAD:
2927 - if (!dtrace_priv_kernel(state))
3026 + if (!dtrace_priv_proc(state, mstate))
2928 3027 return (0);
2929 3028 return ((uint64_t)(uintptr_t)curthread);
2930 3029
2931 3030 case DIF_VAR_TIMESTAMP:
2932 3031 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
2933 3032 mstate->dtms_timestamp = dtrace_gethrtime();
2934 3033 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
2935 3034 }
2936 3035 return (mstate->dtms_timestamp);
2937 3036
2938 3037 case DIF_VAR_VTIMESTAMP:
2939 3038 ASSERT(dtrace_vtime_references != 0);
2940 3039 return (curthread->t_dtrace_vtime);
2941 3040
2942 3041 case DIF_VAR_WALLTIMESTAMP:
2943 3042 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
2944 3043 mstate->dtms_walltimestamp = dtrace_gethrestime();
2945 3044 mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
2946 3045 }
2947 3046 return (mstate->dtms_walltimestamp);
2948 3047
2949 3048 case DIF_VAR_IPL:
2950 3049 if (!dtrace_priv_kernel(state))
2951 3050 return (0);
2952 3051 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
2953 3052 mstate->dtms_ipl = dtrace_getipl();
2954 3053 mstate->dtms_present |= DTRACE_MSTATE_IPL;
2955 3054 }
2956 3055 return (mstate->dtms_ipl);
2957 3056
2958 3057 case DIF_VAR_EPID:
2959 3058 ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
2960 3059 return (mstate->dtms_epid);
2961 3060
2962 3061 case DIF_VAR_ID:
2963 3062 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
2964 3063 return (mstate->dtms_probe->dtpr_id);
2965 3064
2966 3065 case DIF_VAR_STACKDEPTH:
2967 3066 if (!dtrace_priv_kernel(state))
2968 3067 return (0);
2969 3068 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
2970 3069 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
2971 3070
2972 3071 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
2973 3072 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
2974 3073 }
2975 3074 return (mstate->dtms_stackdepth);
2976 3075
2977 3076 case DIF_VAR_USTACKDEPTH:
2978 3077 if (!dtrace_priv_proc(state, mstate))
2979 3078 return (0);
2980 3079 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
2981 3080 /*
2982 3081 * See comment in DIF_VAR_PID.
2983 3082 */
2984 3083 if (DTRACE_ANCHORED(mstate->dtms_probe) &&
2985 3084 CPU_ON_INTR(CPU)) {
2986 3085 mstate->dtms_ustackdepth = 0;
2987 3086 } else {
2988 3087 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
2989 3088 mstate->dtms_ustackdepth =
2990 3089 dtrace_getustackdepth();
2991 3090 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
2992 3091 }
2993 3092 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
2994 3093 }
2995 3094 return (mstate->dtms_ustackdepth);
2996 3095
2997 3096 case DIF_VAR_CALLER:
2998 3097 if (!dtrace_priv_kernel(state))
2999 3098 return (0);
3000 3099 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
3001 3100 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
3002 3101
3003 3102 if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
3004 3103 /*
3005 3104 * If this is an unanchored probe, we are
3006 3105 * required to go through the slow path:
3007 3106 * dtrace_caller() only guarantees correct
3008 3107 * results for anchored probes.
3009 3108 */
3010 3109 pc_t caller[2];
3011 3110
3012 3111 dtrace_getpcstack(caller, 2, aframes,
3013 3112 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
3014 3113 mstate->dtms_caller = caller[1];
3015 3114 } else if ((mstate->dtms_caller =
3016 3115 dtrace_caller(aframes)) == -1) {
3017 3116 /*
3018 3117 * We have failed to do this the quick way;
3019 3118 * we must resort to the slower approach of
3020 3119 * calling dtrace_getpcstack().
3021 3120 */
3022 3121 pc_t caller;
3023 3122
3024 3123 dtrace_getpcstack(&caller, 1, aframes, NULL);
3025 3124 mstate->dtms_caller = caller;
3026 3125 }
3027 3126
3028 3127 mstate->dtms_present |= DTRACE_MSTATE_CALLER;
3029 3128 }
3030 3129 return (mstate->dtms_caller);
3031 3130
3032 3131 case DIF_VAR_UCALLER:
3033 3132 if (!dtrace_priv_proc(state, mstate))
3034 3133 return (0);
3035 3134
3036 3135 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
3037 3136 uint64_t ustack[3];
3038 3137
3039 3138 /*
3040 3139 * dtrace_getupcstack() fills in the first uint64_t
3041 3140 * with the current PID. The second uint64_t will
3042 3141 * be the program counter at user-level. The third
3043 3142 * uint64_t will contain the caller, which is what
3044 3143 * we're after.
3045 3144 */
3046 3145 ustack[2] = NULL;
3047 3146 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3048 3147 dtrace_getupcstack(ustack, 3);
3049 3148 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3050 3149 mstate->dtms_ucaller = ustack[2];
3051 3150 mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
3052 3151 }
3053 3152
3054 3153 return (mstate->dtms_ucaller);
3055 3154
3056 3155 case DIF_VAR_PROBEPROV:
3057 3156 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3058 3157 return (dtrace_dif_varstr(
3059 3158 (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
3060 3159 state, mstate));
3061 3160
3062 3161 case DIF_VAR_PROBEMOD:
3063 3162 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3064 3163 return (dtrace_dif_varstr(
3065 3164 (uintptr_t)mstate->dtms_probe->dtpr_mod,
3066 3165 state, mstate));
3067 3166
3068 3167 case DIF_VAR_PROBEFUNC:
3069 3168 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3070 3169 return (dtrace_dif_varstr(
3071 3170 (uintptr_t)mstate->dtms_probe->dtpr_func,
3072 3171 state, mstate));
3073 3172
3074 3173 case DIF_VAR_PROBENAME:
3075 3174 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3076 3175 return (dtrace_dif_varstr(
3077 3176 (uintptr_t)mstate->dtms_probe->dtpr_name,
3078 3177 state, mstate));
3079 3178
3080 3179 case DIF_VAR_PID:
3081 3180 if (!dtrace_priv_proc(state, mstate))
3082 3181 return (0);
3083 3182
3084 3183 /*
3085 3184 * Note that we are assuming that an unanchored probe is
3086 3185 * always due to a high-level interrupt. (And we're assuming
3087 3186 	 * that there is only a single high-level interrupt.)
3088 3187 */
3089 3188 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3090 3189 return (pid0.pid_id);
3091 3190
3092 3191 /*
3093 3192 * It is always safe to dereference one's own t_procp pointer:
3094 3193 * it always points to a valid, allocated proc structure.
3095 3194 * Further, it is always safe to dereference the p_pidp member
3096 3195 	 * of one's own proc structure. (These are truisms because
3097 3196 * threads and processes don't clean up their own state --
3098 3197 * they leave that task to whomever reaps them.)
3099 3198 */
3100 3199 return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
3101 3200
3102 3201 case DIF_VAR_PPID:
3103 3202 if (!dtrace_priv_proc(state, mstate))
3104 3203 return (0);
3105 3204
3106 3205 /*
3107 3206 * See comment in DIF_VAR_PID.
3108 3207 */
3109 3208 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3110 3209 return (pid0.pid_id);
3111 3210
3112 3211 /*
3113 3212 * It is always safe to dereference one's own t_procp pointer:
3114 3213 * it always points to a valid, allocated proc structure.
3115 3214 * (This is true because threads don't clean up their own
3116 3215 * state -- they leave that task to whomever reaps them.)
3117 3216 */
3118 3217 return ((uint64_t)curthread->t_procp->p_ppid);
3119 3218
3120 3219 case DIF_VAR_TID:
3121 3220 /*
3122 3221 * See comment in DIF_VAR_PID.
3123 3222 */
3124 3223 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3125 3224 return (0);
3126 3225
3127 3226 return ((uint64_t)curthread->t_tid);
3128 3227
3129 3228 case DIF_VAR_EXECNAME:
3130 3229 if (!dtrace_priv_proc(state, mstate))
3131 3230 return (0);
3132 3231
3133 3232 /*
3134 3233 * See comment in DIF_VAR_PID.
3135 3234 */
3136 3235 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3137 3236 return ((uint64_t)(uintptr_t)p0.p_user.u_comm);
3138 3237
3139 3238 /*
3140 3239 * It is always safe to dereference one's own t_procp pointer:
3141 3240 * it always points to a valid, allocated proc structure.
3142 3241 * (This is true because threads don't clean up their own
3143 3242 * state -- they leave that task to whomever reaps them.)
3144 3243 */
3145 3244 return (dtrace_dif_varstr(
3146 3245 (uintptr_t)curthread->t_procp->p_user.u_comm,
3147 3246 state, mstate));
3148 3247
3149 3248 case DIF_VAR_ZONENAME:
3150 3249 if (!dtrace_priv_proc(state, mstate))
3151 3250 return (0);
3152 3251
3153 3252 /*
3154 3253 * See comment in DIF_VAR_PID.
3155 3254 */
3156 3255 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3157 3256 return ((uint64_t)(uintptr_t)p0.p_zone->zone_name);
3158 3257
3159 3258 /*
3160 3259 * It is always safe to dereference one's own t_procp pointer:
3161 3260 * it always points to a valid, allocated proc structure.
3162 3261 * (This is true because threads don't clean up their own
3163 3262 * state -- they leave that task to whomever reaps them.)
3164 3263 */
3165 3264 return (dtrace_dif_varstr(
3166 3265 (uintptr_t)curthread->t_procp->p_zone->zone_name,
3167 3266 state, mstate));
3168 3267
3169 3268 case DIF_VAR_UID:
3170 3269 if (!dtrace_priv_proc(state, mstate))
3171 3270 return (0);
3172 3271
3173 3272 /*
3174 3273 * See comment in DIF_VAR_PID.
3175 3274 */
3176 3275 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3177 3276 return ((uint64_t)p0.p_cred->cr_uid);
3178 3277
3179 3278 /*
3180 3279 * It is always safe to dereference one's own t_procp pointer:
3181 3280 * it always points to a valid, allocated proc structure.
3182 3281 * (This is true because threads don't clean up their own
3183 3282 * state -- they leave that task to whomever reaps them.)
3184 3283 *
3185 3284 * Additionally, it is safe to dereference one's own process
3186 3285 * credential, since this is never NULL after process birth.
3187 3286 */
3188 3287 return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
3189 3288
3190 3289 case DIF_VAR_GID:
3191 3290 if (!dtrace_priv_proc(state, mstate))
3192 3291 return (0);
3193 3292
3194 3293 /*
3195 3294 * See comment in DIF_VAR_PID.
3196 3295 */
3197 3296 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3198 3297 return ((uint64_t)p0.p_cred->cr_gid);
3199 3298
3200 3299 /*
3201 3300 * It is always safe to dereference one's own t_procp pointer:
3202 3301 * it always points to a valid, allocated proc structure.
3203 3302 * (This is true because threads don't clean up their own
3204 3303 * state -- they leave that task to whomever reaps them.)
3205 3304 *
3206 3305 * Additionally, it is safe to dereference one's own process
3207 3306 * credential, since this is never NULL after process birth.
3208 3307 */
3209 3308 return ((uint64_t)curthread->t_procp->p_cred->cr_gid);
3210 3309
3211 3310 case DIF_VAR_ERRNO: {
3212 3311 klwp_t *lwp;
3213 3312 if (!dtrace_priv_proc(state, mstate))
3214 3313 return (0);
3215 3314
3216 3315 /*
3217 3316 * See comment in DIF_VAR_PID.
3218 3317 */
3219 3318 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3220 3319 return (0);
3221 3320
3222 3321 /*
3223 3322 * It is always safe to dereference one's own t_lwp pointer in
3224 3323 * the event that this pointer is non-NULL. (This is true
3225 3324 * because threads and lwps don't clean up their own state --
3226 3325 * they leave that task to whomever reaps them.)
3227 3326 */
3228 3327 if ((lwp = curthread->t_lwp) == NULL)
3229 3328 return (0);
3230 3329
3231 3330 return ((uint64_t)lwp->lwp_errno);
3232 3331 }
3233 3332 default:
3234 3333 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3235 3334 return (0);
3236 3335 }
3237 3336 }
3238 3337
3239 3338 /*
3240 3339 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
3241 3340 * Notice that we don't bother validating the proper number of arguments or
3242 3341  * their types in the tuple stack. This isn't needed: all argument
3243 3342  * interpretation is safe thanks to our load safety -- the worst that can
3244 3343 * happen is that a bogus program can obtain bogus results.
3245 3344 */
3246 3345 static void
3247 3346 dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
3248 3347 dtrace_key_t *tupregs, int nargs,
3249 3348 dtrace_mstate_t *mstate, dtrace_state_t *state)
3250 3349 {
3251 3350 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
3252 3351 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
3253 3352 dtrace_vstate_t *vstate = &state->dts_vstate;
3254 3353
3255 3354 union {
3256 3355 mutex_impl_t mi;
3257 3356 uint64_t mx;
3258 3357 } m;
3259 3358
3260 3359 union {
3261 3360 krwlock_t ri;
3262 3361 uintptr_t rw;
3263 3362 } r;
3264 3363
3265 3364 switch (subr) {
3266 3365 case DIF_SUBR_RAND:
3267 3366 regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
3268 3367 break;
3269 3368
3270 3369 case DIF_SUBR_MUTEX_OWNED:
3271 3370 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3272 3371 mstate, vstate)) {
3273 3372 regs[rd] = NULL;
3274 3373 break;
3275 3374 }
3276 3375
3277 3376 m.mx = dtrace_load64(tupregs[0].dttk_value);
3278 3377 if (MUTEX_TYPE_ADAPTIVE(&m.mi))
3279 3378 regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
3280 3379 else
3281 3380 regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
3282 3381 break;
3283 3382
3284 3383 case DIF_SUBR_MUTEX_OWNER:
3285 3384 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3286 3385 mstate, vstate)) {
3287 3386 regs[rd] = NULL;
3288 3387 break;
3289 3388 }
3290 3389
3291 3390 m.mx = dtrace_load64(tupregs[0].dttk_value);
3292 3391 if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
3293 3392 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
3294 3393 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
3295 3394 else
3296 3395 regs[rd] = 0;
3297 3396 break;
3298 3397
3299 3398 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
3300 3399 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3301 3400 mstate, vstate)) {
3302 3401 regs[rd] = NULL;
3303 3402 break;
3304 3403 }
3305 3404
3306 3405 m.mx = dtrace_load64(tupregs[0].dttk_value);
3307 3406 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
3308 3407 break;
3309 3408
3310 3409 case DIF_SUBR_MUTEX_TYPE_SPIN:
3311 3410 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3312 3411 mstate, vstate)) {
3313 3412 regs[rd] = NULL;
3314 3413 break;
3315 3414 }
3316 3415
3317 3416 m.mx = dtrace_load64(tupregs[0].dttk_value);
3318 3417 regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
3319 3418 break;
3320 3419
3321 3420 case DIF_SUBR_RW_READ_HELD: {
3322 3421 uintptr_t tmp;
3323 3422
3324 3423 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
3325 3424 mstate, vstate)) {
3326 3425 regs[rd] = NULL;
3327 3426 break;
3328 3427 }
3329 3428
3330 3429 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3331 3430 regs[rd] = _RW_READ_HELD(&r.ri, tmp);
3332 3431 break;
3333 3432 }
3334 3433
3335 3434 case DIF_SUBR_RW_WRITE_HELD:
3336 3435 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
3337 3436 mstate, vstate)) {
3338 3437 regs[rd] = NULL;
3339 3438 break;
3340 3439 }
3341 3440
3342 3441 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3343 3442 regs[rd] = _RW_WRITE_HELD(&r.ri);
3344 3443 break;
3345 3444
3346 3445 case DIF_SUBR_RW_ISWRITER:
3347 3446 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
3348 3447 mstate, vstate)) {
3349 3448 regs[rd] = NULL;
3350 3449 break;
3351 3450 }
3352 3451
3353 3452 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3354 3453 regs[rd] = _RW_ISWRITER(&r.ri);
3355 3454 break;
3356 3455
3357 3456 case DIF_SUBR_BCOPY: {
3358 3457 /*
3359 3458 * We need to be sure that the destination is in the scratch
3360 3459 * region -- no other region is allowed.
3361 3460 */
3362 3461 uintptr_t src = tupregs[0].dttk_value;
3363 3462 uintptr_t dest = tupregs[1].dttk_value;
3364 3463 size_t size = tupregs[2].dttk_value;
3365 3464
3366 3465 if (!dtrace_inscratch(dest, size, mstate)) {
3367 3466 *flags |= CPU_DTRACE_BADADDR;
3368 3467 *illval = regs[rd];
3369 3468 break;
3370 3469 }
3371 3470
3372 3471 if (!dtrace_canload(src, size, mstate, vstate)) {
3373 3472 regs[rd] = NULL;
3374 3473 break;
3375 3474 }
3376 3475
3377 3476 dtrace_bcopy((void *)src, (void *)dest, size);
3378 3477 break;
3379 3478 }
3380 3479
3381 3480 case DIF_SUBR_ALLOCA:
3382 3481 case DIF_SUBR_COPYIN: {
3383 3482 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
3384 3483 uint64_t size =
3385 3484 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
3386 3485 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;
3387 3486
3388 3487 /*
3389 3488 * This action doesn't require any credential checks since
3390 3489 * probes will not activate in user contexts to which the
3391 3490 * enabling user does not have permissions.
3392 3491 */
3393 3492
3394 3493 /*
3395 3494 * Rounding up the user allocation size could have overflowed
3396 3495 * a large, bogus allocation (like -1ULL) to 0.
3397 3496 */
3398 3497 if (scratch_size < size ||
3399 3498 !DTRACE_INSCRATCH(mstate, scratch_size)) {
3400 3499 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3401 3500 regs[rd] = NULL;
3402 3501 break;
3403 3502 }
3404 3503
3405 3504 if (subr == DIF_SUBR_COPYIN) {
3406 3505 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3407 3506 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
3408 3507 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3409 3508 }
3410 3509
3411 3510 mstate->dtms_scratch_ptr += scratch_size;
3412 3511 regs[rd] = dest;
3413 3512 break;
3414 3513 }
3415 3514
3416 3515 case DIF_SUBR_COPYINTO: {
3417 3516 uint64_t size = tupregs[1].dttk_value;
3418 3517 uintptr_t dest = tupregs[2].dttk_value;
3419 3518
3420 3519 /*
3421 3520 * This action doesn't require any credential checks since
3422 3521 * probes will not activate in user contexts to which the
3423 3522 * enabling user does not have permissions.
3424 3523 */
3425 3524 if (!dtrace_inscratch(dest, size, mstate)) {
3426 3525 *flags |= CPU_DTRACE_BADADDR;
3427 3526 *illval = regs[rd];
3428 3527 break;
3429 3528 }
3430 3529
3431 3530 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3432 3531 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
3433 3532 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3434 3533 break;
3435 3534 }
3436 3535
3437 3536 case DIF_SUBR_COPYINSTR: {
3438 3537 uintptr_t dest = mstate->dtms_scratch_ptr;
3439 3538 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3440 3539
3441 3540 if (nargs > 1 && tupregs[1].dttk_value < size)
3442 3541 size = tupregs[1].dttk_value + 1;
3443 3542
3444 3543 /*
3445 3544 * This action doesn't require any credential checks since
3446 3545 * probes will not activate in user contexts to which the
3447 3546 * enabling user does not have permissions.
3448 3547 */
3449 3548 if (!DTRACE_INSCRATCH(mstate, size)) {
3450 3549 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3451 3550 regs[rd] = NULL;
3452 3551 break;
3453 3552 }
3454 3553
3455 3554 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3456 3555 dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
3457 3556 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3458 3557
3459 3558 ((char *)dest)[size - 1] = '\0';
3460 3559 mstate->dtms_scratch_ptr += size;
3461 3560 regs[rd] = dest;
3462 3561 break;
3463 3562 }
3464 3563
3465 3564 case DIF_SUBR_MSGSIZE:
3466 3565 case DIF_SUBR_MSGDSIZE: {
3467 3566 uintptr_t baddr = tupregs[0].dttk_value, daddr;
3468 3567 uintptr_t wptr, rptr;
3469 3568 size_t count = 0;
3470 3569 int cont = 0;
3471 3570
3472 3571 while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
3473 3572
3474 3573 if (!dtrace_canload(baddr, sizeof (mblk_t), mstate,
3475 3574 vstate)) {
3476 3575 regs[rd] = NULL;
3477 3576 break;
3478 3577 }
3479 3578
3480 3579 wptr = dtrace_loadptr(baddr +
3481 3580 offsetof(mblk_t, b_wptr));
3482 3581
3483 3582 rptr = dtrace_loadptr(baddr +
3484 3583 offsetof(mblk_t, b_rptr));
3485 3584
3486 3585 if (wptr < rptr) {
3487 3586 *flags |= CPU_DTRACE_BADADDR;
3488 3587 *illval = tupregs[0].dttk_value;
3489 3588 break;
3490 3589 }
3491 3590
3492 3591 daddr = dtrace_loadptr(baddr +
3493 3592 offsetof(mblk_t, b_datap));
3494 3593
3495 3594 baddr = dtrace_loadptr(baddr +
3496 3595 offsetof(mblk_t, b_cont));
3497 3596
3498 3597 /*
3499 3598 			 * We want to guard against denial-of-service here,
3500 3599 * so we're only going to search the list for
3501 3600 * dtrace_msgdsize_max mblks.
3502 3601 */
3503 3602 if (cont++ > dtrace_msgdsize_max) {
3504 3603 *flags |= CPU_DTRACE_ILLOP;
3505 3604 break;
3506 3605 }
3507 3606
3508 3607 if (subr == DIF_SUBR_MSGDSIZE) {
3509 3608 if (dtrace_load8(daddr +
3510 3609 offsetof(dblk_t, db_type)) != M_DATA)
3511 3610 continue;
3512 3611 }
3513 3612
3514 3613 count += wptr - rptr;
3515 3614 }
3516 3615
3517 3616 if (!(*flags & CPU_DTRACE_FAULT))
3518 3617 regs[rd] = count;
3519 3618
3520 3619 break;
3521 3620 }
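		/*
		 * The loop above caps the walk at dtrace_msgdsize_max links so
		 * that a corrupt (or circular) b_cont chain cannot stall the
		 * CPU in probe context.  A minimal standalone sketch of the
		 * same bounded-walk pattern, using a hypothetical blk type
		 * rather than the real mblk_t:
		 */
		struct blk {
			unsigned char	*rptr, *wptr;	/* read/write pointers */
			struct blk	*cont;		/* next block in chain */
		};

		static long
		chain_bytes(const struct blk *b, int maxlinks)
		{
			long count = 0;

			for (; b != NULL; b = b->cont) {
				if (b->wptr < b->rptr)
					return (-1);	/* corrupt block */
				if (maxlinks-- <= 0)
					return (-1);	/* over-long chain */
				count += b->wptr - b->rptr;
			}
			return (count);
		}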
3522 3621
3523 3622 case DIF_SUBR_PROGENYOF: {
3524 3623 pid_t pid = tupregs[0].dttk_value;
3525 3624 proc_t *p;
3526 3625 int rval = 0;
3527 3626
3528 3627 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3529 3628
3530 3629 for (p = curthread->t_procp; p != NULL; p = p->p_parent) {
3531 3630 if (p->p_pidp->pid_id == pid) {
3532 3631 rval = 1;
3533 3632 break;
3534 3633 }
3535 3634 }
3536 3635
3537 3636 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3538 3637
3539 3638 regs[rd] = rval;
3540 3639 break;
3541 3640 }
3542 3641
3543 3642 case DIF_SUBR_SPECULATION:
3544 3643 regs[rd] = dtrace_speculation(state);
3545 3644 break;
3546 3645
3547 3646 case DIF_SUBR_COPYOUT: {
3548 3647 uintptr_t kaddr = tupregs[0].dttk_value;
3549 3648 uintptr_t uaddr = tupregs[1].dttk_value;
3550 3649 uint64_t size = tupregs[2].dttk_value;
3551 3650
3552 3651 if (!dtrace_destructive_disallow &&
3553 3652 dtrace_priv_proc_control(state, mstate) &&
3554 3653 !dtrace_istoxic(kaddr, size)) {
3555 3654 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3556 3655 dtrace_copyout(kaddr, uaddr, size, flags);
3557 3656 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3558 3657 }
3559 3658 break;
3560 3659 }
3561 3660
3562 3661 case DIF_SUBR_COPYOUTSTR: {
3563 3662 uintptr_t kaddr = tupregs[0].dttk_value;
3564 3663 uintptr_t uaddr = tupregs[1].dttk_value;
3565 3664 uint64_t size = tupregs[2].dttk_value;
3566 3665
3567 3666 if (!dtrace_destructive_disallow &&
3568 3667 dtrace_priv_proc_control(state, mstate) &&
3569 3668 !dtrace_istoxic(kaddr, size)) {
3570 3669 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3571 3670 dtrace_copyoutstr(kaddr, uaddr, size, flags);
3572 3671 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3573 3672 }
3574 3673 break;
3575 3674 }
3576 3675
3577 3676 case DIF_SUBR_STRLEN: {
3578 3677 size_t sz;
3579 3678 uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
3580 3679 sz = dtrace_strlen((char *)addr,
3581 3680 state->dts_options[DTRACEOPT_STRSIZE]);
3582 3681
3583 3682 if (!dtrace_canload(addr, sz + 1, mstate, vstate)) {
3584 3683 regs[rd] = NULL;
3585 3684 break;
3586 3685 }
3587 3686
3588 3687 regs[rd] = sz;
3589 3688
3590 3689 break;
3591 3690 }
3592 3691
3593 3692 case DIF_SUBR_STRCHR:
3594 3693 case DIF_SUBR_STRRCHR: {
3595 3694 /*
3596 3695 * We're going to iterate over the string looking for the
3597 3696 * specified character. We will iterate until we have reached
3598 3697 * the string length or we have found the character. If this
3599 3698 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
3600 3699 * of the specified character instead of the first.
3601 3700 */
3602 3701 uintptr_t saddr = tupregs[0].dttk_value;
3603 3702 uintptr_t addr = tupregs[0].dttk_value;
3604 3703 uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE];
3605 3704 char c, target = (char)tupregs[1].dttk_value;
3606 3705
3607 3706 for (regs[rd] = NULL; addr < limit; addr++) {
3608 3707 if ((c = dtrace_load8(addr)) == target) {
3609 3708 regs[rd] = addr;
3610 3709
3611 3710 if (subr == DIF_SUBR_STRCHR)
3612 3711 break;
3613 3712 }
3614 3713
3615 3714 if (c == '\0')
3616 3715 break;
3617 3716 }
3618 3717
3619 3718 if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) {
3620 3719 regs[rd] = NULL;
3621 3720 break;
3622 3721 }
3623 3722
3624 3723 break;
3625 3724 }
3626 3725
3627 3726 case DIF_SUBR_STRSTR:
3628 3727 case DIF_SUBR_INDEX:
3629 3728 case DIF_SUBR_RINDEX: {
3630 3729 /*
3631 3730 * We're going to iterate over the string looking for the
3632 3731 * specified string. We will iterate until we have reached
3633 3732 * the string length or we have found the string. (Yes, this
3634 3733 * is done in the most naive way possible -- but considering
3635 3734 * that the string we're searching for is likely to be
3636 3735 * relatively short, the complexity of Rabin-Karp or similar
3637 3736 * hardly seems merited.)
3638 3737 */
3639 3738 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
3640 3739 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
3641 3740 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3642 3741 size_t len = dtrace_strlen(addr, size);
3643 3742 size_t sublen = dtrace_strlen(substr, size);
3644 3743 char *limit = addr + len, *orig = addr;
3645 3744 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
3646 3745 int inc = 1;
3647 3746
3648 3747 regs[rd] = notfound;
3649 3748
3650 3749 if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
3651 3750 regs[rd] = NULL;
3652 3751 break;
3653 3752 }
3654 3753
3655 3754 if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
3656 3755 vstate)) {
3657 3756 regs[rd] = NULL;
3658 3757 break;
3659 3758 }
3660 3759
3661 3760 /*
3662 3761 * strstr() and index()/rindex() have similar semantics if
3663 3762 * both strings are the empty string: strstr() returns a
3664 3763 * pointer to the (empty) string, and index() and rindex()
3665 3764 * both return index 0 (regardless of any position argument).
3666 3765 */
3667 3766 if (sublen == 0 && len == 0) {
3668 3767 if (subr == DIF_SUBR_STRSTR)
3669 3768 regs[rd] = (uintptr_t)addr;
3670 3769 else
3671 3770 regs[rd] = 0;
3672 3771 break;
3673 3772 }
3674 3773
3675 3774 if (subr != DIF_SUBR_STRSTR) {
3676 3775 if (subr == DIF_SUBR_RINDEX) {
3677 3776 limit = orig - 1;
3678 3777 addr += len;
3679 3778 inc = -1;
3680 3779 }
3681 3780
3682 3781 /*
3683 3782 * Both index() and rindex() take an optional position
3684 3783 * argument that denotes the starting position.
3685 3784 */
3686 3785 if (nargs == 3) {
3687 3786 int64_t pos = (int64_t)tupregs[2].dttk_value;
3688 3787
3689 3788 /*
3690 3789 * If the position argument to index() is
3691 3790 * negative, Perl implicitly clamps it at
3692 3791 * zero. This semantic is a little surprising
3693 3792 * given the special meaning of negative
3694 3793 * positions to similar Perl functions like
3695 3794 * substr(), but it appears to reflect a
3696 3795 * notion that index() can start from a
3697 3796 * negative index and increment its way up to
3698 3797 * the string. Given this notion, Perl's
3699 3798 * rindex() is at least self-consistent in
3700 3799 * that it implicitly clamps positions greater
3701 3800 * than the string length to be the string
3702 3801 * length. Where Perl completely loses
3703 3802 * coherence, however, is when the specified
3704 3803 * substring is the empty string (""). In
3705 3804 * this case, even if the position is
3706 3805 * negative, rindex() returns 0 -- and even if
3707 3806 * the position is greater than the length,
3708 3807 * index() returns the string length. These
3709 3808 * semantics violate the notion that index()
3710 3809 * should never return a value less than the
3711 3810 * specified position and that rindex() should
3712 3811 * never return a value greater than the
3713 3812 * specified position. (One assumes that
3714 3813 * these semantics are artifacts of Perl's
3715 3814 * implementation and not the results of
3716 3815 * deliberate design -- it beggars belief that
3717 3816 * even Larry Wall could desire such oddness.)
3718 3817 * While in the abstract one would wish for
3719 3818 * consistent position semantics across
3720 3819 * substr(), index() and rindex() -- or at the
3721 3820 * very least self-consistent position
3722 3821 * semantics for index() and rindex() -- we
3723 3822 * instead opt to keep with the extant Perl
3724 3823 * semantics, in all their broken glory. (Do
3725 3824 * we have more desire to maintain Perl's
3726 3825 * semantics than Perl does? Probably.)
3727 3826 */
3728 3827 if (subr == DIF_SUBR_RINDEX) {
3729 3828 if (pos < 0) {
3730 3829 if (sublen == 0)
3731 3830 regs[rd] = 0;
3732 3831 break;
3733 3832 }
3734 3833
3735 3834 if (pos > len)
3736 3835 pos = len;
3737 3836 } else {
3738 3837 if (pos < 0)
3739 3838 pos = 0;
3740 3839
3741 3840 if (pos >= len) {
3742 3841 if (sublen == 0)
3743 3842 regs[rd] = len;
3744 3843 break;
3745 3844 }
3746 3845 }
3747 3846
3748 3847 addr = orig + pos;
3749 3848 }
3750 3849 }
3751 3850
3752 3851 for (regs[rd] = notfound; addr != limit; addr += inc) {
3753 3852 if (dtrace_strncmp(addr, substr, sublen) == 0) {
3754 3853 if (subr != DIF_SUBR_STRSTR) {
3755 3854 /*
3756 3855 * As D index() and rindex() are
3757 3856 * modeled on Perl (and not on awk),
3758 3857 * we return a zero-based (and not a
3759 3858 * one-based) index. (For you Perl
3760 3859 * weenies: no, we're not going to add
3761 3860 * $[ -- and shouldn't you be at a con
3762 3861 * or something?)
3763 3862 */
3764 3863 regs[rd] = (uintptr_t)(addr - orig);
3765 3864 break;
3766 3865 }
3767 3866
3768 3867 ASSERT(subr == DIF_SUBR_STRSTR);
3769 3868 regs[rd] = (uintptr_t)addr;
3770 3869 break;
3771 3870 }
3772 3871 }
3773 3872
3774 3873 break;
3775 3874 }
3776 3875
3777 3876 case DIF_SUBR_STRTOK: {
3778 3877 uintptr_t addr = tupregs[0].dttk_value;
3779 3878 uintptr_t tokaddr = tupregs[1].dttk_value;
3780 3879 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3781 3880 uintptr_t limit, toklimit = tokaddr + size;
3782 3881 uint8_t c, tokmap[32]; /* 256 / 8 */
3783 3882 char *dest = (char *)mstate->dtms_scratch_ptr;
3784 3883 int i;
3785 3884
3786 3885 /*
3787 3886 * Check both the token buffer and (later) the input buffer,
3788 3887 * since both could be non-scratch addresses.
3789 3888 */
3790 3889 if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) {
3791 3890 regs[rd] = NULL;
3792 3891 break;
3793 3892 }
3794 3893
3795 3894 if (!DTRACE_INSCRATCH(mstate, size)) {
3796 3895 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3797 3896 regs[rd] = NULL;
3798 3897 break;
3799 3898 }
3800 3899
3801 3900 if (addr == NULL) {
3802 3901 /*
3803 3902 * If the address specified is NULL, we use our saved
3804 3903 * strtok pointer from the mstate. Note that this
3805 3904 * means that the saved strtok pointer is _only_
3806 3905 * valid within multiple enablings of the same probe --
3807 3906 * it behaves like an implicit clause-local variable.
3808 3907 */
3809 3908 addr = mstate->dtms_strtok;
3810 3909 } else {
3811 3910 /*
3812 3911 * If the user-specified address is non-NULL we must
3813 3912 * access check it. This is the only time we have
3814 3913 * a chance to do so, since this address may reside
3815 3914 			 * in the string table of this clause -- future calls
3816 3915 * (when we fetch addr from mstate->dtms_strtok)
3817 3916 * would fail this access check.
3818 3917 */
3819 3918 if (!dtrace_strcanload(addr, size, mstate, vstate)) {
3820 3919 regs[rd] = NULL;
3821 3920 break;
3822 3921 }
3823 3922 }
3824 3923
3825 3924 /*
3826 3925 * First, zero the token map, and then process the token
3827 3926 * string -- setting a bit in the map for every character
3828 3927 * found in the token string.
3829 3928 */
3830 3929 for (i = 0; i < sizeof (tokmap); i++)
3831 3930 tokmap[i] = 0;
3832 3931
3833 3932 for (; tokaddr < toklimit; tokaddr++) {
3834 3933 if ((c = dtrace_load8(tokaddr)) == '\0')
3835 3934 break;
3836 3935
3837 3936 ASSERT((c >> 3) < sizeof (tokmap));
3838 3937 tokmap[c >> 3] |= (1 << (c & 0x7));
3839 3938 }
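		/*
		 * The token map built above packs a 256-bit membership set
		 * into 32 bytes: c >> 3 selects the byte and 1 << (c & 0x7)
		 * the bit within it.  A self-contained sketch of the idiom,
		 * with a hypothetical byteset_t rather than the DIF code:
		 */
		typedef struct {
			uint8_t map[32];	/* 256 bits, one per byte value */
		} byteset_t;

		static void
		byteset_add(byteset_t *s, uint8_t c)
		{
			s->map[c >> 3] |= (uint8_t)(1 << (c & 0x7));
		}

		static int
		byteset_has(const byteset_t *s, uint8_t c)
		{
			return ((s->map[c >> 3] & (1 << (c & 0x7))) != 0);
		}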
3840 3939
3841 3940 for (limit = addr + size; addr < limit; addr++) {
3842 3941 /*
3843 3942 * We're looking for a character that is _not_ contained
3844 3943 * in the token string.
3845 3944 */
3846 3945 if ((c = dtrace_load8(addr)) == '\0')
3847 3946 break;
3848 3947
3849 3948 if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
3850 3949 break;
3851 3950 }
3852 3951
3853 3952 if (c == '\0') {
3854 3953 /*
3855 3954 * We reached the end of the string without finding
3856 3955 * any character that was not in the token string.
3857 3956 * We return NULL in this case, and we set the saved
3858 3957 * address to NULL as well.
3859 3958 */
3860 3959 regs[rd] = NULL;
3861 3960 mstate->dtms_strtok = NULL;
3862 3961 break;
3863 3962 }
3864 3963
3865 3964 /*
3866 3965 * From here on, we're copying into the destination string.
3867 3966 */
3868 3967 for (i = 0; addr < limit && i < size - 1; addr++) {
3869 3968 if ((c = dtrace_load8(addr)) == '\0')
3870 3969 break;
3871 3970
3872 3971 if (tokmap[c >> 3] & (1 << (c & 0x7)))
3873 3972 break;
3874 3973
3875 3974 ASSERT(i < size);
3876 3975 dest[i++] = c;
3877 3976 }
3878 3977
3879 3978 ASSERT(i < size);
3880 3979 dest[i] = '\0';
3881 3980 regs[rd] = (uintptr_t)dest;
3882 3981 mstate->dtms_scratch_ptr += size;
3883 3982 mstate->dtms_strtok = addr;
3884 3983 break;
3885 3984 }
3886 3985
3887 3986 case DIF_SUBR_SUBSTR: {
3888 3987 uintptr_t s = tupregs[0].dttk_value;
3889 3988 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3890 3989 char *d = (char *)mstate->dtms_scratch_ptr;
3891 3990 int64_t index = (int64_t)tupregs[1].dttk_value;
3892 3991 int64_t remaining = (int64_t)tupregs[2].dttk_value;
3893 3992 size_t len = dtrace_strlen((char *)s, size);
3894 3993 int64_t i;
3895 3994
3896 3995 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
3897 3996 regs[rd] = NULL;
3898 3997 break;
3899 3998 }
3900 3999
3901 4000 if (!DTRACE_INSCRATCH(mstate, size)) {
3902 4001 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3903 4002 regs[rd] = NULL;
3904 4003 break;
3905 4004 }
3906 4005
3907 4006 if (nargs <= 2)
3908 4007 remaining = (int64_t)size;
3909 4008
3910 4009 if (index < 0) {
3911 4010 index += len;
3912 4011
3913 4012 if (index < 0 && index + remaining > 0) {
3914 4013 remaining += index;
3915 4014 index = 0;
3916 4015 }
3917 4016 }
3918 4017
3919 4018 if (index >= len || index < 0) {
3920 4019 remaining = 0;
3921 4020 } else if (remaining < 0) {
3922 4021 remaining += len - index;
3923 4022 } else if (index + remaining > size) {
3924 4023 remaining = size - index;
3925 4024 }
3926 4025
3927 4026 for (i = 0; i < remaining; i++) {
3928 4027 if ((d[i] = dtrace_load8(s + index + i)) == '\0')
3929 4028 break;
3930 4029 }
3931 4030
3932 4031 d[i] = '\0';
3933 4032
3934 4033 mstate->dtms_scratch_ptr += size;
3935 4034 regs[rd] = (uintptr_t)d;
3936 4035 break;
3937 4036 }
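	/*
	 * The clamping above mirrors Perl's substr() semantics for negative
	 * and oversized arguments.  A hedged sketch of just the arithmetic,
	 * clamping against the string length rather than the scratch size
	 * (a simplification of the code above):
	 */
	static void
	clamp_substr(const char *s, long index, long remaining,
	    long *startp, long *nbytesp)
	{
		long len = (long)strlen(s);

		if (index < 0) {
			index += len;
			if (index < 0 && index + remaining > 0) {
				remaining += index;	/* trim front overhang */
				index = 0;
			}
		}

		if (index >= len || index < 0)
			remaining = 0;
		else if (remaining < 0)
			remaining += len - index;	/* drop from the end */
		else if (index + remaining > len)
			remaining = len - index;

		*startp = index;
		*nbytesp = remaining;
	}
	/*
	 * For example, clamp_substr("gaslight", -5, 3, ...) yields start 3
	 * and length 3 -- the substring "lig".
	 */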
3938 4037
3939 4038 case DIF_SUBR_TOUPPER:
3940 4039 case DIF_SUBR_TOLOWER: {
3941 4040 uintptr_t s = tupregs[0].dttk_value;
3942 4041 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3943 4042 char *dest = (char *)mstate->dtms_scratch_ptr, c;
3944 4043 size_t len = dtrace_strlen((char *)s, size);
3945 4044 char lower, upper, convert;
3946 4045 int64_t i;
3947 4046
3948 4047 if (subr == DIF_SUBR_TOUPPER) {
3949 4048 lower = 'a';
3950 4049 upper = 'z';
3951 4050 convert = 'A';
3952 4051 } else {
3953 4052 lower = 'A';
3954 4053 upper = 'Z';
3955 4054 convert = 'a';
3956 4055 }
3957 4056
3958 4057 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
3959 4058 regs[rd] = NULL;
3960 4059 break;
3961 4060 }
3962 4061
3963 4062 if (!DTRACE_INSCRATCH(mstate, size)) {
3964 4063 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3965 4064 regs[rd] = NULL;
3966 4065 break;
3967 4066 }
3968 4067
3969 4068 for (i = 0; i < size - 1; i++) {
3970 4069 if ((c = dtrace_load8(s + i)) == '\0')
3971 4070 break;
3972 4071
3973 4072 if (c >= lower && c <= upper)
3974 4073 c = convert + (c - lower);
3975 4074
3976 4075 dest[i] = c;
3977 4076 }
3978 4077
3979 4078 ASSERT(i < size);
3980 4079 dest[i] = '\0';
3981 4080 regs[rd] = (uintptr_t)dest;
3982 4081 mstate->dtms_scratch_ptr += size;
3983 4082 break;
3984 4083 }
3985 4084
3986 4085 case DIF_SUBR_GETMAJOR:
3987 4086 #ifdef _LP64
3988 4087 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
3989 4088 #else
3990 4089 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ;
3991 4090 #endif
3992 4091 break;
3993 4092
3994 4093 case DIF_SUBR_GETMINOR:
3995 4094 #ifdef _LP64
3996 4095 regs[rd] = tupregs[0].dttk_value & MAXMIN64;
3997 4096 #else
3998 4097 regs[rd] = tupregs[0].dttk_value & MAXMIN;
3999 4098 #endif
4000 4099 break;
4001 4100
4002 4101 case DIF_SUBR_DDI_PATHNAME: {
4003 4102 /*
4004 4103 * This one is a galactic mess. We are going to roughly
4005 4104 * emulate ddi_pathname(), but it's made more complicated
4006 4105 * by the fact that we (a) want to include the minor name and
4007 4106 * (b) must proceed iteratively instead of recursively.
4008 4107 */
4009 4108 uintptr_t dest = mstate->dtms_scratch_ptr;
4010 4109 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4011 4110 char *start = (char *)dest, *end = start + size - 1;
4012 4111 uintptr_t daddr = tupregs[0].dttk_value;
4013 4112 int64_t minor = (int64_t)tupregs[1].dttk_value;
4014 4113 char *s;
4015 4114 int i, len, depth = 0;
4016 4115
4017 4116 /*
4018 4117 * Due to all the pointer jumping we do and context we must
4019 4118 * rely upon, we just mandate that the user must have kernel
4020 4119 * read privileges to use this routine.
4021 4120 */
4022 4121 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) {
4023 4122 *flags |= CPU_DTRACE_KPRIV;
4024 4123 *illval = daddr;
4025 4124 regs[rd] = NULL;
4026 4125 }
4027 4126
4028 4127 if (!DTRACE_INSCRATCH(mstate, size)) {
4029 4128 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4030 4129 regs[rd] = NULL;
4031 4130 break;
4032 4131 }
4033 4132
4034 4133 *end = '\0';
4035 4134
4036 4135 /*
4037 4136 * We want to have a name for the minor. In order to do this,
4038 4137 * we need to walk the minor list from the devinfo. We want
4039 4138 * to be sure that we don't infinitely walk a circular list,
4040 4139 * so we check for circularity by sending a scout pointer
4041 4140 * ahead two elements for every element that we iterate over;
4042 4141 * if the list is circular, these will ultimately point to the
4043 4142 * same element. You may recognize this little trick as the
4044 4143 * answer to a stupid interview question -- one that always
4045 4144 * seems to be asked by those who had to have it laboriously
4046 4145 * explained to them, and who can't even concisely describe
4047 4146 * the conditions under which one would be forced to resort to
4048 4147 * this technique. Needless to say, those conditions are
4049 4148 * found here -- and probably only here. Is this the only use
4050 4149 * of this infamous trick in shipping, production code? If it
4051 4150 * isn't, it probably should be...
4052 4151 */
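		/*
		 * The scout described above is Floyd's two-pointer cycle
		 * detection: advance one pointer a single link and another
		 * two links per step; on a circular list the two must
		 * eventually meet.  A minimal sketch with a hypothetical
		 * node type:
		 */
		struct node {
			struct node *next;
		};

		static int
		list_is_circular(const struct node *head)
		{
			const struct node *slow = head, *fast = head;

			while (fast != NULL && fast->next != NULL) {
				slow = slow->next;		/* one link */
				fast = fast->next->next;	/* two links */
				if (slow == fast)
					return (1);	/* pointers met: cycle */
			}
			return (0);	/* fast fell off the end: no cycle */
		}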
4053 4152 if (minor != -1) {
4054 4153 uintptr_t maddr = dtrace_loadptr(daddr +
4055 4154 offsetof(struct dev_info, devi_minor));
4056 4155
4057 4156 uintptr_t next = offsetof(struct ddi_minor_data, next);
4058 4157 uintptr_t name = offsetof(struct ddi_minor_data,
4059 4158 d_minor) + offsetof(struct ddi_minor, name);
4060 4159 uintptr_t dev = offsetof(struct ddi_minor_data,
4061 4160 d_minor) + offsetof(struct ddi_minor, dev);
4062 4161 uintptr_t scout;
4063 4162
4064 4163 if (maddr != NULL)
4065 4164 scout = dtrace_loadptr(maddr + next);
4066 4165
4067 4166 while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
4068 4167 uint64_t m;
4069 4168 #ifdef _LP64
4070 4169 m = dtrace_load64(maddr + dev) & MAXMIN64;
4071 4170 #else
4072 4171 m = dtrace_load32(maddr + dev) & MAXMIN;
4073 4172 #endif
4074 4173 if (m != minor) {
4075 4174 maddr = dtrace_loadptr(maddr + next);
4076 4175
4077 4176 if (scout == NULL)
4078 4177 continue;
4079 4178
4080 4179 scout = dtrace_loadptr(scout + next);
4081 4180
4082 4181 if (scout == NULL)
4083 4182 continue;
4084 4183
4085 4184 scout = dtrace_loadptr(scout + next);
4086 4185
4087 4186 if (scout == NULL)
4088 4187 continue;
4089 4188
4090 4189 if (scout == maddr) {
4091 4190 *flags |= CPU_DTRACE_ILLOP;
4092 4191 break;
4093 4192 }
4094 4193
4095 4194 continue;
4096 4195 }
4097 4196
4098 4197 /*
4099 4198 * We have the minor data. Now we need to
4100 4199 * copy the minor's name into the end of the
4101 4200 * pathname.
4102 4201 */
4103 4202 s = (char *)dtrace_loadptr(maddr + name);
4104 4203 len = dtrace_strlen(s, size);
4105 4204
4106 4205 if (*flags & CPU_DTRACE_FAULT)
4107 4206 break;
4108 4207
4109 4208 if (len != 0) {
4110 4209 if ((end -= (len + 1)) < start)
4111 4210 break;
4112 4211
4113 4212 *end = ':';
4114 4213 }
4115 4214
4116 4215 for (i = 1; i <= len; i++)
4117 4216 end[i] = dtrace_load8((uintptr_t)s++);
4118 4217 break;
4119 4218 }
4120 4219 }
4121 4220
4122 4221 while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
4123 4222 ddi_node_state_t devi_state;
4124 4223
4125 4224 devi_state = dtrace_load32(daddr +
4126 4225 offsetof(struct dev_info, devi_node_state));
4127 4226
4128 4227 if (*flags & CPU_DTRACE_FAULT)
4129 4228 break;
4130 4229
4131 4230 if (devi_state >= DS_INITIALIZED) {
4132 4231 s = (char *)dtrace_loadptr(daddr +
4133 4232 offsetof(struct dev_info, devi_addr));
4134 4233 len = dtrace_strlen(s, size);
4135 4234
4136 4235 if (*flags & CPU_DTRACE_FAULT)
4137 4236 break;
4138 4237
4139 4238 if (len != 0) {
4140 4239 if ((end -= (len + 1)) < start)
4141 4240 break;
4142 4241
4143 4242 *end = '@';
4144 4243 }
4145 4244
4146 4245 for (i = 1; i <= len; i++)
4147 4246 end[i] = dtrace_load8((uintptr_t)s++);
4148 4247 }
4149 4248
4150 4249 /*
4151 4250 * Now for the node name...
4152 4251 */
4153 4252 s = (char *)dtrace_loadptr(daddr +
4154 4253 offsetof(struct dev_info, devi_node_name));
4155 4254
4156 4255 daddr = dtrace_loadptr(daddr +
4157 4256 offsetof(struct dev_info, devi_parent));
4158 4257
4159 4258 /*
4160 4259 * If our parent is NULL (that is, if we're the root
4161 4260 * node), we're going to use the special path
4162 4261 * "devices".
4163 4262 */
4164 4263 if (daddr == NULL)
4165 4264 s = "devices";
4166 4265
4167 4266 len = dtrace_strlen(s, size);
4168 4267 if (*flags & CPU_DTRACE_FAULT)
4169 4268 break;
4170 4269
4171 4270 if ((end -= (len + 1)) < start)
4172 4271 break;
4173 4272
4174 4273 for (i = 1; i <= len; i++)
4175 4274 end[i] = dtrace_load8((uintptr_t)s++);
4176 4275 *end = '/';
4177 4276
4178 4277 if (depth++ > dtrace_devdepth_max) {
4179 4278 *flags |= CPU_DTRACE_ILLOP;
4180 4279 break;
4181 4280 }
4182 4281 }
4183 4282
4184 4283 if (end < start)
4185 4284 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4186 4285
4187 4286 if (daddr == NULL) {
4188 4287 regs[rd] = (uintptr_t)end;
4189 4288 mstate->dtms_scratch_ptr += size;
4190 4289 }
4191 4290
4192 4291 break;
4193 4292 }
4194 4293
4195 4294 case DIF_SUBR_STRJOIN: {
4196 4295 char *d = (char *)mstate->dtms_scratch_ptr;
4197 4296 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4198 4297 uintptr_t s1 = tupregs[0].dttk_value;
4199 4298 uintptr_t s2 = tupregs[1].dttk_value;
4200 4299 int i = 0;
4201 4300
4202 4301 if (!dtrace_strcanload(s1, size, mstate, vstate) ||
4203 4302 !dtrace_strcanload(s2, size, mstate, vstate)) {
4204 4303 regs[rd] = NULL;
4205 4304 break;
4206 4305 }
4207 4306
4208 4307 if (!DTRACE_INSCRATCH(mstate, size)) {
4209 4308 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4210 4309 regs[rd] = NULL;
4211 4310 break;
4212 4311 }
4213 4312
4214 4313 for (;;) {
4215 4314 if (i >= size) {
4216 4315 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4217 4316 regs[rd] = NULL;
4218 4317 break;
4219 4318 }
4220 4319
4221 4320 if ((d[i++] = dtrace_load8(s1++)) == '\0') {
4222 4321 i--;
4223 4322 break;
4224 4323 }
4225 4324 }
4226 4325
4227 4326 for (;;) {
4228 4327 if (i >= size) {
4229 4328 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4230 4329 regs[rd] = NULL;
4231 4330 break;
4232 4331 }
4233 4332
4234 4333 if ((d[i++] = dtrace_load8(s2++)) == '\0')
4235 4334 break;
4236 4335 }
4237 4336
4238 4337 if (i < size) {
4239 4338 mstate->dtms_scratch_ptr += i;
4240 4339 regs[rd] = (uintptr_t)d;
4241 4340 }
4242 4341
4243 4342 break;
4244 4343 }
4245 4344
4246 4345 case DIF_SUBR_LLTOSTR: {
4247 4346 int64_t i = (int64_t)tupregs[0].dttk_value;
4248 4347 uint64_t val, digit;
4249 4348 uint64_t size = 65; /* enough room for 2^64 in binary */
4250 4349 char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
4251 4350 int base = 10;
4252 4351
4253 4352 if (nargs > 1) {
4254 4353 if ((base = tupregs[1].dttk_value) <= 1 ||
4255 4354 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
4256 4355 *flags |= CPU_DTRACE_ILLOP;
4257 4356 break;
4258 4357 }
4259 4358 }
4260 4359
4261 4360 val = (base == 10 && i < 0) ? i * -1 : i;
4262 4361
4263 4362 if (!DTRACE_INSCRATCH(mstate, size)) {
4264 4363 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4265 4364 regs[rd] = NULL;
4266 4365 break;
4267 4366 }
4268 4367
4269 4368 for (*end-- = '\0'; val; val /= base) {
4270 4369 if ((digit = val % base) <= '9' - '0') {
4271 4370 *end-- = '0' + digit;
4272 4371 } else {
4273 4372 *end-- = 'a' + (digit - ('9' - '0') - 1);
4274 4373 }
4275 4374 }
4276 4375
4277 4376 if (i == 0 && base == 16)
4278 4377 *end-- = '0';
4279 4378
4280 4379 if (base == 16)
4281 4380 *end-- = 'x';
4282 4381
4283 4382 if (i == 0 || base == 8 || base == 16)
4284 4383 *end-- = '0';
4285 4384
4286 4385 if (i < 0 && base == 10)
4287 4386 *end-- = '-';
4288 4387
4289 4388 regs[rd] = (uintptr_t)end + 1;
4290 4389 mstate->dtms_scratch_ptr += size;
4291 4390 break;
4292 4391 }
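	/*
	 * lltostr() above emits digits least-significant first into the back
	 * of a fixed buffer and returns a pointer to the first digit written.
	 * A minimal sketch of the same technique (unsigned only; no sign,
	 * leading-zero, or "0x" handling; base must be 2..36):
	 */
	static char *
	u64tostr(uint64_t val, unsigned base, char *buf, size_t buflen)
	{
		char *end = buf + buflen - 1;

		*end = '\0';
		if (val == 0)
			*--end = '0';
		for (; val != 0; val /= base)
			*--end = "0123456789abcdefghijklmnopqrstuvwxyz"
			    [val % base];
		return (end);	/* points at the most significant digit */
	}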
4293 4392
4294 4393 case DIF_SUBR_HTONS:
4295 4394 case DIF_SUBR_NTOHS:
4296 4395 #ifdef _BIG_ENDIAN
4297 4396 regs[rd] = (uint16_t)tupregs[0].dttk_value;
4298 4397 #else
4299 4398 regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
4300 4399 #endif
4301 4400 break;
4302 4401
4303 4402
4304 4403 case DIF_SUBR_HTONL:
4305 4404 case DIF_SUBR_NTOHL:
4306 4405 #ifdef _BIG_ENDIAN
4307 4406 regs[rd] = (uint32_t)tupregs[0].dttk_value;
4308 4407 #else
4309 4408 regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
4310 4409 #endif
4311 4410 break;
4312 4411
4313 4412
4314 4413 case DIF_SUBR_HTONLL:
4315 4414 case DIF_SUBR_NTOHLL:
4316 4415 #ifdef _BIG_ENDIAN
4317 4416 regs[rd] = (uint64_t)tupregs[0].dttk_value;
4318 4417 #else
4319 4418 regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
4320 4419 #endif
4321 4420 break;
4322 4421
4323 4422
4324 4423 case DIF_SUBR_DIRNAME:
4325 4424 case DIF_SUBR_BASENAME: {
4326 4425 char *dest = (char *)mstate->dtms_scratch_ptr;
4327 4426 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4328 4427 uintptr_t src = tupregs[0].dttk_value;
4329 4428 int i, j, len = dtrace_strlen((char *)src, size);
4330 4429 int lastbase = -1, firstbase = -1, lastdir = -1;
4331 4430 int start, end;
4332 4431
4333 4432 if (!dtrace_canload(src, len + 1, mstate, vstate)) {
4334 4433 regs[rd] = NULL;
4335 4434 break;
4336 4435 }
4337 4436
4338 4437 if (!DTRACE_INSCRATCH(mstate, size)) {
4339 4438 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4340 4439 regs[rd] = NULL;
4341 4440 break;
4342 4441 }
4343 4442
4344 4443 /*
4345 4444 		 * The basename and dirname of a zero-length string are
4346 4445 		 * defined to be "."
4347 4446 */
4348 4447 if (len == 0) {
4349 4448 len = 1;
4350 4449 src = (uintptr_t)".";
4351 4450 }
4352 4451
4353 4452 /*
4354 4453 * Start from the back of the string, moving back toward the
4355 4454 * front until we see a character that isn't a slash. That
4356 4455 * character is the last character in the basename.
4357 4456 */
4358 4457 for (i = len - 1; i >= 0; i--) {
4359 4458 if (dtrace_load8(src + i) != '/')
4360 4459 break;
4361 4460 }
4362 4461
4363 4462 if (i >= 0)
4364 4463 lastbase = i;
4365 4464
4366 4465 /*
4367 4466 * Starting from the last character in the basename, move
4368 4467 * towards the front until we find a slash. The character
4369 4468 * that we processed immediately before that is the first
4370 4469 * character in the basename.
4371 4470 */
4372 4471 for (; i >= 0; i--) {
4373 4472 if (dtrace_load8(src + i) == '/')
4374 4473 break;
4375 4474 }
4376 4475
4377 4476 if (i >= 0)
4378 4477 firstbase = i + 1;
4379 4478
4380 4479 /*
4381 4480 * Now keep going until we find a non-slash character. That
4382 4481 * character is the last character in the dirname.
4383 4482 */
4384 4483 for (; i >= 0; i--) {
4385 4484 if (dtrace_load8(src + i) != '/')
4386 4485 break;
4387 4486 }
4388 4487
4389 4488 if (i >= 0)
4390 4489 lastdir = i;
4391 4490
4392 4491 ASSERT(!(lastbase == -1 && firstbase != -1));
4393 4492 ASSERT(!(firstbase == -1 && lastdir != -1));
4394 4493
4395 4494 if (lastbase == -1) {
4396 4495 /*
4397 4496 * We didn't find a non-slash character. We know that
4398 4497 * the length is non-zero, so the whole string must be
4399 4498 * slashes. In either the dirname or the basename
4400 4499 * case, we return '/'.
4401 4500 */
4402 4501 ASSERT(firstbase == -1);
4403 4502 firstbase = lastbase = lastdir = 0;
4404 4503 }
4405 4504
4406 4505 if (firstbase == -1) {
4407 4506 /*
4408 4507 * The entire string consists only of a basename
4409 4508 * component. If we're looking for dirname, we need
4410 4509 * to change our string to be just "."; if we're
4411 4510 * looking for a basename, we'll just set the first
4412 4511 * character of the basename to be 0.
4413 4512 */
4414 4513 if (subr == DIF_SUBR_DIRNAME) {
4415 4514 ASSERT(lastdir == -1);
4416 4515 src = (uintptr_t)".";
4417 4516 lastdir = 0;
4418 4517 } else {
4419 4518 firstbase = 0;
4420 4519 }
4421 4520 }
4422 4521
4423 4522 if (subr == DIF_SUBR_DIRNAME) {
4424 4523 if (lastdir == -1) {
4425 4524 /*
4426 4525 * We know that we have a slash in the name --
4427 4526 * or lastdir would be set to 0, above. And
4428 4527 * because lastdir is -1, we know that this
4429 4528 * slash must be the first character. (That
4430 4529 * is, the full string must be of the form
4431 4530 * "/basename".) In this case, the last
4432 4531 * character of the directory name is 0.
4433 4532 */
4434 4533 lastdir = 0;
4435 4534 }
4436 4535
4437 4536 start = 0;
4438 4537 end = lastdir;
4439 4538 } else {
4440 4539 ASSERT(subr == DIF_SUBR_BASENAME);
4441 4540 ASSERT(firstbase != -1 && lastbase != -1);
4442 4541 start = firstbase;
4443 4542 end = lastbase;
4444 4543 }
4445 4544
4446 4545 for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
4447 4546 dest[j] = dtrace_load8(src + i);
4448 4547
4449 4548 dest[j] = '\0';
4450 4549 regs[rd] = (uintptr_t)dest;
4451 4550 mstate->dtms_scratch_ptr += size;
4452 4551 break;
4453 4552 }
4454 4553
4554 + case DIF_SUBR_GETF: {
4555 + uintptr_t fd = tupregs[0].dttk_value;
4556 + uf_info_t *finfo = &curthread->t_procp->p_user.u_finfo;
4557 + file_t *fp;
4558 +
4559 + if (!dtrace_priv_proc(state, mstate)) {
4560 + regs[rd] = NULL;
4561 + break;
4562 + }
4563 +
4564 + /*
4565 + * This is safe because fi_nfiles only increases, and the
4566 + * fi_list array is not freed when the array size doubles.
4567 + * (See the comment in flist_grow() for details on the
4568 + * management of the u_finfo structure.)
4569 + */
4570 + fp = fd < finfo->fi_nfiles ? finfo->fi_list[fd].uf_file : NULL;
4571 +
4572 + mstate->dtms_getf = fp;
4573 + regs[rd] = (uintptr_t)fp;
4574 + break;
4575 + }
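	/*
	 * The unlocked lookup above is safe only because of the u_finfo
	 * invariants cited in the comment: the file count never shrinks and
	 * superseded fi_list arrays are never freed.  A sketch of the same
	 * grow-only pattern with hypothetical types (not the real uf_info_t):
	 */
	typedef struct {
		volatile size_t	nentries;	/* only ever increases */
		void * volatile	*entries;	/* old arrays never freed */
	} growtab_t;

	static void *
	growtab_get(const growtab_t *t, size_t idx)
	{
		/*
		 * Snapshot the count before indexing.  A stale snapshot can
		 * only under-report, so the index never reaches past live
		 * memory.
		 */
		size_t n = t->nentries;

		return (idx < n ? t->entries[idx] : NULL);
	}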
4576 +
4455 4577 case DIF_SUBR_CLEANPATH: {
4456 4578 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4457 4579 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4458 4580 uintptr_t src = tupregs[0].dttk_value;
4459 4581 int i = 0, j = 0;
4582 + zone_t *z;
4460 4583
4461 4584 if (!dtrace_strcanload(src, size, mstate, vstate)) {
4462 4585 regs[rd] = NULL;
4463 4586 break;
4464 4587 }
4465 4588
4466 4589 if (!DTRACE_INSCRATCH(mstate, size)) {
4467 4590 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4468 4591 regs[rd] = NULL;
4469 4592 break;
4470 4593 }
4471 4594
4472 4595 /*
4473 4596 * Move forward, loading each character.
4474 4597 */
4475 4598 do {
4476 4599 c = dtrace_load8(src + i++);
4477 4600 next:
4478 4601 if (j + 5 >= size) /* 5 = strlen("/..c\0") */
4479 4602 break;
4480 4603
4481 4604 if (c != '/') {
4482 4605 dest[j++] = c;
4483 4606 continue;
4484 4607 }
4485 4608
4486 4609 c = dtrace_load8(src + i++);
4487 4610
4488 4611 if (c == '/') {
4489 4612 /*
4490 4613 * We have two slashes -- we can just advance
4491 4614 * to the next character.
4492 4615 */
4493 4616 goto next;
4494 4617 }
4495 4618
4496 4619 if (c != '.') {
4497 4620 /*
4498 4621 * This is not "." and it's not ".." -- we can
4499 4622 * just store the "/" and this character and
4500 4623 * drive on.
4501 4624 */
4502 4625 dest[j++] = '/';
4503 4626 dest[j++] = c;
4504 4627 continue;
4505 4628 }
4506 4629
4507 4630 c = dtrace_load8(src + i++);
4508 4631
4509 4632 if (c == '/') {
4510 4633 /*
4511 4634 * This is a "/./" component. We're not going
4512 4635 * to store anything in the destination buffer;
4513 4636 * we're just going to go to the next component.
4514 4637 */
4515 4638 goto next;
4516 4639 }
4517 4640
4518 4641 if (c != '.') {
4519 4642 /*
4520 4643 * This is not ".." -- we can just store the
4521 4644 * "/." and this character and continue
4522 4645 * processing.
4523 4646 */
4524 4647 dest[j++] = '/';
4525 4648 dest[j++] = '.';
4526 4649 dest[j++] = c;
4527 4650 continue;
4528 4651 }
4529 4652
4530 4653 c = dtrace_load8(src + i++);
4531 4654
4532 4655 if (c != '/' && c != '\0') {
4533 4656 /*
4534 4657 * This is not ".." -- it's "..[mumble]".
4535 4658 * We'll store the "/.." and this character
4536 4659 * and continue processing.
4537 4660 */
4538 4661 dest[j++] = '/';
4539 4662 dest[j++] = '.';
4540 4663 dest[j++] = '.';
4541 4664 dest[j++] = c;
4542 4665 continue;
4543 4666 }
4544 4667
4545 4668 /*
4546 4669 * This is "/../" or "/..\0". We need to back up
4547 4670 * our destination pointer until we find a "/".
4548 4671 */
4549 4672 i--;
4550 4673 while (j != 0 && dest[--j] != '/')
4551 4674 continue;
4552 4675
4553 4676 if (c == '\0')
4554 4677 dest[++j] = '/';
4555 4678 } while (c != '\0');
4556 4679
4557 4680 dest[j] = '\0';
4681 +
4682 + if (mstate->dtms_getf != NULL &&
4683 + !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
4684 + (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
4685 + /*
4686 + * If we've done a getf() as a part of this ECB and we
4687 + * don't have kernel access (and we're not in the global
4688 + * zone), check if the path we cleaned up begins with
4689 + * the zone's root path, and trim it off if so. Note
4690 + * that this is an output cleanliness issue, not a
4691 + * security issue: knowing one's zone root path does
4692 + * not enable privilege escalation.
4693 + */
4694 + if (strstr(dest, z->zone_rootpath) == dest)
4695 + dest += strlen(z->zone_rootpath) - 1;
4696 + }
4697 +
4558 4698 regs[rd] = (uintptr_t)dest;
4559 4699 mstate->dtms_scratch_ptr += size;
4560 4700 break;
4561 4701 }
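	/*
	 * The new trimming logic above relies on zone_rootpath keeping its
	 * trailing slash, so advancing by strlen(rootpath) - 1 leaves a
	 * leading '/' on the result.  A hedged sketch with illustrative
	 * paths:
	 */
	static const char *
	trim_zone_root(const char *path, const char *rootpath)
	{
		/* rootpath is assumed to end in '/', as zone_rootpath does */
		if (strstr(path, rootpath) == path)
			return (path + strlen(rootpath) - 1);
		return (path);
	}
	/*
	 * trim_zone_root("/zones/z1/root/etc/passwd", "/zones/z1/root/")
	 * returns "/etc/passwd"; a path outside the zone root is returned
	 * unchanged.
	 */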
4562 4702
4563 4703 case DIF_SUBR_INET_NTOA:
4564 4704 case DIF_SUBR_INET_NTOA6:
4565 4705 case DIF_SUBR_INET_NTOP: {
4566 4706 size_t size;
4567 4707 int af, argi, i;
4568 4708 char *base, *end;
4569 4709
4570 4710 if (subr == DIF_SUBR_INET_NTOP) {
4571 4711 af = (int)tupregs[0].dttk_value;
4572 4712 argi = 1;
4573 4713 } else {
4574 4714 			af = subr == DIF_SUBR_INET_NTOA ? AF_INET : AF_INET6;
4575 4715 argi = 0;
4576 4716 }
4577 4717
4578 4718 if (af == AF_INET) {
4579 4719 ipaddr_t ip4;
4580 4720 uint8_t *ptr8, val;
4581 4721
4582 4722 /*
4583 4723 * Safely load the IPv4 address.
4584 4724 */
4585 4725 ip4 = dtrace_load32(tupregs[argi].dttk_value);
4586 4726
4587 4727 /*
4588 4728 			 * Check that an IPv4 string will fit in scratch.
4589 4729 */
4590 4730 size = INET_ADDRSTRLEN;
4591 4731 if (!DTRACE_INSCRATCH(mstate, size)) {
4592 4732 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4593 4733 regs[rd] = NULL;
4594 4734 break;
4595 4735 }
4596 4736 base = (char *)mstate->dtms_scratch_ptr;
4597 4737 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4598 4738
4599 4739 /*
4600 4740 * Stringify as a dotted decimal quad.
4601 4741 */
4602 4742 *end-- = '\0';
4603 4743 ptr8 = (uint8_t *)&ip4;
4604 4744 for (i = 3; i >= 0; i--) {
4605 4745 val = ptr8[i];
4606 4746
4607 4747 if (val == 0) {
4608 4748 *end-- = '0';
4609 4749 } else {
4610 4750 for (; val; val /= 10) {
4611 4751 *end-- = '0' + (val % 10);
4612 4752 }
4613 4753 }
4614 4754
4615 4755 if (i > 0)
4616 4756 *end-- = '.';
4617 4757 }
4618 4758 ASSERT(end + 1 >= base);
4619 4759
4620 4760 } else if (af == AF_INET6) {
4621 4761 struct in6_addr ip6;
4622 4762 int firstzero, tryzero, numzero, v6end;
4623 4763 uint16_t val;
4624 4764 const char digits[] = "0123456789abcdef";
4625 4765
4626 4766 /*
4627 4767 			 * Stringify using RFC 1884 convention 2: 16-bit
4628 4768 			 * hexadecimal values with zero-run compression.
4629 4769 * Lower case hexadecimal digits are used.
4630 4770 * eg, fe80::214:4fff:fe0b:76c8.
4631 4771 * The IPv4 embedded form is returned for inet_ntop,
4632 4772 * just the IPv4 string is returned for inet_ntoa6.
4633 4773 */
4634 4774
4635 4775 /*
4636 4776 * Safely load the IPv6 address.
4637 4777 */
4638 4778 dtrace_bcopy(
4639 4779 (void *)(uintptr_t)tupregs[argi].dttk_value,
4640 4780 (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));
4641 4781
4642 4782 /*
4643 4783 			 * Check that an IPv6 string will fit in scratch.
4644 4784 */
4645 4785 size = INET6_ADDRSTRLEN;
4646 4786 if (!DTRACE_INSCRATCH(mstate, size)) {
4647 4787 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4648 4788 regs[rd] = NULL;
4649 4789 break;
4650 4790 }
4651 4791 base = (char *)mstate->dtms_scratch_ptr;
4652 4792 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4653 4793 *end-- = '\0';
4654 4794
4655 4795 /*
4656 4796 * Find the longest run of 16 bit zero values
4657 4797 * for the single allowed zero compression - "::".
4658 4798 */
4659 4799 firstzero = -1;
4660 4800 tryzero = -1;
4661 4801 numzero = 1;
4662 4802 for (i = 0; i < sizeof (struct in6_addr); i++) {
4663 4803 if (ip6._S6_un._S6_u8[i] == 0 &&
4664 4804 tryzero == -1 && i % 2 == 0) {
4665 4805 tryzero = i;
4666 4806 continue;
4667 4807 }
4668 4808
4669 4809 if (tryzero != -1 &&
4670 4810 (ip6._S6_un._S6_u8[i] != 0 ||
4671 4811 i == sizeof (struct in6_addr) - 1)) {
4672 4812
4673 4813 if (i - tryzero <= numzero) {
4674 4814 tryzero = -1;
4675 4815 continue;
4676 4816 }
4677 4817
4678 4818 firstzero = tryzero;
4679 4819 numzero = i - i % 2 - tryzero;
4680 4820 tryzero = -1;
4681 4821
4682 4822 if (ip6._S6_un._S6_u8[i] == 0 &&
4683 4823 i == sizeof (struct in6_addr) - 1)
4684 4824 numzero += 2;
4685 4825 }
4686 4826 }
4687 4827 ASSERT(firstzero + numzero <= sizeof (struct in6_addr));
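			/*
			 * The scan above records the longest even-aligned run
			 * of zero bytes so that only a single run is
			 * compressed to "::".  A sketch of the same search
			 * expressed over eight 16-bit groups (a hypothetical
			 * helper, not the byte-wise DIF code):
			 */
			static void
			longest_zero_run(const uint16_t g[8], int *startp,
			    int *lenp)
			{
				int best = -1, bestlen = 0, run = -1, i;

				for (i = 0; i <= 8; i++) {
					if (i < 8 && g[i] == 0) {
						if (run == -1)
							run = i; /* run begins */
					} else if (run != -1) {
						if (i - run > bestlen) {
							best = run;
							bestlen = i - run;
						}
						run = -1;	/* run ends */
					}
				}

				/* single zero groups don't earn a "::" */
				*startp = (bestlen >= 2) ? best : -1;
				*lenp = (bestlen >= 2) ? bestlen : 0;
			}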
4688 4828
4689 4829 /*
4690 4830 * Check for an IPv4 embedded address.
4691 4831 */
4692 4832 v6end = sizeof (struct in6_addr) - 2;
4693 4833 if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
4694 4834 IN6_IS_ADDR_V4COMPAT(&ip6)) {
4695 4835 for (i = sizeof (struct in6_addr) - 1;
4696 4836 i >= DTRACE_V4MAPPED_OFFSET; i--) {
4697 4837 ASSERT(end >= base);
4698 4838
4699 4839 val = ip6._S6_un._S6_u8[i];
4700 4840
4701 4841 if (val == 0) {
4702 4842 *end-- = '0';
4703 4843 } else {
4704 4844 for (; val; val /= 10) {
4705 4845 *end-- = '0' + val % 10;
4706 4846 }
4707 4847 }
4708 4848
4709 4849 if (i > DTRACE_V4MAPPED_OFFSET)
4710 4850 *end-- = '.';
4711 4851 }
4712 4852
4713 4853 if (subr == DIF_SUBR_INET_NTOA6)
4714 4854 goto inetout;
4715 4855
4716 4856 /*
4717 4857 * Set v6end to skip the IPv4 address that
4718 4858 * we have already stringified.
4719 4859 */
4720 4860 v6end = 10;
4721 4861 }
4722 4862
4723 4863 /*
4724 4864 * Build the IPv6 string by working through the
4725 4865 * address in reverse.
4726 4866 */
4727 4867 for (i = v6end; i >= 0; i -= 2) {
4728 4868 ASSERT(end >= base);
4729 4869
4730 4870 if (i == firstzero + numzero - 2) {
4731 4871 *end-- = ':';
4732 4872 *end-- = ':';
4733 4873 i -= numzero - 2;
4734 4874 continue;
4735 4875 }
4736 4876
4737 4877 if (i < 14 && i != firstzero - 2)
4738 4878 *end-- = ':';
4739 4879
4740 4880 val = (ip6._S6_un._S6_u8[i] << 8) +
4741 4881 ip6._S6_un._S6_u8[i + 1];
4742 4882
4743 4883 if (val == 0) {
4744 4884 *end-- = '0';
4745 4885 } else {
4746 4886 for (; val; val /= 16) {
4747 4887 *end-- = digits[val % 16];
4748 4888 }
4749 4889 }
4750 4890 }
4751 4891 ASSERT(end + 1 >= base);
4752 4892
4753 4893 } else {
4754 4894 /*
4755 4895 			 * The user didn't use AF_INET or AF_INET6.
4756 4896 */
4757 4897 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
4758 4898 regs[rd] = NULL;
4759 4899 break;
4760 4900 }
4761 4901
4762 4902 inetout: regs[rd] = (uintptr_t)end + 1;
4763 4903 mstate->dtms_scratch_ptr += size;
4764 4904 break;
4765 4905 }
4766 4906
4767 4907 }
4768 4908 }
4769 4909
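
The zero-compression scan above deserves a close read: it walks the 16 address bytes, only starting a candidate run on a 16-bit group boundary (i % 2 == 0), and keeps the longest run found so that exactly one "::" is emitted. A standalone userland sketch of the same selection logic (find_zero_run is our name for it; this is an illustration, not the kernel code):

	#include <stdio.h>

	/*
	 * Find the longest run of zero 16-bit groups in a 16-byte IPv6
	 * address; *firstzero is its starting byte offset, *numzero its
	 * length in bytes. numzero starts at 1 so a single zero group
	 * is never compressed, matching dtrace_dif_subr().
	 */
	static void
	find_zero_run(const unsigned char a[16], int *firstzero, int *numzero)
	{
		int tryzero = -1, i;

		*firstzero = -1;
		*numzero = 1;

		for (i = 0; i < 16; i++) {
			if (a[i] == 0 && tryzero == -1 && i % 2 == 0) {
				tryzero = i;
				continue;
			}

			if (tryzero != -1 && (a[i] != 0 || i == 15)) {
				if (i - tryzero <= *numzero) {
					tryzero = -1;
					continue;
				}

				*firstzero = tryzero;
				*numzero = i - i % 2 - tryzero;
				tryzero = -1;

				if (a[i] == 0 && i == 15)
					*numzero += 2;
			}
		}
	}

	int
	main(void)
	{
		/* fe80::214:4fff:fe0b:76c8 */
		unsigned char a[16] = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0,
		    0x02, 0x14, 0x4f, 0xff, 0xfe, 0x0b, 0x76, 0xc8 };
		int first, len;

		find_zero_run(a, &first, &len);
		printf("\"::\" covers bytes %d..%d\n", first, first + len - 1);
		return (0);
	}
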
4770 4910 /*
4771 4911 * Emulate the execution of DTrace IR instructions specified by the given
4772 4912 * DIF object. This function is deliberately void of assertions as all of
4773 4913 * the necessary checks are handled by a call to dtrace_difo_validate().
4774 4914 */
4775 4915 static uint64_t
4776 4916 dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
4777 4917 dtrace_vstate_t *vstate, dtrace_state_t *state)
4778 4918 {
4779 4919 const dif_instr_t *text = difo->dtdo_buf;
4780 4920 const uint_t textlen = difo->dtdo_len;
4781 4921 const char *strtab = difo->dtdo_strtab;
4782 4922 const uint64_t *inttab = difo->dtdo_inttab;
4783 4923
4784 4924 uint64_t rval = 0;
4785 4925 dtrace_statvar_t *svar;
4786 4926 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
4787 4927 dtrace_difv_t *v;
4788 4928 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
4789 4929 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
4790 4930
4791 4931 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
4792 4932 uint64_t regs[DIF_DIR_NREGS];
4793 4933 uint64_t *tmp;
4794 4934
4795 4935 uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
4796 4936 int64_t cc_r;
4797 4937 uint_t pc = 0, id, opc;
4798 4938 uint8_t ttop = 0;
4799 4939 dif_instr_t instr;
4800 4940 uint_t r1, r2, rd;
4801 4941
4802 4942 /*
4803 4943 * We stash the current DIF object into the machine state: we need it
4804 4944 * for subsequent access checking.
4805 4945 */
4806 4946 mstate->dtms_difo = difo;
4807 4947
4808 4948 regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */
4809 4949
4810 4950 while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
4811 4951 opc = pc;
4812 4952
4813 4953 instr = text[pc++];
4814 4954 r1 = DIF_INSTR_R1(instr);
4815 4955 r2 = DIF_INSTR_R2(instr);
4816 4956 rd = DIF_INSTR_RD(instr);
4817 4957
4818 4958 switch (DIF_INSTR_OP(instr)) {
4819 4959 case DIF_OP_OR:
4820 4960 regs[rd] = regs[r1] | regs[r2];
4821 4961 break;
4822 4962 case DIF_OP_XOR:
4823 4963 regs[rd] = regs[r1] ^ regs[r2];
4824 4964 break;
4825 4965 case DIF_OP_AND:
4826 4966 regs[rd] = regs[r1] & regs[r2];
4827 4967 break;
4828 4968 case DIF_OP_SLL:
4829 4969 regs[rd] = regs[r1] << regs[r2];
4830 4970 break;
4831 4971 case DIF_OP_SRL:
4832 4972 regs[rd] = regs[r1] >> regs[r2];
4833 4973 break;
4834 4974 case DIF_OP_SUB:
4835 4975 regs[rd] = regs[r1] - regs[r2];
4836 4976 break;
4837 4977 case DIF_OP_ADD:
4838 4978 regs[rd] = regs[r1] + regs[r2];
4839 4979 break;
4840 4980 case DIF_OP_MUL:
4841 4981 regs[rd] = regs[r1] * regs[r2];
4842 4982 break;
4843 4983 case DIF_OP_SDIV:
4844 4984 if (regs[r2] == 0) {
4845 4985 regs[rd] = 0;
4846 4986 *flags |= CPU_DTRACE_DIVZERO;
4847 4987 } else {
4848 4988 regs[rd] = (int64_t)regs[r1] /
4849 4989 (int64_t)regs[r2];
4850 4990 }
4851 4991 break;
4852 4992
4853 4993 case DIF_OP_UDIV:
4854 4994 if (regs[r2] == 0) {
4855 4995 regs[rd] = 0;
4856 4996 *flags |= CPU_DTRACE_DIVZERO;
4857 4997 } else {
4858 4998 regs[rd] = regs[r1] / regs[r2];
4859 4999 }
4860 5000 break;
4861 5001
4862 5002 case DIF_OP_SREM:
4863 5003 if (regs[r2] == 0) {
4864 5004 regs[rd] = 0;
4865 5005 *flags |= CPU_DTRACE_DIVZERO;
4866 5006 } else {
4867 5007 regs[rd] = (int64_t)regs[r1] %
4868 5008 (int64_t)regs[r2];
4869 5009 }
4870 5010 break;
4871 5011
4872 5012 case DIF_OP_UREM:
4873 5013 if (regs[r2] == 0) {
4874 5014 regs[rd] = 0;
4875 5015 *flags |= CPU_DTRACE_DIVZERO;
4876 5016 } else {
4877 5017 regs[rd] = regs[r1] % regs[r2];
4878 5018 }
4879 5019 break;
4880 5020
4881 5021 case DIF_OP_NOT:
4882 5022 regs[rd] = ~regs[r1];
4883 5023 break;
4884 5024 case DIF_OP_MOV:
4885 5025 regs[rd] = regs[r1];
4886 5026 break;
4887 5027 case DIF_OP_CMP:
4888 5028 cc_r = regs[r1] - regs[r2];
4889 5029 cc_n = cc_r < 0;
4890 5030 cc_z = cc_r == 0;
4891 5031 cc_v = 0;
4892 5032 cc_c = regs[r1] < regs[r2];
4893 5033 break;
4894 5034 case DIF_OP_TST:
4895 5035 cc_n = cc_v = cc_c = 0;
4896 5036 cc_z = regs[r1] == 0;
4897 5037 break;
4898 5038 case DIF_OP_BA:
4899 5039 pc = DIF_INSTR_LABEL(instr);
4900 5040 break;
4901 5041 case DIF_OP_BE:
4902 5042 if (cc_z)
4903 5043 pc = DIF_INSTR_LABEL(instr);
4904 5044 break;
4905 5045 case DIF_OP_BNE:
4906 5046 if (cc_z == 0)
4907 5047 pc = DIF_INSTR_LABEL(instr);
4908 5048 break;
4909 5049 case DIF_OP_BG:
4910 5050 if ((cc_z | (cc_n ^ cc_v)) == 0)
4911 5051 pc = DIF_INSTR_LABEL(instr);
4912 5052 break;
4913 5053 case DIF_OP_BGU:
4914 5054 if ((cc_c | cc_z) == 0)
4915 5055 pc = DIF_INSTR_LABEL(instr);
4916 5056 break;
4917 5057 case DIF_OP_BGE:
4918 5058 if ((cc_n ^ cc_v) == 0)
4919 5059 pc = DIF_INSTR_LABEL(instr);
4920 5060 break;
4921 5061 case DIF_OP_BGEU:
4922 5062 if (cc_c == 0)
4923 5063 pc = DIF_INSTR_LABEL(instr);
4924 5064 break;
4925 5065 case DIF_OP_BL:
4926 5066 if (cc_n ^ cc_v)
4927 5067 pc = DIF_INSTR_LABEL(instr);
4928 5068 break;
4929 5069 case DIF_OP_BLU:
4930 5070 if (cc_c)
4931 5071 pc = DIF_INSTR_LABEL(instr);
4932 5072 break;
4933 5073 case DIF_OP_BLE:
4934 5074 if (cc_z | (cc_n ^ cc_v))
4935 5075 pc = DIF_INSTR_LABEL(instr);
4936 5076 break;
4937 5077 case DIF_OP_BLEU:
4938 5078 if (cc_c | cc_z)
4939 5079 pc = DIF_INSTR_LABEL(instr);
4940 5080 break;
4941 5081 case DIF_OP_RLDSB:
4942 - if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
4943 - *flags |= CPU_DTRACE_KPRIV;
4944 - *illval = regs[r1];
5082 + if (!dtrace_canload(regs[r1], 1, mstate, vstate))
4945 5083 break;
4946 - }
4947 5084 /*FALLTHROUGH*/
4948 5085 case DIF_OP_LDSB:
4949 5086 regs[rd] = (int8_t)dtrace_load8(regs[r1]);
4950 5087 break;
4951 5088 case DIF_OP_RLDSH:
4952 - if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
4953 - *flags |= CPU_DTRACE_KPRIV;
4954 - *illval = regs[r1];
5089 + if (!dtrace_canload(regs[r1], 2, mstate, vstate))
4955 5090 break;
4956 - }
4957 5091 /*FALLTHROUGH*/
4958 5092 case DIF_OP_LDSH:
4959 5093 regs[rd] = (int16_t)dtrace_load16(regs[r1]);
4960 5094 break;
4961 5095 case DIF_OP_RLDSW:
4962 - if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
4963 - *flags |= CPU_DTRACE_KPRIV;
4964 - *illval = regs[r1];
5096 + if (!dtrace_canload(regs[r1], 4, mstate, vstate))
4965 5097 break;
4966 - }
4967 5098 /*FALLTHROUGH*/
4968 5099 case DIF_OP_LDSW:
4969 5100 regs[rd] = (int32_t)dtrace_load32(regs[r1]);
4970 5101 break;
4971 5102 case DIF_OP_RLDUB:
4972 - if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
4973 - *flags |= CPU_DTRACE_KPRIV;
4974 - *illval = regs[r1];
5103 + if (!dtrace_canload(regs[r1], 1, mstate, vstate))
4975 5104 break;
4976 - }
4977 5105 /*FALLTHROUGH*/
4978 5106 case DIF_OP_LDUB:
4979 5107 regs[rd] = dtrace_load8(regs[r1]);
4980 5108 break;
4981 5109 case DIF_OP_RLDUH:
4982 - if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
4983 - *flags |= CPU_DTRACE_KPRIV;
4984 - *illval = regs[r1];
5110 + if (!dtrace_canload(regs[r1], 2, mstate, vstate))
4985 5111 break;
4986 - }
4987 5112 /*FALLTHROUGH*/
4988 5113 case DIF_OP_LDUH:
4989 5114 regs[rd] = dtrace_load16(regs[r1]);
4990 5115 break;
4991 5116 case DIF_OP_RLDUW:
4992 - if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
4993 - *flags |= CPU_DTRACE_KPRIV;
4994 - *illval = regs[r1];
5117 + if (!dtrace_canload(regs[r1], 4, mstate, vstate))
4995 5118 break;
4996 - }
4997 5119 /*FALLTHROUGH*/
4998 5120 case DIF_OP_LDUW:
4999 5121 regs[rd] = dtrace_load32(regs[r1]);
5000 5122 break;
5001 5123 case DIF_OP_RLDX:
5002 - if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
5003 - *flags |= CPU_DTRACE_KPRIV;
5004 - *illval = regs[r1];
5124 + if (!dtrace_canload(regs[r1], 8, mstate, vstate))
5005 5125 break;
5006 - }
5007 5126 /*FALLTHROUGH*/
5008 5127 case DIF_OP_LDX:
5009 5128 regs[rd] = dtrace_load64(regs[r1]);
5010 5129 break;
5011 5130 case DIF_OP_ULDSB:
5012 5131 regs[rd] = (int8_t)
5013 5132 dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5014 5133 break;
5015 5134 case DIF_OP_ULDSH:
5016 5135 regs[rd] = (int16_t)
5017 5136 dtrace_fuword16((void *)(uintptr_t)regs[r1]);
5018 5137 break;
5019 5138 case DIF_OP_ULDSW:
5020 5139 regs[rd] = (int32_t)
5021 5140 dtrace_fuword32((void *)(uintptr_t)regs[r1]);
5022 5141 break;
5023 5142 case DIF_OP_ULDUB:
5024 5143 regs[rd] =
5025 5144 dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5026 5145 break;
5027 5146 case DIF_OP_ULDUH:
5028 5147 regs[rd] =
5029 5148 dtrace_fuword16((void *)(uintptr_t)regs[r1]);
5030 5149 break;
5031 5150 case DIF_OP_ULDUW:
5032 5151 regs[rd] =
5033 5152 dtrace_fuword32((void *)(uintptr_t)regs[r1]);
5034 5153 break;
5035 5154 case DIF_OP_ULDX:
5036 5155 regs[rd] =
5037 5156 dtrace_fuword64((void *)(uintptr_t)regs[r1]);
5038 5157 break;
5039 5158 case DIF_OP_RET:
5040 5159 rval = regs[rd];
5041 5160 pc = textlen;
5042 5161 break;
5043 5162 case DIF_OP_NOP:
5044 5163 break;
5045 5164 case DIF_OP_SETX:
5046 5165 regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
5047 5166 break;
5048 5167 case DIF_OP_SETS:
5049 5168 regs[rd] = (uint64_t)(uintptr_t)
5050 5169 (strtab + DIF_INSTR_STRING(instr));
5051 5170 break;
5052 5171 case DIF_OP_SCMP: {
5053 5172 size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
5054 5173 uintptr_t s1 = regs[r1];
5055 5174 uintptr_t s2 = regs[r2];
5056 5175
5057 5176 if (s1 != NULL &&
5058 5177 !dtrace_strcanload(s1, sz, mstate, vstate))
5059 5178 break;
5060 5179 if (s2 != NULL &&
5061 5180 !dtrace_strcanload(s2, sz, mstate, vstate))
5062 5181 break;
5063 5182
5064 5183 cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz);
5065 5184
5066 5185 cc_n = cc_r < 0;
5067 5186 cc_z = cc_r == 0;
5068 5187 cc_v = cc_c = 0;
5069 5188 break;
5070 5189 }
5071 5190 case DIF_OP_LDGA:
5072 5191 regs[rd] = dtrace_dif_variable(mstate, state,
5073 5192 r1, regs[r2]);
5074 5193 break;
5075 5194 case DIF_OP_LDGS:
5076 5195 id = DIF_INSTR_VAR(instr);
5077 5196
5078 5197 if (id >= DIF_VAR_OTHER_UBASE) {
5079 5198 uintptr_t a;
5080 5199
5081 5200 id -= DIF_VAR_OTHER_UBASE;
5082 5201 svar = vstate->dtvs_globals[id];
5083 5202 ASSERT(svar != NULL);
5084 5203 v = &svar->dtsv_var;
5085 5204
5086 5205 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
5087 5206 regs[rd] = svar->dtsv_data;
5088 5207 break;
5089 5208 }
5090 5209
5091 5210 a = (uintptr_t)svar->dtsv_data;
5092 5211
5093 5212 if (*(uint8_t *)a == UINT8_MAX) {
5094 5213 /*
5095 5214 * If the 0th byte is set to UINT8_MAX
5096 5215 * then this is to be treated as a
5097 5216 * reference to a NULL variable.
5098 5217 */
5099 5218 regs[rd] = NULL;
5100 5219 } else {
5101 5220 regs[rd] = a + sizeof (uint64_t);
5102 5221 }
5103 5222
5104 5223 break;
5105 5224 }
5106 5225
5107 5226 regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
5108 5227 break;
5109 5228
5110 5229 case DIF_OP_STGS:
5111 5230 id = DIF_INSTR_VAR(instr);
5112 5231
5113 5232 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5114 5233 id -= DIF_VAR_OTHER_UBASE;
5115 5234
5116 5235 svar = vstate->dtvs_globals[id];
5117 5236 ASSERT(svar != NULL);
5118 5237 v = &svar->dtsv_var;
5119 5238
5120 5239 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5121 5240 uintptr_t a = (uintptr_t)svar->dtsv_data;
5122 5241
5123 5242 ASSERT(a != NULL);
5124 5243 ASSERT(svar->dtsv_size != 0);
5125 5244
5126 5245 if (regs[rd] == NULL) {
5127 5246 *(uint8_t *)a = UINT8_MAX;
5128 5247 break;
5129 5248 } else {
5130 5249 *(uint8_t *)a = 0;
5131 5250 a += sizeof (uint64_t);
5132 5251 }
5133 5252 if (!dtrace_vcanload(
5134 5253 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
5135 5254 mstate, vstate))
5136 5255 break;
5137 5256
5138 5257 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5139 5258 (void *)a, &v->dtdv_type);
5140 5259 break;
5141 5260 }
5142 5261
5143 5262 svar->dtsv_data = regs[rd];
5144 5263 break;
5145 5264
5146 5265 case DIF_OP_LDTA:
5147 5266 /*
5148 5267 * There are no DTrace built-in thread-local arrays at
5149 5268 * present. This opcode is saved for future work.
5150 5269 */
5151 5270 *flags |= CPU_DTRACE_ILLOP;
5152 5271 regs[rd] = 0;
5153 5272 break;
5154 5273
5155 5274 case DIF_OP_LDLS:
5156 5275 id = DIF_INSTR_VAR(instr);
5157 5276
5158 5277 if (id < DIF_VAR_OTHER_UBASE) {
5159 5278 /*
5160 5279 * For now, this has no meaning.
5161 5280 */
5162 5281 regs[rd] = 0;
5163 5282 break;
5164 5283 }
5165 5284
5166 5285 id -= DIF_VAR_OTHER_UBASE;
5167 5286
5168 5287 ASSERT(id < vstate->dtvs_nlocals);
5169 5288 ASSERT(vstate->dtvs_locals != NULL);
5170 5289
5171 5290 svar = vstate->dtvs_locals[id];
5172 5291 ASSERT(svar != NULL);
5173 5292 v = &svar->dtsv_var;
5174 5293
5175 5294 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5176 5295 uintptr_t a = (uintptr_t)svar->dtsv_data;
5177 5296 size_t sz = v->dtdv_type.dtdt_size;
5178 5297
5179 5298 sz += sizeof (uint64_t);
5180 5299 ASSERT(svar->dtsv_size == NCPU * sz);
5181 5300 a += CPU->cpu_id * sz;
5182 5301
5183 5302 if (*(uint8_t *)a == UINT8_MAX) {
5184 5303 /*
5185 5304 * If the 0th byte is set to UINT8_MAX
5186 5305 * then this is to be treated as a
5187 5306 * reference to a NULL variable.
5188 5307 */
5189 5308 regs[rd] = NULL;
5190 5309 } else {
5191 5310 regs[rd] = a + sizeof (uint64_t);
5192 5311 }
5193 5312
5194 5313 break;
5195 5314 }
5196 5315
5197 5316 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
5198 5317 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
5199 5318 regs[rd] = tmp[CPU->cpu_id];
5200 5319 break;
5201 5320
5202 5321 case DIF_OP_STLS:
5203 5322 id = DIF_INSTR_VAR(instr);
5204 5323
5205 5324 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5206 5325 id -= DIF_VAR_OTHER_UBASE;
5207 5326 ASSERT(id < vstate->dtvs_nlocals);
5208 5327
5209 5328 ASSERT(vstate->dtvs_locals != NULL);
5210 5329 svar = vstate->dtvs_locals[id];
5211 5330 ASSERT(svar != NULL);
5212 5331 v = &svar->dtsv_var;
5213 5332
5214 5333 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5215 5334 uintptr_t a = (uintptr_t)svar->dtsv_data;
5216 5335 size_t sz = v->dtdv_type.dtdt_size;
5217 5336
5218 5337 sz += sizeof (uint64_t);
5219 5338 ASSERT(svar->dtsv_size == NCPU * sz);
5220 5339 a += CPU->cpu_id * sz;
5221 5340
5222 5341 if (regs[rd] == NULL) {
5223 5342 *(uint8_t *)a = UINT8_MAX;
5224 5343 break;
5225 5344 } else {
5226 5345 *(uint8_t *)a = 0;
5227 5346 a += sizeof (uint64_t);
5228 5347 }
5229 5348
5230 5349 if (!dtrace_vcanload(
5231 5350 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
5232 5351 mstate, vstate))
5233 5352 break;
5234 5353
5235 5354 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5236 5355 (void *)a, &v->dtdv_type);
5237 5356 break;
5238 5357 }
5239 5358
5240 5359 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
5241 5360 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
5242 5361 tmp[CPU->cpu_id] = regs[rd];
5243 5362 break;
5244 5363
5245 5364 case DIF_OP_LDTS: {
5246 5365 dtrace_dynvar_t *dvar;
5247 5366 dtrace_key_t *key;
5248 5367
5249 5368 id = DIF_INSTR_VAR(instr);
5250 5369 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5251 5370 id -= DIF_VAR_OTHER_UBASE;
5252 5371 v = &vstate->dtvs_tlocals[id];
5253 5372
5254 5373 key = &tupregs[DIF_DTR_NREGS];
5255 5374 key[0].dttk_value = (uint64_t)id;
5256 5375 key[0].dttk_size = 0;
5257 5376 DTRACE_TLS_THRKEY(key[1].dttk_value);
5258 5377 key[1].dttk_size = 0;
5259 5378
5260 5379 dvar = dtrace_dynvar(dstate, 2, key,
5261 5380 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
5262 5381 mstate, vstate);
5263 5382
5264 5383 if (dvar == NULL) {
5265 5384 regs[rd] = 0;
5266 5385 break;
5267 5386 }
5268 5387
5269 5388 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5270 5389 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
5271 5390 } else {
5272 5391 regs[rd] = *((uint64_t *)dvar->dtdv_data);
5273 5392 }
5274 5393
5275 5394 break;
5276 5395 }
5277 5396
5278 5397 case DIF_OP_STTS: {
5279 5398 dtrace_dynvar_t *dvar;
5280 5399 dtrace_key_t *key;
5281 5400
5282 5401 id = DIF_INSTR_VAR(instr);
5283 5402 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5284 5403 id -= DIF_VAR_OTHER_UBASE;
5285 5404
5286 5405 key = &tupregs[DIF_DTR_NREGS];
5287 5406 key[0].dttk_value = (uint64_t)id;
5288 5407 key[0].dttk_size = 0;
5289 5408 DTRACE_TLS_THRKEY(key[1].dttk_value);
5290 5409 key[1].dttk_size = 0;
5291 5410 v = &vstate->dtvs_tlocals[id];
5292 5411
5293 5412 dvar = dtrace_dynvar(dstate, 2, key,
5294 5413 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5295 5414 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5296 5415 regs[rd] ? DTRACE_DYNVAR_ALLOC :
5297 5416 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
5298 5417
5299 5418 /*
5300 5419 * Given that we're storing to thread-local data,
5301 5420 * we need to flush our predicate cache.
5302 5421 */
5303 5422 curthread->t_predcache = NULL;
5304 5423
5305 5424 if (dvar == NULL)
5306 5425 break;
5307 5426
5308 5427 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5309 5428 if (!dtrace_vcanload(
5310 5429 (void *)(uintptr_t)regs[rd],
5311 5430 &v->dtdv_type, mstate, vstate))
5312 5431 break;
5313 5432
5314 5433 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5315 5434 dvar->dtdv_data, &v->dtdv_type);
5316 5435 } else {
5317 5436 *((uint64_t *)dvar->dtdv_data) = regs[rd];
5318 5437 }
5319 5438
5320 5439 break;
5321 5440 }
5322 5441
5323 5442 case DIF_OP_SRA:
5324 5443 regs[rd] = (int64_t)regs[r1] >> regs[r2];
5325 5444 break;
5326 5445
5327 5446 case DIF_OP_CALL:
5328 5447 dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
5329 5448 regs, tupregs, ttop, mstate, state);
5330 5449 break;
5331 5450
5332 5451 case DIF_OP_PUSHTR:
5333 5452 if (ttop == DIF_DTR_NREGS) {
5334 5453 *flags |= CPU_DTRACE_TUPOFLOW;
5335 5454 break;
5336 5455 }
5337 5456
5338 5457 if (r1 == DIF_TYPE_STRING) {
5339 5458 /*
5340 5459 * If this is a string type and the size is 0,
5341 5460 * we'll use the system-wide default string
5342 5461 * size. Note that we are _not_ looking at
5343 5462 * the value of the DTRACEOPT_STRSIZE option;
5344 5463 * had this been set, we would expect to have
5345 5464 * a non-zero size value in the "pushtr".
5346 5465 */
5347 5466 tupregs[ttop].dttk_size =
5348 5467 dtrace_strlen((char *)(uintptr_t)regs[rd],
5349 5468 regs[r2] ? regs[r2] :
5350 5469 dtrace_strsize_default) + 1;
5351 5470 } else {
5352 5471 tupregs[ttop].dttk_size = regs[r2];
5353 5472 }
5354 5473
5355 5474 tupregs[ttop++].dttk_value = regs[rd];
5356 5475 break;
5357 5476
5358 5477 case DIF_OP_PUSHTV:
5359 5478 if (ttop == DIF_DTR_NREGS) {
5360 5479 *flags |= CPU_DTRACE_TUPOFLOW;
5361 5480 break;
5362 5481 }
5363 5482
5364 5483 tupregs[ttop].dttk_value = regs[rd];
5365 5484 tupregs[ttop++].dttk_size = 0;
5366 5485 break;
5367 5486
5368 5487 case DIF_OP_POPTS:
5369 5488 if (ttop != 0)
5370 5489 ttop--;
5371 5490 break;
5372 5491
5373 5492 case DIF_OP_FLUSHTS:
5374 5493 ttop = 0;
5375 5494 break;
5376 5495
5377 5496 case DIF_OP_LDGAA:
5378 5497 case DIF_OP_LDTAA: {
5379 5498 dtrace_dynvar_t *dvar;
5380 5499 dtrace_key_t *key = tupregs;
5381 5500 uint_t nkeys = ttop;
5382 5501
5383 5502 id = DIF_INSTR_VAR(instr);
5384 5503 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5385 5504 id -= DIF_VAR_OTHER_UBASE;
5386 5505
5387 5506 key[nkeys].dttk_value = (uint64_t)id;
5388 5507 key[nkeys++].dttk_size = 0;
5389 5508
5390 5509 if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
5391 5510 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
5392 5511 key[nkeys++].dttk_size = 0;
5393 5512 v = &vstate->dtvs_tlocals[id];
5394 5513 } else {
5395 5514 v = &vstate->dtvs_globals[id]->dtsv_var;
5396 5515 }
5397 5516
5398 5517 dvar = dtrace_dynvar(dstate, nkeys, key,
5399 5518 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5400 5519 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5401 5520 DTRACE_DYNVAR_NOALLOC, mstate, vstate);
5402 5521
5403 5522 if (dvar == NULL) {
5404 5523 regs[rd] = 0;
5405 5524 break;
5406 5525 }
5407 5526
5408 5527 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5409 5528 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
5410 5529 } else {
5411 5530 regs[rd] = *((uint64_t *)dvar->dtdv_data);
5412 5531 }
5413 5532
5414 5533 break;
5415 5534 }
5416 5535
5417 5536 case DIF_OP_STGAA:
5418 5537 case DIF_OP_STTAA: {
5419 5538 dtrace_dynvar_t *dvar;
5420 5539 dtrace_key_t *key = tupregs;
5421 5540 uint_t nkeys = ttop;
5422 5541
5423 5542 id = DIF_INSTR_VAR(instr);
5424 5543 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5425 5544 id -= DIF_VAR_OTHER_UBASE;
5426 5545
5427 5546 key[nkeys].dttk_value = (uint64_t)id;
5428 5547 key[nkeys++].dttk_size = 0;
5429 5548
5430 5549 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
5431 5550 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
5432 5551 key[nkeys++].dttk_size = 0;
5433 5552 v = &vstate->dtvs_tlocals[id];
5434 5553 } else {
5435 5554 v = &vstate->dtvs_globals[id]->dtsv_var;
5436 5555 }
5437 5556
5438 5557 dvar = dtrace_dynvar(dstate, nkeys, key,
5439 5558 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5440 5559 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5441 5560 regs[rd] ? DTRACE_DYNVAR_ALLOC :
5442 5561 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
5443 5562
5444 5563 if (dvar == NULL)
5445 5564 break;
5446 5565
5447 5566 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5448 5567 if (!dtrace_vcanload(
5449 5568 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
5450 5569 mstate, vstate))
5451 5570 break;
5452 5571
5453 5572 dtrace_vcopy((void *)(uintptr_t)regs[rd],
5454 5573 dvar->dtdv_data, &v->dtdv_type);
5455 5574 } else {
5456 5575 *((uint64_t *)dvar->dtdv_data) = regs[rd];
5457 5576 }
5458 5577
5459 5578 break;
5460 5579 }
5461 5580
5462 5581 case DIF_OP_ALLOCS: {
5463 5582 uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
5464 5583 size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];
5465 5584
5466 5585 /*
5467 5586			 * Rounding up the user allocation size could
5468 5587			 * cause a large, bogus allocation (like -1ULL)
5469 5588			 * to overflow to 0.
5470 5589 */
5471 5590 if (size < regs[r1] ||
5472 5591 !DTRACE_INSCRATCH(mstate, size)) {
5473 5592 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5474 5593 regs[rd] = NULL;
5475 5594 break;
5476 5595 }
5477 5596
5478 5597 dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
5479 5598 mstate->dtms_scratch_ptr += size;
5480 5599 regs[rd] = ptr;
5481 5600 break;
5482 5601 }
5483 5602
5484 5603 case DIF_OP_COPYS:
5485 5604 if (!dtrace_canstore(regs[rd], regs[r2],
5486 5605 mstate, vstate)) {
5487 5606 *flags |= CPU_DTRACE_BADADDR;
5488 5607 *illval = regs[rd];
5489 5608 break;
5490 5609 }
5491 5610
5492 5611 if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
5493 5612 break;
5494 5613
5495 5614 dtrace_bcopy((void *)(uintptr_t)regs[r1],
5496 5615 (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
5497 5616 break;
5498 5617
5499 5618 case DIF_OP_STB:
5500 5619 if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
5501 5620 *flags |= CPU_DTRACE_BADADDR;
5502 5621 *illval = regs[rd];
5503 5622 break;
5504 5623 }
5505 5624 *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
5506 5625 break;
5507 5626
5508 5627 case DIF_OP_STH:
5509 5628 if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
5510 5629 *flags |= CPU_DTRACE_BADADDR;
5511 5630 *illval = regs[rd];
5512 5631 break;
5513 5632 }
5514 5633 if (regs[rd] & 1) {
5515 5634 *flags |= CPU_DTRACE_BADALIGN;
5516 5635 *illval = regs[rd];
5517 5636 break;
5518 5637 }
5519 5638 *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
5520 5639 break;
5521 5640
5522 5641 case DIF_OP_STW:
5523 5642 if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
5524 5643 *flags |= CPU_DTRACE_BADADDR;
5525 5644 *illval = regs[rd];
5526 5645 break;
5527 5646 }
5528 5647 if (regs[rd] & 3) {
5529 5648 *flags |= CPU_DTRACE_BADALIGN;
5530 5649 *illval = regs[rd];
5531 5650 break;
5532 5651 }
5533 5652 *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
5534 5653 break;
5535 5654
5536 5655 case DIF_OP_STX:
5537 5656 if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
5538 5657 *flags |= CPU_DTRACE_BADADDR;
5539 5658 *illval = regs[rd];
5540 5659 break;
5541 5660 }
5542 5661 if (regs[rd] & 7) {
5543 5662 *flags |= CPU_DTRACE_BADALIGN;
5544 5663 *illval = regs[rd];
5545 5664 break;
5546 5665 }
5547 5666 *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
5548 5667 break;
5549 5668 }
5550 5669 }
5551 5670
5552 5671 if (!(*flags & CPU_DTRACE_FAULT))
5553 5672 return (rval);
5554 5673
5555 5674 mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
5556 5675 mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;
5557 5676
5558 5677 return (0);
5559 5678 }
5560 5679
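
The hunks above are worth calling out: the RLD* ("remote load") opcodes now vet the source address with dtrace_canload() rather than dtrace_canstore(), and on failure they simply break out, leaving fault signalling to the canload path instead of raising CPU_DTRACE_KPRIV inline. A minimal sketch of the resulting validate-then-fall-through shape, with a toy policy routine standing in for the kernel's (guarded_ldsb and canload here are ours):

	#include <stdint.h>
	#include <stdio.h>

	static uint8_t mem[4] = { 0x80, 0x01, 0x02, 0x03 };

	/* Toy policy: only addresses inside mem[] may be loaded. */
	static int
	canload(uintptr_t addr, uintptr_t len)
	{
		uintptr_t base = (uintptr_t)mem;

		return (addr >= base && addr + len <= base + sizeof (mem));
	}

	/*
	 * Guarded signed byte load: validate, then perform the raw
	 * load -- the shape DIF_OP_RLDSB takes after this change.
	 */
	static int
	guarded_ldsb(uintptr_t addr, uint64_t *rd)
	{
		if (!canload(addr, 1))
			return (-1);	/* policy layer flags the fault */

		*rd = (uint64_t)(int64_t)(int8_t)*(uint8_t *)addr;
		return (0);
	}

	int
	main(void)
	{
		uint64_t rd;

		if (guarded_ldsb((uintptr_t)&mem[0], &rd) == 0)
			printf("0x%llx\n", (unsigned long long)rd);
		if (guarded_ldsb((uintptr_t)mem + sizeof (mem), &rd) != 0)
			printf("out-of-range load rejected\n");
		return (0);
	}
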
5561 5680 static void
5562 5681 dtrace_action_breakpoint(dtrace_ecb_t *ecb)
5563 5682 {
5564 5683 dtrace_probe_t *probe = ecb->dte_probe;
5565 5684 dtrace_provider_t *prov = probe->dtpr_provider;
5566 5685 char c[DTRACE_FULLNAMELEN + 80], *str;
5567 5686 char *msg = "dtrace: breakpoint action at probe ";
5568 5687 char *ecbmsg = " (ecb ";
5569 5688 uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
5570 5689 uintptr_t val = (uintptr_t)ecb;
5571 5690 int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;
5572 5691
5573 5692 if (dtrace_destructive_disallow)
5574 5693 return;
5575 5694
5576 5695 /*
5577 5696 * It's impossible to be taking action on the NULL probe.
5578 5697 */
5579 5698 ASSERT(probe != NULL);
5580 5699
5581 5700 /*
5582 5701 * This is a poor man's (destitute man's?) sprintf(): we want to
5583 5702 * print the provider name, module name, function name and name of
5584 5703 * the probe, along with the hex address of the ECB with the breakpoint
5585 5704 * action -- all of which we must place in the character buffer by
5586 5705 * hand.
5587 5706 */
5588 5707 while (*msg != '\0')
5589 5708 c[i++] = *msg++;
5590 5709
5591 5710 for (str = prov->dtpv_name; *str != '\0'; str++)
5592 5711 c[i++] = *str;
5593 5712 c[i++] = ':';
5594 5713
5595 5714 for (str = probe->dtpr_mod; *str != '\0'; str++)
5596 5715 c[i++] = *str;
5597 5716 c[i++] = ':';
5598 5717
5599 5718 for (str = probe->dtpr_func; *str != '\0'; str++)
5600 5719 c[i++] = *str;
5601 5720 c[i++] = ':';
5602 5721
5603 5722 for (str = probe->dtpr_name; *str != '\0'; str++)
5604 5723 c[i++] = *str;
5605 5724
5606 5725 while (*ecbmsg != '\0')
5607 5726 c[i++] = *ecbmsg++;
5608 5727
5609 5728 while (shift >= 0) {
5610 5729 mask = (uintptr_t)0xf << shift;
5611 5730
5612 5731 if (val >= ((uintptr_t)1 << shift))
5613 5732 c[i++] = "0123456789abcdef"[(val & mask) >> shift];
5614 5733 shift -= 4;
5615 5734 }
5616 5735
5617 5736 c[i++] = ')';
5618 5737 c[i] = '\0';
5619 5738
5620 5739 debug_enter(c);
5621 5740 }
5622 5741
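
The nibble loop above is the heart of the hand-rolled formatter: it scans from the most significant nibble down, emitting digits only once the value's magnitude has been reached, so leading zeros are skipped. The same loop lifted into a standalone sketch (hexstr is our name; the kernel writes into its message buffer instead):

	#include <stdio.h>
	#include <stdint.h>

	/*
	 * Render val in hex, high nibble first, skipping leading
	 * zeros; buf must hold 2 * sizeof (uintptr_t) + 1 bytes.
	 */
	static void
	hexstr(uintptr_t val, char *buf)
	{
		int shift = (int)(sizeof (uintptr_t) * 8) - 4, i = 0;

		while (shift >= 0) {
			uintptr_t mask = (uintptr_t)0xf << shift;

			if (val >= ((uintptr_t)1 << shift))
				buf[i++] =
				    "0123456789abcdef"[(val & mask) >> shift];
			shift -= 4;
		}

		if (i == 0)
			buf[i++] = '0';	/* the loop emits nothing for 0 */
		buf[i] = '\0';
	}

	int
	main(void)
	{
		char buf[2 * sizeof (uintptr_t) + 1];

		hexstr((uintptr_t)0xdeadbeef, buf);
		printf("%s\n", buf);	/* deadbeef */
		return (0);
	}
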
5623 5742 static void
5624 5743 dtrace_action_panic(dtrace_ecb_t *ecb)
5625 5744 {
5626 5745 dtrace_probe_t *probe = ecb->dte_probe;
5627 5746
5628 5747 /*
5629 5748 * It's impossible to be taking action on the NULL probe.
5630 5749 */
5631 5750 ASSERT(probe != NULL);
5632 5751
5633 5752 if (dtrace_destructive_disallow)
5634 5753 return;
5635 5754
5636 5755 if (dtrace_panicked != NULL)
5637 5756 return;
5638 5757
5639 5758 if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
5640 5759 return;
5641 5760
5642 5761 /*
5643 5762 * We won the right to panic. (We want to be sure that only one
5644 5763 * thread calls panic() from dtrace_probe(), and that panic() is
5645 5764 * called exactly once.)
5646 5765 */
5647 5766 dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
5648 5767 probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
5649 5768 probe->dtpr_func, probe->dtpr_name, (void *)ecb);
5650 5769 }
5651 5770
5652 5771 static void
5653 5772 dtrace_action_raise(uint64_t sig)
5654 5773 {
5655 5774 if (dtrace_destructive_disallow)
5656 5775 return;
5657 5776
5658 5777 if (sig >= NSIG) {
5659 5778 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
5660 5779 return;
5661 5780 }
5662 5781
5663 5782 /*
5664 5783 * raise() has a queue depth of 1 -- we ignore all subsequent
5665 5784 * invocations of the raise() action.
5666 5785 */
5667 5786 if (curthread->t_dtrace_sig == 0)
5668 5787 curthread->t_dtrace_sig = (uint8_t)sig;
5669 5788
5670 5789 curthread->t_sig_check = 1;
5671 5790 aston(curthread);
5672 5791 }
5673 5792
5674 5793 static void
5675 5794 dtrace_action_stop(void)
5676 5795 {
5677 5796 if (dtrace_destructive_disallow)
5678 5797 return;
5679 5798
5680 5799 if (!curthread->t_dtrace_stop) {
5681 5800 curthread->t_dtrace_stop = 1;
5682 5801 curthread->t_sig_check = 1;
5683 5802 aston(curthread);
5684 5803 }
5685 5804 }
5686 5805
5687 5806 static void
5688 5807 dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
5689 5808 {
5690 5809 hrtime_t now;
5691 5810 volatile uint16_t *flags;
5692 5811 cpu_t *cpu = CPU;
5693 5812
5694 5813 if (dtrace_destructive_disallow)
5695 5814 return;
5696 5815
5697 5816 flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;
5698 5817
5699 5818 now = dtrace_gethrtime();
5700 5819
5701 5820 if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
5702 5821 /*
5703 5822 * We need to advance the mark to the current time.
5704 5823 */
5705 5824 cpu->cpu_dtrace_chillmark = now;
5706 5825 cpu->cpu_dtrace_chilled = 0;
5707 5826 }
5708 5827
5709 5828 /*
5710 5829 * Now check to see if the requested chill time would take us over
5711 5830 * the maximum amount of time allowed in the chill interval. (Or
5712 5831 * worse, if the calculation itself induces overflow.)
5713 5832 */
5714 5833 if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
5715 5834 cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
5716 5835 *flags |= CPU_DTRACE_ILLOP;
5717 5836 return;
5718 5837 }
5719 5838
5720 5839 while (dtrace_gethrtime() - now < val)
5721 5840 continue;
5722 5841
5723 5842 /*
5724 5843 * Normally, we assure that the value of the variable "timestamp" does
5725 5844 * not change within an ECB. The presence of chill() represents an
5726 5845 * exception to this rule, however.
5727 5846 */
5728 5847 mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
5729 5848 cpu->cpu_dtrace_chilled += val;
5730 5849 }
5731 5850
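
Note the two-sided test above: the first comparison enforces the per-interval chill budget, while the second catches the case where the addition itself wraps around. A sketch of the same guard (unsigned here so the wraparound the second test relies on is well-defined; the kernel's hrtime_t is signed, and chill_would_overrun is our name):

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Would charging 'val' more nanoseconds of chill time exceed
	 * 'max', or wrap the accumulator outright?
	 */
	static int
	chill_would_overrun(uint64_t chilled, uint64_t val, uint64_t max)
	{
		return (chilled + val > max || chilled + val < chilled);
	}

	int
	main(void)
	{
		uint64_t max = 500000000;	/* a 500ms budget, say */

		printf("%d\n", chill_would_overrun(0, 1000, max));	 /* 0 */
		printf("%d\n", chill_would_overrun(max, 1, max));	 /* 1 */
		printf("%d\n", chill_would_overrun(1, UINT64_MAX, max)); /* 1 */
		return (0);
	}
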
5732 5851 static void
5733 5852 dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
5734 5853 uint64_t *buf, uint64_t arg)
5735 5854 {
5736 5855 int nframes = DTRACE_USTACK_NFRAMES(arg);
5737 5856 int strsize = DTRACE_USTACK_STRSIZE(arg);
5738 5857 uint64_t *pcs = &buf[1], *fps;
5739 5858 char *str = (char *)&pcs[nframes];
5740 5859 int size, offs = 0, i, j;
5741 5860 uintptr_t old = mstate->dtms_scratch_ptr, saved;
5742 5861 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
5743 5862 char *sym;
5744 5863
5745 5864 /*
5746 5865 * Should be taking a faster path if string space has not been
5747 5866 * allocated.
5748 5867 */
5749 5868 ASSERT(strsize != 0);
5750 5869
5751 5870 /*
5752 5871 * We will first allocate some temporary space for the frame pointers.
5753 5872 */
5754 5873 fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
5755 5874 size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
5756 5875 (nframes * sizeof (uint64_t));
5757 5876
5758 5877 if (!DTRACE_INSCRATCH(mstate, size)) {
5759 5878 /*
5760 5879 * Not enough room for our frame pointers -- need to indicate
5761 5880 * that we ran out of scratch space.
5762 5881 */
5763 5882 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5764 5883 return;
5765 5884 }
5766 5885
5767 5886 mstate->dtms_scratch_ptr += size;
5768 5887 saved = mstate->dtms_scratch_ptr;
5769 5888
5770 5889 /*
5771 5890 * Now get a stack with both program counters and frame pointers.
5772 5891 */
5773 5892 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5774 5893 dtrace_getufpstack(buf, fps, nframes + 1);
5775 5894 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5776 5895
5777 5896 /*
5778 5897 * If that faulted, we're cooked.
5779 5898 */
5780 5899 if (*flags & CPU_DTRACE_FAULT)
5781 5900 goto out;
5782 5901
5783 5902 /*
5784 5903 * Now we want to walk up the stack, calling the USTACK helper. For
5785 5904 * each iteration, we restore the scratch pointer.
5786 5905 */
5787 5906 for (i = 0; i < nframes; i++) {
5788 5907 mstate->dtms_scratch_ptr = saved;
5789 5908
5790 5909 if (offs >= strsize)
5791 5910 break;
5792 5911
5793 5912 sym = (char *)(uintptr_t)dtrace_helper(
5794 5913 DTRACE_HELPER_ACTION_USTACK,
5795 5914 mstate, state, pcs[i], fps[i]);
5796 5915
5797 5916 /*
5798 5917 * If we faulted while running the helper, we're going to
5799 5918 * clear the fault and null out the corresponding string.
5800 5919 */
5801 5920 if (*flags & CPU_DTRACE_FAULT) {
5802 5921 *flags &= ~CPU_DTRACE_FAULT;
5803 5922 str[offs++] = '\0';
5804 5923 continue;
5805 5924 }
5806 5925
5807 5926 if (sym == NULL) {
5808 5927 str[offs++] = '\0';
5809 5928 continue;
5810 5929 }
5811 5930
5812 5931 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5813 5932
5814 5933 /*
5815 5934 * Now copy in the string that the helper returned to us.
5816 5935 */
5817 5936 for (j = 0; offs + j < strsize; j++) {
5818 5937 if ((str[offs + j] = sym[j]) == '\0')
5819 5938 break;
5820 5939 }
5821 5940
5822 5941 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5823 5942
5824 5943 offs += j + 1;
5825 5944 }
5826 5945
5827 5946 if (offs >= strsize) {
5828 5947 /*
5829 5948 * If we didn't have room for all of the strings, we don't
5830 5949 * abort processing -- this needn't be a fatal error -- but we
5831 5950 * still want to increment a counter (dts_stkstroverflows) to
5832 5951 * allow this condition to be warned about. (If this is from
5833 5952 * a jstack() action, it is easily tuned via jstackstrsize.)
5834 5953 */
5835 5954 dtrace_error(&state->dts_stkstroverflows);
5836 5955 }
5837 5956
5838 5957 while (offs < strsize)
5839 5958 str[offs++] = '\0';
5840 5959
5841 5960 out:
5842 5961 mstate->dtms_scratch_ptr = old;
5843 5962 }
5844 5963
5845 5964 /*
5846 5965 * If you're looking for the epicenter of DTrace, you just found it. This
5847 5966 * is the function called by the provider to fire a probe -- from which all
5848 5967 * subsequent probe-context DTrace activity emanates.
5849 5968 */
5850 5969 void
5851 5970 dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
5852 5971 uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
5853 5972 {
5854 5973 processorid_t cpuid;
5855 5974 dtrace_icookie_t cookie;
5856 5975 dtrace_probe_t *probe;
5857 5976 dtrace_mstate_t mstate;
5858 5977 dtrace_ecb_t *ecb;
5859 5978 dtrace_action_t *act;
5860 5979 intptr_t offs;
5861 5980 size_t size;
5862 5981 int vtime, onintr;
5863 5982 volatile uint16_t *flags;
5864 5983 hrtime_t now, end;
5865 5984
5866 5985 /*
5867 5986 * Kick out immediately if this CPU is still being born (in which case
5868 5987 * curthread will be set to -1) or the current thread can't allow
5869 5988 * probes in its current context.
5870 5989 */
5871 5990 if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE))
5872 5991 return;
5873 5992
5874 5993 cookie = dtrace_interrupt_disable();
5875 5994 probe = dtrace_probes[id - 1];
5876 5995 cpuid = CPU->cpu_id;
5877 5996 onintr = CPU_ON_INTR(CPU);
5878 5997
5879 5998 CPU->cpu_dtrace_probes++;
5880 5999
5881 6000 if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
5882 6001 probe->dtpr_predcache == curthread->t_predcache) {
5883 6002 /*
5884 6003 * We have hit in the predicate cache; we know that
5885 6004			 * this predicate would evaluate to false.
5886 6005 */
5887 6006 dtrace_interrupt_enable(cookie);
5888 6007 return;
5889 6008 }
5890 6009
5891 6010 if (panic_quiesce) {
5892 6011 /*
5893 6012 * We don't trace anything if we're panicking.
5894 6013 */
5895 6014 dtrace_interrupt_enable(cookie);
5896 6015 return;
5897 6016 }
5898 6017
5899 6018 now = dtrace_gethrtime();
5900 6019 vtime = dtrace_vtime_references != 0;
5901 6020
5902 6021 if (vtime && curthread->t_dtrace_start)
5903 6022 curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
5904 6023
5905 6024 mstate.dtms_difo = NULL;
5906 6025 mstate.dtms_probe = probe;
5907 6026 mstate.dtms_strtok = NULL;
5908 6027 mstate.dtms_arg[0] = arg0;
5909 6028 mstate.dtms_arg[1] = arg1;
5910 6029 mstate.dtms_arg[2] = arg2;
5911 6030 mstate.dtms_arg[3] = arg3;
5912 6031 mstate.dtms_arg[4] = arg4;
5913 6032
5914 6033 flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;
5915 6034
5916 6035 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
5917 6036 dtrace_predicate_t *pred = ecb->dte_predicate;
5918 6037 dtrace_state_t *state = ecb->dte_state;
5919 6038 dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
5920 6039 dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
5921 6040 dtrace_vstate_t *vstate = &state->dts_vstate;
5922 6041 dtrace_provider_t *prov = probe->dtpr_provider;
5923 6042 uint64_t tracememsize = 0;
5924 6043 int committed = 0;
5925 6044 caddr_t tomax;
5926 6045
5927 6046 /*
5928 6047 * A little subtlety with the following (seemingly innocuous)
5929 6048 * declaration of the automatic 'val': by looking at the
5930 6049 * code, you might think that it could be declared in the
5931 6050 * action processing loop, below. (That is, it's only used in
5932 6051 * the action processing loop.) However, it must be declared
5933 6052 * out of that scope because in the case of DIF expression
5934 6053 * arguments to aggregating actions, one iteration of the
5935 6054 * action loop will use the last iteration's value.
5936 6055 */
5937 6056 #ifdef lint
5938 6057 uint64_t val = 0;
5939 6058 #else
5940 6059 uint64_t val;
5941 6060 #endif
5942 6061
5943 6062 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
5944 6063 mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
6064 + mstate.dtms_getf = NULL;
6065 +
5945 6066 *flags &= ~CPU_DTRACE_ERROR;
5946 6067
5947 6068 if (prov == dtrace_provider) {
5948 6069 /*
5949 6070 * If dtrace itself is the provider of this probe,
5950 6071 * we're only going to continue processing the ECB if
5951 6072 * arg0 (the dtrace_state_t) is equal to the ECB's
5952 6073 * creating state. (This prevents disjoint consumers
5953 6074 * from seeing one another's metaprobes.)
5954 6075 */
5955 6076 if (arg0 != (uint64_t)(uintptr_t)state)
5956 6077 continue;
5957 6078 }
5958 6079
5959 6080 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
5960 6081 /*
5961 6082 * We're not currently active. If our provider isn't
5962 6083 * the dtrace pseudo provider, we're not interested.
5963 6084 */
5964 6085 if (prov != dtrace_provider)
5965 6086 continue;
5966 6087
5967 6088 /*
5968 6089 * Now we must further check if we are in the BEGIN
5969 6090 * probe. If we are, we will only continue processing
5970 6091 * if we're still in WARMUP -- if one BEGIN enabling
5971 6092 * has invoked the exit() action, we don't want to
5972 6093 * evaluate subsequent BEGIN enablings.
5973 6094 */
5974 6095 if (probe->dtpr_id == dtrace_probeid_begin &&
5975 6096 state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
5976 6097 ASSERT(state->dts_activity ==
5977 6098 DTRACE_ACTIVITY_DRAINING);
5978 6099 continue;
5979 6100 }
5980 6101 }
5981 6102
5982 6103 if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb))
5983 6104 continue;
5984 6105
5985 6106 if (now - state->dts_alive > dtrace_deadman_timeout) {
5986 6107 /*
5987 6108 * We seem to be dead. Unless we (a) have kernel
5988 6109 * destructive permissions (b) have explicitly enabled
5989 6110 * destructive actions and (c) destructive actions have
5990 6111 * not been disabled, we're going to transition into
5991 6112 * the KILLED state, from which no further processing
5992 6113 * on this state will be performed.
5993 6114 */
5994 6115 if (!dtrace_priv_kernel_destructive(state) ||
5995 6116 !state->dts_cred.dcr_destructive ||
5996 6117 dtrace_destructive_disallow) {
5997 6118 void *activity = &state->dts_activity;
5998 6119 dtrace_activity_t current;
5999 6120
6000 6121 do {
6001 6122 current = state->dts_activity;
6002 6123 } while (dtrace_cas32(activity, current,
6003 6124 DTRACE_ACTIVITY_KILLED) != current);
6004 6125
6005 6126 continue;
6006 6127 }
6007 6128 }
6008 6129
6009 6130 if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
6010 6131 ecb->dte_alignment, state, &mstate)) < 0)
6011 6132 continue;
6012 6133
6013 6134 tomax = buf->dtb_tomax;
6014 6135 ASSERT(tomax != NULL);
6015 6136
6016 6137 if (ecb->dte_size != 0) {
6017 6138 dtrace_rechdr_t dtrh;
6018 6139 if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
6019 6140 mstate.dtms_timestamp = dtrace_gethrtime();
6020 6141 mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
6021 6142 }
6022 6143 ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t));
6023 6144 dtrh.dtrh_epid = ecb->dte_epid;
6024 6145 DTRACE_RECORD_STORE_TIMESTAMP(&dtrh,
6025 6146 mstate.dtms_timestamp);
6026 6147 *((dtrace_rechdr_t *)(tomax + offs)) = dtrh;
6027 6148 }
6028 6149
6029 6150 mstate.dtms_epid = ecb->dte_epid;
6030 6151 mstate.dtms_present |= DTRACE_MSTATE_EPID;
6031 6152
6032 6153 if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
6033 6154 mstate.dtms_access |= DTRACE_ACCESS_KERNEL;
6034 6155
6035 6156 if (pred != NULL) {
6036 6157 dtrace_difo_t *dp = pred->dtp_difo;
6037 6158 int rval;
6038 6159
6039 6160 rval = dtrace_dif_emulate(dp, &mstate, vstate, state);
6040 6161
6041 6162 if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
6042 6163 dtrace_cacheid_t cid = probe->dtpr_predcache;
6043 6164
6044 6165 if (cid != DTRACE_CACHEIDNONE && !onintr) {
6045 6166 /*
6046 6167 * Update the predicate cache...
6047 6168 */
6048 6169 ASSERT(cid == pred->dtp_cacheid);
6049 6170 curthread->t_predcache = cid;
6050 6171 }
6051 6172
6052 6173 continue;
6053 6174 }
6054 6175 }
6055 6176
6056 6177 for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
6057 6178 act != NULL; act = act->dta_next) {
6058 6179 size_t valoffs;
6059 6180 dtrace_difo_t *dp;
6060 6181 dtrace_recdesc_t *rec = &act->dta_rec;
6061 6182
6062 6183 size = rec->dtrd_size;
6063 6184 valoffs = offs + rec->dtrd_offset;
6064 6185
6065 6186 if (DTRACEACT_ISAGG(act->dta_kind)) {
6066 6187 uint64_t v = 0xbad;
6067 6188 dtrace_aggregation_t *agg;
6068 6189
6069 6190 agg = (dtrace_aggregation_t *)act;
6070 6191
6071 6192 if ((dp = act->dta_difo) != NULL)
6072 6193 v = dtrace_dif_emulate(dp,
6073 6194 &mstate, vstate, state);
6074 6195
6075 6196 if (*flags & CPU_DTRACE_ERROR)
6076 6197 continue;
6077 6198
6078 6199 /*
6079 6200 * Note that we always pass the expression
6080 6201 * value from the previous iteration of the
6081 6202 * action loop. This value will only be used
6082 6203 * if there is an expression argument to the
6083 6204 * aggregating action, denoted by the
6084 6205 * dtag_hasarg field.
6085 6206 */
6086 6207 dtrace_aggregate(agg, buf,
6087 6208 offs, aggbuf, v, val);
6088 6209 continue;
6089 6210 }
6090 6211
6091 6212 switch (act->dta_kind) {
6092 6213 case DTRACEACT_STOP:
6093 6214 if (dtrace_priv_proc_destructive(state,
6094 6215 &mstate))
6095 6216 dtrace_action_stop();
6096 6217 continue;
6097 6218
6098 6219 case DTRACEACT_BREAKPOINT:
6099 6220 if (dtrace_priv_kernel_destructive(state))
6100 6221 dtrace_action_breakpoint(ecb);
6101 6222 continue;
6102 6223
6103 6224 case DTRACEACT_PANIC:
6104 6225 if (dtrace_priv_kernel_destructive(state))
6105 6226 dtrace_action_panic(ecb);
6106 6227 continue;
6107 6228
6108 6229 case DTRACEACT_STACK:
6109 6230 if (!dtrace_priv_kernel(state))
6110 6231 continue;
6111 6232
6112 6233 dtrace_getpcstack((pc_t *)(tomax + valoffs),
6113 6234 size / sizeof (pc_t), probe->dtpr_aframes,
6114 6235 DTRACE_ANCHORED(probe) ? NULL :
6115 6236 (uint32_t *)arg0);
6116 6237
6117 6238 continue;
6118 6239
6119 6240 case DTRACEACT_JSTACK:
6120 6241 case DTRACEACT_USTACK:
6121 6242 if (!dtrace_priv_proc(state, &mstate))
6122 6243 continue;
6123 6244
6124 6245 /*
6125 6246 * See comment in DIF_VAR_PID.
6126 6247 */
6127 6248 if (DTRACE_ANCHORED(mstate.dtms_probe) &&
6128 6249 CPU_ON_INTR(CPU)) {
6129 6250 int depth = DTRACE_USTACK_NFRAMES(
6130 6251 rec->dtrd_arg) + 1;
6131 6252
6132 6253 dtrace_bzero((void *)(tomax + valoffs),
6133 6254 DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
6134 6255 + depth * sizeof (uint64_t));
6135 6256
6136 6257 continue;
6137 6258 }
6138 6259
6139 6260 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
6140 6261 curproc->p_dtrace_helpers != NULL) {
6141 6262 /*
6142 6263 * This is the slow path -- we have
6143 6264 * allocated string space, and we're
6144 6265 * getting the stack of a process that
6145 6266 * has helpers. Call into a separate
6146 6267 * routine to perform this processing.
6147 6268 */
6148 6269 dtrace_action_ustack(&mstate, state,
6149 6270 (uint64_t *)(tomax + valoffs),
6150 6271 rec->dtrd_arg);
6151 6272 continue;
6152 6273 }
6153 6274
6154 6275 /*
6155 6276 * Clear the string space, since there's no
6156 6277 * helper to do it for us.
6157 6278 */
6158 6279 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) {
6159 6280 int depth = DTRACE_USTACK_NFRAMES(
6160 6281 rec->dtrd_arg);
6161 6282 size_t strsize = DTRACE_USTACK_STRSIZE(
6162 6283 rec->dtrd_arg);
6163 6284 uint64_t *buf = (uint64_t *)(tomax +
6164 6285 valoffs);
6165 6286 void *strspace = &buf[depth + 1];
6166 6287
6167 6288 dtrace_bzero(strspace,
6168 6289 MIN(depth, strsize));
6169 6290 }
6170 6291
6171 6292 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6172 6293 dtrace_getupcstack((uint64_t *)
6173 6294 (tomax + valoffs),
6174 6295 DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
6175 6296 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6176 6297 continue;
6177 6298
6178 6299 default:
6179 6300 break;
6180 6301 }
6181 6302
6182 6303 dp = act->dta_difo;
6183 6304 ASSERT(dp != NULL);
6184 6305
6185 6306 val = dtrace_dif_emulate(dp, &mstate, vstate, state);
6186 6307
6187 6308 if (*flags & CPU_DTRACE_ERROR)
6188 6309 continue;
6189 6310
6190 6311 switch (act->dta_kind) {
6191 6312 case DTRACEACT_SPECULATE: {
6192 6313 dtrace_rechdr_t *dtrh;
6193 6314
6194 6315 ASSERT(buf == &state->dts_buffer[cpuid]);
6195 6316 buf = dtrace_speculation_buffer(state,
6196 6317 cpuid, val);
6197 6318
6198 6319 if (buf == NULL) {
6199 6320 *flags |= CPU_DTRACE_DROP;
6200 6321 continue;
6201 6322 }
6202 6323
6203 6324 offs = dtrace_buffer_reserve(buf,
6204 6325 ecb->dte_needed, ecb->dte_alignment,
6205 6326 state, NULL);
6206 6327
6207 6328 if (offs < 0) {
6208 6329 *flags |= CPU_DTRACE_DROP;
6209 6330 continue;
6210 6331 }
6211 6332
6212 6333 tomax = buf->dtb_tomax;
6213 6334 ASSERT(tomax != NULL);
6214 6335
6215 6336 if (ecb->dte_size == 0)
6216 6337 continue;
6217 6338
6218 6339 ASSERT3U(ecb->dte_size, >=,
6219 6340 sizeof (dtrace_rechdr_t));
6220 6341 dtrh = ((void *)(tomax + offs));
6221 6342 dtrh->dtrh_epid = ecb->dte_epid;
6222 6343 /*
6223 6344 * When the speculation is committed, all of
6224 6345 * the records in the speculative buffer will
6225 6346 * have their timestamps set to the commit
6226 6347 * time. Until then, it is set to a sentinel
6227 6348 * value, for debugability.
6228 6349				 * value, for debuggability.
6229 6350 DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX);
6230 6351 continue;
6231 6352 }
6232 6353
6233 6354 case DTRACEACT_CHILL:
6234 6355 if (dtrace_priv_kernel_destructive(state))
6235 6356 dtrace_action_chill(&mstate, val);
6236 6357 continue;
6237 6358
6238 6359 case DTRACEACT_RAISE:
6239 6360 if (dtrace_priv_proc_destructive(state,
6240 6361 &mstate))
6241 6362 dtrace_action_raise(val);
6242 6363 continue;
6243 6364
6244 6365 case DTRACEACT_COMMIT:
6245 6366 ASSERT(!committed);
6246 6367
6247 6368 /*
6248 6369 * We need to commit our buffer state.
6249 6370 */
6250 6371 if (ecb->dte_size)
6251 6372 buf->dtb_offset = offs + ecb->dte_size;
6252 6373 buf = &state->dts_buffer[cpuid];
6253 6374 dtrace_speculation_commit(state, cpuid, val);
6254 6375 committed = 1;
6255 6376 continue;
6256 6377
6257 6378 case DTRACEACT_DISCARD:
6258 6379 dtrace_speculation_discard(state, cpuid, val);
6259 6380 continue;
6260 6381
6261 6382 case DTRACEACT_DIFEXPR:
6262 6383 case DTRACEACT_LIBACT:
6263 6384 case DTRACEACT_PRINTF:
6264 6385 case DTRACEACT_PRINTA:
6265 6386 case DTRACEACT_SYSTEM:
6266 6387 case DTRACEACT_FREOPEN:
6267 6388 case DTRACEACT_TRACEMEM:
6268 6389 break;
6269 6390
6270 6391 case DTRACEACT_TRACEMEM_DYNSIZE:
6271 6392 tracememsize = val;
6272 6393 break;
6273 6394
6274 6395 case DTRACEACT_SYM:
6275 6396 case DTRACEACT_MOD:
6276 6397 if (!dtrace_priv_kernel(state))
6277 6398 continue;
6278 6399 break;
6279 6400
6280 6401 case DTRACEACT_USYM:
6281 6402 case DTRACEACT_UMOD:
6282 6403 case DTRACEACT_UADDR: {
6283 6404 struct pid *pid = curthread->t_procp->p_pidp;
6284 6405
6285 6406 if (!dtrace_priv_proc(state, &mstate))
6286 6407 continue;
6287 6408
6288 6409 DTRACE_STORE(uint64_t, tomax,
6289 6410 valoffs, (uint64_t)pid->pid_id);
6290 6411 DTRACE_STORE(uint64_t, tomax,
6291 6412 valoffs + sizeof (uint64_t), val);
6292 6413
6293 6414 continue;
6294 6415 }
6295 6416
6296 6417 case DTRACEACT_EXIT: {
6297 6418 /*
6298 6419 * For the exit action, we are going to attempt
6299 6420 * to atomically set our activity to be
6300 6421 * draining. If this fails (either because
6301 6422 * another CPU has beat us to the exit action,
6302 6423 * or because our current activity is something
6303 6424 * other than ACTIVE or WARMUP), we will
6304 6425 * continue. This assures that the exit action
6305 6426 * can be successfully recorded at most once
6306 6427 * when we're in the ACTIVE state. If we're
6307 6428 * encountering the exit() action while in
6308 6429 * COOLDOWN, however, we want to honor the new
6309 6430 * status code. (We know that we're the only
6310 6431 * thread in COOLDOWN, so there is no race.)
6311 6432 */
6312 6433 void *activity = &state->dts_activity;
6313 6434 dtrace_activity_t current = state->dts_activity;
6314 6435
6315 6436 if (current == DTRACE_ACTIVITY_COOLDOWN)
6316 6437 break;
6317 6438
6318 6439 if (current != DTRACE_ACTIVITY_WARMUP)
6319 6440 current = DTRACE_ACTIVITY_ACTIVE;
6320 6441
6321 6442 if (dtrace_cas32(activity, current,
6322 6443 DTRACE_ACTIVITY_DRAINING) != current) {
6323 6444 *flags |= CPU_DTRACE_DROP;
6324 6445 continue;
6325 6446 }
6326 6447
6327 6448 break;
6328 6449 }
6329 6450
6330 6451 default:
6331 6452 ASSERT(0);
6332 6453 }
6333 6454
6334 6455 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
6335 6456 uintptr_t end = valoffs + size;
6336 6457
6337 6458 if (tracememsize != 0 &&
6338 6459 valoffs + tracememsize < end) {
6339 6460 end = valoffs + tracememsize;
6340 6461 tracememsize = 0;
6341 6462 }
6342 6463
6343 6464 if (!dtrace_vcanload((void *)(uintptr_t)val,
6344 6465 &dp->dtdo_rtype, &mstate, vstate))
6345 6466 continue;
6346 6467
6347 6468 /*
6348 6469 * If this is a string, we're going to only
6349 6470 * load until we find the zero byte -- after
6350 6471 * which we'll store zero bytes.
6351 6472 */
6352 6473 if (dp->dtdo_rtype.dtdt_kind ==
6353 6474 DIF_TYPE_STRING) {
6354 6475 char c = '\0' + 1;
6355 6476 int intuple = act->dta_intuple;
6356 6477 size_t s;
6357 6478
6358 6479 for (s = 0; s < size; s++) {
6359 6480 if (c != '\0')
6360 6481 c = dtrace_load8(val++);
6361 6482
6362 6483 DTRACE_STORE(uint8_t, tomax,
6363 6484 valoffs++, c);
6364 6485
6365 6486 if (c == '\0' && intuple)
6366 6487 break;
6367 6488 }
6368 6489
6369 6490 continue;
6370 6491 }
6371 6492
6372 6493 while (valoffs < end) {
6373 6494 DTRACE_STORE(uint8_t, tomax, valoffs++,
6374 6495 dtrace_load8(val++));
6375 6496 }
6376 6497
6377 6498 continue;
6378 6499 }
6379 6500
6380 6501 switch (size) {
6381 6502 case 0:
6382 6503 break;
6383 6504
6384 6505 case sizeof (uint8_t):
6385 6506 DTRACE_STORE(uint8_t, tomax, valoffs, val);
6386 6507 break;
6387 6508 case sizeof (uint16_t):
6388 6509 DTRACE_STORE(uint16_t, tomax, valoffs, val);
6389 6510 break;
6390 6511 case sizeof (uint32_t):
6391 6512 DTRACE_STORE(uint32_t, tomax, valoffs, val);
6392 6513 break;
6393 6514 case sizeof (uint64_t):
6394 6515 DTRACE_STORE(uint64_t, tomax, valoffs, val);
6395 6516 break;
6396 6517 default:
6397 6518 /*
6398 6519 * Any other size should have been returned by
6399 6520 * reference, not by value.
6400 6521 */
6401 6522 ASSERT(0);
6402 6523 break;
6403 6524 }
6404 6525 }
6405 6526
6406 6527 if (*flags & CPU_DTRACE_DROP)
6407 6528 continue;
6408 6529
6409 6530 if (*flags & CPU_DTRACE_FAULT) {
6410 6531 int ndx;
6411 6532 dtrace_action_t *err;
6412 6533
6413 6534 buf->dtb_errors++;
6414 6535
6415 6536 if (probe->dtpr_id == dtrace_probeid_error) {
6416 6537 /*
6417 6538 * There's nothing we can do -- we had an
6418 6539 * error on the error probe. We bump an
6419 6540 * error counter to at least indicate that
6420 6541 * this condition happened.
6421 6542 */
6422 6543 dtrace_error(&state->dts_dblerrors);
6423 6544 continue;
6424 6545 }
6425 6546
6426 6547 if (vtime) {
6427 6548 /*
6428 6549 * Before recursing on dtrace_probe(), we
6429 6550 * need to explicitly clear out our start
6430 6551 * time to prevent it from being accumulated
6431 6552 * into t_dtrace_vtime.
6432 6553 */
6433 6554 curthread->t_dtrace_start = 0;
6434 6555 }
6435 6556
6436 6557 /*
6437 6558 * Iterate over the actions to figure out which action
6438 6559 * we were processing when we experienced the error.
6439 6560 * Note that act points _past_ the faulting action; if
6440 6561 * act is ecb->dte_action, the fault was in the
6441 6562 * predicate, if it's ecb->dte_action->dta_next it's
6442 6563 * in action #1, and so on.
6443 6564 */
6444 6565 for (err = ecb->dte_action, ndx = 0;
6445 6566 err != act; err = err->dta_next, ndx++)
6446 6567 continue;
6447 6568
6448 6569 dtrace_probe_error(state, ecb->dte_epid, ndx,
6449 6570 (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
6450 6571 mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
6451 6572 cpu_core[cpuid].cpuc_dtrace_illval);
6452 6573
6453 6574 continue;
6454 6575 }
6455 6576
6456 6577 if (!committed)
6457 6578 buf->dtb_offset = offs + ecb->dte_size;
6458 6579 }
6459 6580
6460 6581 end = dtrace_gethrtime();
6461 6582 if (vtime)
6462 6583 curthread->t_dtrace_start = end;
6463 6584
6464 6585 CPU->cpu_dtrace_nsec += end - now;
6465 6586
6466 6587 dtrace_interrupt_enable(cookie);
6467 6588 }
6468 6589
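
One pattern in the action loop above worth isolating is the exit() handling: the transition to DRAINING is made with a compare-and-swap so that exactly one CPU records the exit, and an existing COOLDOWN state is honored rather than overwritten. A sketch of that state machine using C11 atomics in place of dtrace_cas32() (try_exit is ours; the real code also raises CPU_DTRACE_DROP when it loses the race):

	#include <stdatomic.h>
	#include <stdio.h>

	enum activity { ACTIVE, WARMUP, DRAINING, COOLDOWN };

	/*
	 * Attempt the ACTIVE/WARMUP -> DRAINING transition exactly
	 * once; returns 1 if this caller won the race.
	 */
	static int
	try_exit(_Atomic int *activity)
	{
		int current = atomic_load(activity);

		if (current == COOLDOWN)
			return (0);
		if (current != WARMUP)
			current = ACTIVE;

		return (atomic_compare_exchange_strong(activity,
		    &current, DRAINING));
	}

	int
	main(void)
	{
		_Atomic int act = ACTIVE;

		printf("%d\n", try_exit(&act));	/* 1: we performed it */
		printf("%d\n", try_exit(&act));	/* 0: already DRAINING */
		return (0);
	}
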
6469 6590 /*
6470 6591 * DTrace Probe Hashing Functions
6471 6592 *
6472 6593 * The functions in this section (and indeed, the functions in remaining
6473 6594 * sections) are not _called_ from probe context. (Any exceptions to this are
6474 6595 * marked with a "Note:".) Rather, they are called from elsewhere in the
6475 6596 * DTrace framework to look-up probes in, add probes to and remove probes from
6476 6597 * the DTrace probe hashes. (Each probe is hashed by each element of the
6477 6598 * probe tuple -- allowing for fast lookups, regardless of what was
6478 6599 * specified.)
6479 6600 */
6480 6601 static uint_t
6481 6602 dtrace_hash_str(char *p)
6482 6603 {
6483 6604 unsigned int g;
6484 6605 uint_t hval = 0;
6485 6606
6486 6607 while (*p) {
6487 6608 hval = (hval << 4) + *p++;
6488 6609 if ((g = (hval & 0xf0000000)) != 0)
6489 6610 hval ^= g >> 24;
6490 6611 hval &= ~g;
6491 6612 }
6492 6613 return (hval);
6493 6614 }
6494 6615
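
dtrace_hash_str() above is the classic PJW/ELF-style string hash: each character is shifted in, and the top four bits are folded back down so long strings keep mixing. Bucket selection then masks the hash with dth_mask, which is why dth_size is always kept a power of two. A userland copy for illustration (the bucket sizes and probe names here are made up):

	#include <stdio.h>

	static unsigned int
	hash_str(const char *p)
	{
		unsigned int g, hval = 0;

		while (*p) {
			hval = (hval << 4) + *p++;
			if ((g = (hval & 0xf0000000)) != 0)
				hval ^= g >> 24;
			hval &= ~g;	/* clear the folded-in bits */
		}
		return (hval);
	}

	int
	main(void)
	{
		unsigned int mask = 16 - 1;	/* dth_mask for dth_size == 16 */

		printf("fbt     -> bucket %u\n", hash_str("fbt") & mask);
		printf("syscall -> bucket %u\n", hash_str("syscall") & mask);
		return (0);
	}
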
6495 6616 static dtrace_hash_t *
6496 6617 dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
6497 6618 {
6498 6619 dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);
6499 6620
6500 6621 hash->dth_stroffs = stroffs;
6501 6622 hash->dth_nextoffs = nextoffs;
6502 6623 hash->dth_prevoffs = prevoffs;
6503 6624
6504 6625 hash->dth_size = 1;
6505 6626 hash->dth_mask = hash->dth_size - 1;
6506 6627
6507 6628 hash->dth_tab = kmem_zalloc(hash->dth_size *
6508 6629 sizeof (dtrace_hashbucket_t *), KM_SLEEP);
6509 6630
6510 6631 return (hash);
6511 6632 }
6512 6633
6513 6634 static void
6514 6635 dtrace_hash_destroy(dtrace_hash_t *hash)
6515 6636 {
6516 6637 #ifdef DEBUG
6517 6638 int i;
6518 6639
6519 6640 for (i = 0; i < hash->dth_size; i++)
6520 6641 ASSERT(hash->dth_tab[i] == NULL);
6521 6642 #endif
6522 6643
6523 6644 kmem_free(hash->dth_tab,
6524 6645 hash->dth_size * sizeof (dtrace_hashbucket_t *));
6525 6646 kmem_free(hash, sizeof (dtrace_hash_t));
6526 6647 }
6527 6648
6528 6649 static void
6529 6650 dtrace_hash_resize(dtrace_hash_t *hash)
6530 6651 {
6531 6652 int size = hash->dth_size, i, ndx;
6532 6653 int new_size = hash->dth_size << 1;
6533 6654 int new_mask = new_size - 1;
6534 6655 dtrace_hashbucket_t **new_tab, *bucket, *next;
6535 6656
6536 6657 ASSERT((new_size & new_mask) == 0);
6537 6658
6538 6659 new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);
6539 6660
6540 6661 for (i = 0; i < size; i++) {
6541 6662 for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
6542 6663 dtrace_probe_t *probe = bucket->dthb_chain;
6543 6664
6544 6665 ASSERT(probe != NULL);
6545 6666 ndx = DTRACE_HASHSTR(hash, probe) & new_mask;
6546 6667
6547 6668 next = bucket->dthb_next;
6548 6669 bucket->dthb_next = new_tab[ndx];
6549 6670 new_tab[ndx] = bucket;
6550 6671 }
6551 6672 }
6552 6673
6553 6674 kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
6554 6675 hash->dth_tab = new_tab;
6555 6676 hash->dth_size = new_size;
6556 6677 hash->dth_mask = new_mask;
6557 6678 }
6558 6679
6559 6680 static void
6560 6681 dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
6561 6682 {
6562 6683 int hashval = DTRACE_HASHSTR(hash, new);
6563 6684 int ndx = hashval & hash->dth_mask;
6564 6685 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6565 6686 dtrace_probe_t **nextp, **prevp;
6566 6687
6567 6688 for (; bucket != NULL; bucket = bucket->dthb_next) {
6568 6689 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
6569 6690 goto add;
6570 6691 }
6571 6692
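	/*
	 * No bucket with a matching key exists.  If the bucket count
	 * already exceeds twice the number of table slots (that is,
	 * chains average more than two buckets deep), double the table
	 * and retry the add.
	 */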
6572 6693 if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
6573 6694 dtrace_hash_resize(hash);
6574 6695 dtrace_hash_add(hash, new);
6575 6696 return;
6576 6697 }
6577 6698
6578 6699 bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
6579 6700 bucket->dthb_next = hash->dth_tab[ndx];
6580 6701 hash->dth_tab[ndx] = bucket;
6581 6702 hash->dth_nbuckets++;
6582 6703
6583 6704 add:
6584 6705 nextp = DTRACE_HASHNEXT(hash, new);
6585 6706 ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
6586 6707 *nextp = bucket->dthb_chain;
6587 6708
6588 6709 if (bucket->dthb_chain != NULL) {
6589 6710 prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
6590 6711 ASSERT(*prevp == NULL);
6591 6712 *prevp = new;
6592 6713 }
6593 6714
6594 6715 bucket->dthb_chain = new;
6595 6716 bucket->dthb_len++;
6596 6717 }
6597 6718
6598 6719 static dtrace_probe_t *
6599 6720 dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
6600 6721 {
6601 6722 int hashval = DTRACE_HASHSTR(hash, template);
6602 6723 int ndx = hashval & hash->dth_mask;
6603 6724 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6604 6725
6605 6726 for (; bucket != NULL; bucket = bucket->dthb_next) {
6606 6727 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
6607 6728 return (bucket->dthb_chain);
6608 6729 }
6609 6730
6610 6731 return (NULL);
6611 6732 }
6612 6733
6613 6734 static int
6614 6735 dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
6615 6736 {
6616 6737 int hashval = DTRACE_HASHSTR(hash, template);
6617 6738 int ndx = hashval & hash->dth_mask;
6618 6739 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6619 6740
6620 6741 for (; bucket != NULL; bucket = bucket->dthb_next) {
6621 6742 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
6622 6743 return (bucket->dthb_len);
6623 6744 }
6624 6745
6625 6746 	return (0);
6626 6747 }
6627 6748
6628 6749 static void
6629 6750 dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
6630 6751 {
6631 6752 int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
6632 6753 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
6633 6754
6634 6755 dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
6635 6756 dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);
6636 6757
6637 6758 /*
6638 6759 * Find the bucket that we're removing this probe from.
6639 6760 */
6640 6761 for (; bucket != NULL; bucket = bucket->dthb_next) {
6641 6762 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
6642 6763 break;
6643 6764 }
6644 6765
6645 6766 ASSERT(bucket != NULL);
6646 6767
6647 6768 if (*prevp == NULL) {
6648 6769 if (*nextp == NULL) {
6649 6770 /*
6650 6771 * The removed probe was the only probe on this
6651 6772 * bucket; we need to remove the bucket.
6652 6773 */
6653 6774 dtrace_hashbucket_t *b = hash->dth_tab[ndx];
6654 6775
6655 6776 ASSERT(bucket->dthb_chain == probe);
6656 6777 ASSERT(b != NULL);
6657 6778
6658 6779 if (b == bucket) {
6659 6780 hash->dth_tab[ndx] = bucket->dthb_next;
6660 6781 } else {
6661 6782 while (b->dthb_next != bucket)
6662 6783 b = b->dthb_next;
6663 6784 b->dthb_next = bucket->dthb_next;
6664 6785 }
6665 6786
6666 6787 ASSERT(hash->dth_nbuckets > 0);
6667 6788 hash->dth_nbuckets--;
6668 6789 kmem_free(bucket, sizeof (dtrace_hashbucket_t));
6669 6790 return;
6670 6791 }
6671 6792
6672 6793 bucket->dthb_chain = *nextp;
6673 6794 } else {
6674 6795 *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
6675 6796 }
6676 6797
6677 6798 if (*nextp != NULL)
6678 6799 *(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
6679 6800 }
6680 6801
6681 6802 /*
6682 6803 * DTrace Utility Functions
6683 6804 *
6684 6805 * These are random utility functions that are _not_ called from probe context.
6685 6806 */
6686 6807 static int
6687 6808 dtrace_badattr(const dtrace_attribute_t *a)
6688 6809 {
6689 6810 return (a->dtat_name > DTRACE_STABILITY_MAX ||
6690 6811 a->dtat_data > DTRACE_STABILITY_MAX ||
6691 6812 a->dtat_class > DTRACE_CLASS_MAX);
6692 6813 }
6693 6814
6694 6815 /*
6695 6816  * Return a copy of the specified string. If the string is NULL, this
6696 6817  * function returns a zero-length string.
6697 6818 */
6698 6819 static char *
6699 6820 dtrace_strdup(const char *str)
6700 6821 {
6701 6822 char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);
6702 6823
6703 6824 if (str != NULL)
6704 6825 (void) strcpy(new, str);
6705 6826
6706 6827 return (new);
6707 6828 }
6708 6829
6709 6830 #define DTRACE_ISALPHA(c) \
6710 6831 (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
6711 6832
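/*
 * Returns non-zero if the specified name is not well-formed.  For
 * example (illustrative only): "fbt", "dtrace_test" and "mod-5" are
 * acceptable names, while "5mod" (leading digit) and "my mod" (embedded
 * space) are not.
 */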
6712 6833 static int
6713 6834 dtrace_badname(const char *s)
6714 6835 {
6715 6836 char c;
6716 6837
6717 6838 if (s == NULL || (c = *s++) == '\0')
6718 6839 return (0);
6719 6840
6720 6841 if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
6721 6842 return (1);
6722 6843
6723 6844 while ((c = *s++) != '\0') {
6724 6845 if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
6725 6846 c != '-' && c != '_' && c != '.' && c != '`')
6726 6847 return (1);
6727 6848 }
6728 6849
6729 6850 return (0);
6730 6851 }
6731 6852
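/*
 * Derive a consumer's DTrace privilege mask (and uid/zoneid) from its
 * credential.  For example (illustrative only): a credential holding
 * dtrace_proc and proc_owner -- but no other DTrace-related privileges
 * -- yields (DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER), with *uidp and
 * *zoneidp filled in from the credential.
 */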
6732 6853 static void
6733 6854 dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
6734 6855 {
6735 6856 uint32_t priv;
6736 6857
6737 6858 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
6738 6859 /*
6739 6860 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
6740 6861 */
6741 6862 priv = DTRACE_PRIV_ALL;
6742 6863 } else {
6743 6864 *uidp = crgetuid(cr);
6744 6865 *zoneidp = crgetzoneid(cr);
6745 6866
6746 6867 priv = 0;
6747 6868 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
6748 6869 priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
6749 6870 else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
6750 6871 priv |= DTRACE_PRIV_USER;
6751 6872 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
6752 6873 priv |= DTRACE_PRIV_PROC;
6753 6874 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
6754 6875 priv |= DTRACE_PRIV_OWNER;
6755 6876 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
6756 6877 priv |= DTRACE_PRIV_ZONEOWNER;
6757 6878 }
6758 6879
6759 6880 *privp = priv;
6760 6881 }
6761 6882
6762 6883 #ifdef DTRACE_ERRDEBUG
6763 6884 static void
6764 6885 dtrace_errdebug(const char *str)
6765 6886 {
6766 6887 int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
6767 6888 int occupied = 0;
6768 6889
6769 6890 mutex_enter(&dtrace_errlock);
6770 6891 dtrace_errlast = str;
6771 6892 dtrace_errthread = curthread;
6772 6893
6773 6894 while (occupied++ < DTRACE_ERRHASHSZ) {
6774 6895 if (dtrace_errhash[hval].dter_msg == str) {
6775 6896 dtrace_errhash[hval].dter_count++;
6776 6897 goto out;
6777 6898 }
6778 6899
6779 6900 if (dtrace_errhash[hval].dter_msg != NULL) {
6780 6901 hval = (hval + 1) % DTRACE_ERRHASHSZ;
6781 6902 continue;
6782 6903 }
6783 6904
6784 6905 dtrace_errhash[hval].dter_msg = str;
6785 6906 dtrace_errhash[hval].dter_count = 1;
6786 6907 goto out;
6787 6908 }
6788 6909
6789 6910 panic("dtrace: undersized error hash");
6790 6911 out:
6791 6912 mutex_exit(&dtrace_errlock);
6792 6913 }
6793 6914 #endif
6794 6915
6795 6916 /*
6796 6917 * DTrace Matching Functions
6797 6918 *
6798 6919 * These functions are used to match groups of probes, given some elements of
6799 6920 * a probe tuple, or some globbed expressions for elements of a probe tuple.
6800 6921 */
6801 6922 static int
6802 6923 dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
6803 6924 zoneid_t zoneid)
6804 6925 {
6805 6926 if (priv != DTRACE_PRIV_ALL) {
6806 6927 uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
6807 6928 uint32_t match = priv & ppriv;
6808 6929
6809 6930 /*
6810 6931 * No PRIV_DTRACE_* privileges...
6811 6932 */
6812 6933 if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
6813 6934 DTRACE_PRIV_KERNEL)) == 0)
6814 6935 return (0);
6815 6936
6816 6937 /*
6817 6938 * No matching bits, but there were bits to match...
6818 6939 */
6819 6940 if (match == 0 && ppriv != 0)
6820 6941 return (0);
6821 6942
6822 6943 /*
6823 6944 * Need to have permissions to the process, but don't...
6824 6945 */
6825 6946 if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
6826 6947 uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
6827 6948 return (0);
6828 6949 }
6829 6950
6830 6951 /*
6831 6952 * Need to be in the same zone unless we possess the
6832 6953 * privilege to examine all zones.
6833 6954 */
6834 6955 if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
6835 6956 zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
6836 6957 return (0);
6837 6958 }
6838 6959 }
6839 6960
6840 6961 return (1);
6841 6962 }
6842 6963
6843 6964 /*
6844 6965 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
6845 6966 * consists of input pattern strings and an ops-vector to evaluate them.
6846 6967 * This function returns >0 for match, 0 for no match, and <0 for error.
6847 6968 */
6848 6969 static int
6849 6970 dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
6850 6971 uint32_t priv, uid_t uid, zoneid_t zoneid)
6851 6972 {
6852 6973 dtrace_provider_t *pvp = prp->dtpr_provider;
6853 6974 int rv;
6854 6975
6855 6976 if (pvp->dtpv_defunct)
6856 6977 return (0);
6857 6978
6858 6979 if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
6859 6980 return (rv);
6860 6981
6861 6982 if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
6862 6983 return (rv);
6863 6984
6864 6985 if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
6865 6986 return (rv);
6866 6987
6867 6988 if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
6868 6989 return (rv);
6869 6990
6870 6991 if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
6871 6992 return (0);
6872 6993
6873 6994 return (rv);
6874 6995 }
6875 6996
6876 6997 /*
6877 6998 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
6878 6999 * interface for matching a glob pattern 'p' to an input string 's'. Unlike
6879 7000 * libc's version, the kernel version only applies to 8-bit ASCII strings.
6880 7001 * In addition, all of the recursion cases except for '*' matching have been
6881 7002 * unwound. For '*', we still implement recursive evaluation, but a depth
6882 7003 * counter is maintained and matching is aborted if we recurse too deep.
6883 7004 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
6884 7005 */
6885 7006 static int
6886 7007 dtrace_match_glob(const char *s, const char *p, int depth)
6887 7008 {
6888 7009 const char *olds;
6889 7010 char s1, c;
6890 7011 int gs;
6891 7012
6892 7013 if (depth > DTRACE_PROBEKEY_MAXDEPTH)
6893 7014 return (-1);
6894 7015
6895 7016 if (s == NULL)
6896 7017 s = ""; /* treat NULL as empty string */
6897 7018
6898 7019 top:
6899 7020 olds = s;
6900 7021 s1 = *s++;
6901 7022
6902 7023 if (p == NULL)
6903 7024 return (0);
6904 7025
6905 7026 if ((c = *p++) == '\0')
6906 7027 return (s1 == '\0');
6907 7028
6908 7029 switch (c) {
6909 7030 case '[': {
6910 7031 int ok = 0, notflag = 0;
6911 7032 char lc = '\0';
6912 7033
6913 7034 if (s1 == '\0')
6914 7035 return (0);
6915 7036
6916 7037 if (*p == '!') {
6917 7038 notflag = 1;
6918 7039 p++;
6919 7040 }
6920 7041
6921 7042 if ((c = *p++) == '\0')
6922 7043 return (0);
6923 7044
6924 7045 do {
6925 7046 if (c == '-' && lc != '\0' && *p != ']') {
6926 7047 if ((c = *p++) == '\0')
6927 7048 return (0);
6928 7049 if (c == '\\' && (c = *p++) == '\0')
6929 7050 return (0);
6930 7051
6931 7052 if (notflag) {
6932 7053 if (s1 < lc || s1 > c)
6933 7054 ok++;
6934 7055 else
6935 7056 return (0);
6936 7057 } else if (lc <= s1 && s1 <= c)
6937 7058 ok++;
6938 7059
6939 7060 } else if (c == '\\' && (c = *p++) == '\0')
6940 7061 return (0);
6941 7062
6942 7063 lc = c; /* save left-hand 'c' for next iteration */
6943 7064
6944 7065 if (notflag) {
6945 7066 if (s1 != c)
6946 7067 ok++;
6947 7068 else
6948 7069 return (0);
6949 7070 } else if (s1 == c)
6950 7071 ok++;
6951 7072
6952 7073 if ((c = *p++) == '\0')
6953 7074 return (0);
6954 7075
6955 7076 } while (c != ']');
6956 7077
6957 7078 if (ok)
6958 7079 goto top;
6959 7080
6960 7081 return (0);
6961 7082 }
6962 7083
6963 7084 case '\\':
6964 7085 if ((c = *p++) == '\0')
6965 7086 return (0);
6966 7087 /*FALLTHRU*/
6967 7088
6968 7089 default:
6969 7090 if (c != s1)
6970 7091 return (0);
6971 7092 /*FALLTHRU*/
6972 7093
6973 7094 case '?':
6974 7095 if (s1 != '\0')
6975 7096 goto top;
6976 7097 return (0);
6977 7098
6978 7099 case '*':
6979 7100 while (*p == '*')
6980 7101 p++; /* consecutive *'s are identical to a single one */
6981 7102
6982 7103 if (*p == '\0')
6983 7104 return (1);
6984 7105
6985 7106 for (s = olds; *s != '\0'; s++) {
6986 7107 if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
6987 7108 return (gs);
6988 7109 }
6989 7110
6990 7111 return (0);
6991 7112 }
6992 7113 }
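
/*
 * Illustrative examples (not exercised by the framework itself):
 *
 *	dtrace_match_glob("read", "read", 0)	=> 1	(literal match)
 *	dtrace_match_glob("read", "re*", 0)	=> 1	('*' matches "ad")
 *	dtrace_match_glob("read", "r?ad", 0)	=> 1	('?' matches 'e')
 *	dtrace_match_glob("read", "[rw]*", 0)	=> 1	(class matches 'r')
 *	dtrace_match_glob("read", "write", 0)	=> 0	(no match)
 */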
6993 7114
6994 7115 /*ARGSUSED*/
6995 7116 static int
6996 7117 dtrace_match_string(const char *s, const char *p, int depth)
6997 7118 {
6998 7119 return (s != NULL && strcmp(s, p) == 0);
6999 7120 }
7000 7121
7001 7122 /*ARGSUSED*/
7002 7123 static int
7003 7124 dtrace_match_nul(const char *s, const char *p, int depth)
7004 7125 {
7005 7126 return (1); /* always match the empty pattern */
7006 7127 }
7007 7128
7008 7129 /*ARGSUSED*/
7009 7130 static int
7010 7131 dtrace_match_nonzero(const char *s, const char *p, int depth)
7011 7132 {
7012 7133 return (s != NULL && s[0] != '\0');
7013 7134 }
7014 7135
7015 7136 static int
7016 7137 dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
7017 7138 zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
7018 7139 {
7019 7140 dtrace_probe_t template, *probe;
7020 7141 dtrace_hash_t *hash = NULL;
7021 7142 int len, rc, best = INT_MAX, nmatched = 0;
7022 7143 dtrace_id_t i;
7023 7144
7024 7145 ASSERT(MUTEX_HELD(&dtrace_lock));
7025 7146
7026 7147 /*
7027 7148 * If the probe ID is specified in the key, just lookup by ID and
7028 7149 * invoke the match callback once if a matching probe is found.
7029 7150 */
7030 7151 if (pkp->dtpk_id != DTRACE_IDNONE) {
7031 7152 if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
7032 7153 dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
7033 7154 if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
7034 7155 return (DTRACE_MATCH_FAIL);
7035 7156 nmatched++;
7036 7157 }
7037 7158 return (nmatched);
7038 7159 }
7039 7160
7040 7161 template.dtpr_mod = (char *)pkp->dtpk_mod;
7041 7162 template.dtpr_func = (char *)pkp->dtpk_func;
7042 7163 template.dtpr_name = (char *)pkp->dtpk_name;
7043 7164
7044 7165 /*
7045 7166 * We want to find the most distinct of the module name, function
7046 7167 * name, and name. So for each one that is not a glob pattern or
7047 7168 * empty string, we perform a lookup in the corresponding hash and
7048 7169 * use the hash table with the fewest collisions to do our search.
7049 7170 */
7050 7171 if (pkp->dtpk_mmatch == &dtrace_match_string &&
7051 7172 (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
7052 7173 best = len;
7053 7174 hash = dtrace_bymod;
7054 7175 }
7055 7176
7056 7177 if (pkp->dtpk_fmatch == &dtrace_match_string &&
7057 7178 (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
7058 7179 best = len;
7059 7180 hash = dtrace_byfunc;
7060 7181 }
7061 7182
7062 7183 if (pkp->dtpk_nmatch == &dtrace_match_string &&
7063 7184 (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
7064 7185 best = len;
7065 7186 hash = dtrace_byname;
7066 7187 }
7067 7188
7068 7189 /*
7069 7190 * If we did not select a hash table, iterate over every probe and
7070 7191 * invoke our callback for each one that matches our input probe key.
7071 7192 */
7072 7193 if (hash == NULL) {
7073 7194 for (i = 0; i < dtrace_nprobes; i++) {
7074 7195 if ((probe = dtrace_probes[i]) == NULL ||
7075 7196 dtrace_match_probe(probe, pkp, priv, uid,
7076 7197 zoneid) <= 0)
7077 7198 continue;
7078 7199
7079 7200 nmatched++;
7080 7201
7081 7202 if ((rc = (*matched)(probe, arg)) !=
7082 7203 DTRACE_MATCH_NEXT) {
7083 7204 if (rc == DTRACE_MATCH_FAIL)
7084 7205 return (DTRACE_MATCH_FAIL);
7085 7206 break;
7086 7207 }
7087 7208 }
7088 7209
7089 7210 return (nmatched);
7090 7211 }
7091 7212
7092 7213 /*
7093 7214 * If we selected a hash table, iterate over each probe of the same key
7094 7215 * name and invoke the callback for every probe that matches the other
7095 7216 * attributes of our input probe key.
7096 7217 */
7097 7218 for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
7098 7219 probe = *(DTRACE_HASHNEXT(hash, probe))) {
7099 7220
7100 7221 if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
7101 7222 continue;
7102 7223
7103 7224 nmatched++;
7104 7225
7105 7226 if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
7106 7227 if (rc == DTRACE_MATCH_FAIL)
7107 7228 return (DTRACE_MATCH_FAIL);
7108 7229 break;
7109 7230 }
7110 7231 }
7111 7232
7112 7233 return (nmatched);
7113 7234 }
7114 7235
7115 7236 /*
7116 7237  * Return the function pointer dtrace_match_probe() should use to compare the
7117 7238 * specified pattern with a string. For NULL or empty patterns, we select
7118 7239 * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob().
7119 7240 * For non-empty non-glob strings, we use dtrace_match_string().
7120 7241 */
7121 7242 static dtrace_probekey_f *
7122 7243 dtrace_probekey_func(const char *p)
7123 7244 {
7124 7245 char c;
7125 7246
7126 7247 if (p == NULL || *p == '\0')
7127 7248 return (&dtrace_match_nul);
7128 7249
7129 7250 while ((c = *p++) != '\0') {
7130 7251 if (c == '[' || c == '?' || c == '*' || c == '\\')
7131 7252 return (&dtrace_match_glob);
7132 7253 }
7133 7254
7134 7255 return (&dtrace_match_string);
7135 7256 }
7136 7257
7137 7258 /*
7138 7259 * Build a probe comparison key for use with dtrace_match_probe() from the
7139 7260 * given probe description. By convention, a null key only matches anchored
7140 7261 * probes: if each field is the empty string, reset dtpk_fmatch to
7141 7262 * dtrace_match_nonzero().
7142 7263 */
7143 7264 static void
7144 7265 dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
7145 7266 {
7146 7267 pkp->dtpk_prov = pdp->dtpd_provider;
7147 7268 pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);
7148 7269
7149 7270 pkp->dtpk_mod = pdp->dtpd_mod;
7150 7271 pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);
7151 7272
7152 7273 pkp->dtpk_func = pdp->dtpd_func;
7153 7274 pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);
7154 7275
7155 7276 pkp->dtpk_name = pdp->dtpd_name;
7156 7277 pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);
7157 7278
7158 7279 pkp->dtpk_id = pdp->dtpd_id;
7159 7280
7160 7281 if (pkp->dtpk_id == DTRACE_IDNONE &&
7161 7282 pkp->dtpk_pmatch == &dtrace_match_nul &&
7162 7283 pkp->dtpk_mmatch == &dtrace_match_nul &&
7163 7284 pkp->dtpk_fmatch == &dtrace_match_nul &&
7164 7285 pkp->dtpk_nmatch == &dtrace_match_nul)
7165 7286 pkp->dtpk_fmatch = &dtrace_match_nonzero;
7166 7287 }
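
/*
 * For example (illustrative only), the description fbt::foo*:entry
 * yields dtrace_match_string() for the provider and name fields,
 * dtrace_match_glob() for the function field and -- because the module
 * field is empty -- dtrace_match_nul() for the module.
 */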
7167 7288
7168 7289 /*
7169 7290 * DTrace Provider-to-Framework API Functions
7170 7291 *
7171 7292 * These functions implement much of the Provider-to-Framework API, as
7172 7293 * described in <sys/dtrace.h>. The parts of the API not in this section are
7173 7294 * the functions in the API for probe management (found below), and
7174 7295 * dtrace_probe() itself (found above).
7175 7296 */
7176 7297
7177 7298 /*
7178 7299 * Register the calling provider with the DTrace framework. This should
7179 7300 * generally be called by DTrace providers in their attach(9E) entry point.
7180 7301 */
7181 7302 int
7182 7303 dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
7183 7304 cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
7184 7305 {
7185 7306 dtrace_provider_t *provider;
7186 7307
7187 7308 if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
7188 7309 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7189 7310 "arguments", name ? name : "<NULL>");
7190 7311 return (EINVAL);
7191 7312 }
7192 7313
7193 7314 if (name[0] == '\0' || dtrace_badname(name)) {
7194 7315 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7195 7316 "provider name", name);
7196 7317 return (EINVAL);
7197 7318 }
7198 7319
7199 7320 if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
7200 7321 pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
7201 7322 pops->dtps_destroy == NULL ||
7202 7323 ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
7203 7324 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7204 7325 "provider ops", name);
7205 7326 return (EINVAL);
7206 7327 }
7207 7328
7208 7329 if (dtrace_badattr(&pap->dtpa_provider) ||
7209 7330 dtrace_badattr(&pap->dtpa_mod) ||
7210 7331 dtrace_badattr(&pap->dtpa_func) ||
7211 7332 dtrace_badattr(&pap->dtpa_name) ||
7212 7333 dtrace_badattr(&pap->dtpa_args)) {
7213 7334 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7214 7335 "provider attributes", name);
7215 7336 return (EINVAL);
7216 7337 }
7217 7338
7218 7339 if (priv & ~DTRACE_PRIV_ALL) {
7219 7340 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7220 7341 "privilege attributes", name);
7221 7342 return (EINVAL);
7222 7343 }
7223 7344
7224 7345 if ((priv & DTRACE_PRIV_KERNEL) &&
7225 7346 (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
7226 7347 pops->dtps_mode == NULL) {
7227 7348 cmn_err(CE_WARN, "failed to register provider '%s': need "
7228 7349 "dtps_mode() op for given privilege attributes", name);
7229 7350 return (EINVAL);
7230 7351 }
7231 7352
7232 7353 provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
7233 7354 provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
7234 7355 (void) strcpy(provider->dtpv_name, name);
7235 7356
7236 7357 provider->dtpv_attr = *pap;
7237 7358 provider->dtpv_priv.dtpp_flags = priv;
7238 7359 if (cr != NULL) {
7239 7360 provider->dtpv_priv.dtpp_uid = crgetuid(cr);
7240 7361 provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
7241 7362 }
7242 7363 provider->dtpv_pops = *pops;
7243 7364
7244 7365 if (pops->dtps_provide == NULL) {
7245 7366 ASSERT(pops->dtps_provide_module != NULL);
7246 7367 provider->dtpv_pops.dtps_provide =
7247 7368 (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
7248 7369 }
7249 7370
7250 7371 if (pops->dtps_provide_module == NULL) {
7251 7372 ASSERT(pops->dtps_provide != NULL);
7252 7373 provider->dtpv_pops.dtps_provide_module =
7253 7374 (void (*)(void *, struct modctl *))dtrace_nullop;
7254 7375 }
7255 7376
7256 7377 if (pops->dtps_suspend == NULL) {
7257 7378 ASSERT(pops->dtps_resume == NULL);
7258 7379 provider->dtpv_pops.dtps_suspend =
7259 7380 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
7260 7381 provider->dtpv_pops.dtps_resume =
7261 7382 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
7262 7383 }
7263 7384
7264 7385 provider->dtpv_arg = arg;
7265 7386 *idp = (dtrace_provider_id_t)provider;
7266 7387
7267 7388 if (pops == &dtrace_provider_ops) {
7268 7389 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
7269 7390 ASSERT(MUTEX_HELD(&dtrace_lock));
7270 7391 ASSERT(dtrace_anon.dta_enabling == NULL);
7271 7392
7272 7393 /*
7273 7394 * We make sure that the DTrace provider is at the head of
7274 7395 * the provider chain.
7275 7396 */
7276 7397 provider->dtpv_next = dtrace_provider;
7277 7398 dtrace_provider = provider;
7278 7399 return (0);
7279 7400 }
7280 7401
7281 7402 mutex_enter(&dtrace_provider_lock);
7282 7403 mutex_enter(&dtrace_lock);
7283 7404
7284 7405 /*
7285 7406 * If there is at least one provider registered, we'll add this
7286 7407 * provider after the first provider.
7287 7408 */
7288 7409 if (dtrace_provider != NULL) {
7289 7410 provider->dtpv_next = dtrace_provider->dtpv_next;
7290 7411 dtrace_provider->dtpv_next = provider;
7291 7412 } else {
7292 7413 dtrace_provider = provider;
7293 7414 }
7294 7415
7295 7416 if (dtrace_retained != NULL) {
7296 7417 dtrace_enabling_provide(provider);
7297 7418
7298 7419 /*
7299 7420 * Now we need to call dtrace_enabling_matchall() -- which
7300 7421 * will acquire cpu_lock and dtrace_lock. We therefore need
7301 7422 * to drop all of our locks before calling into it...
7302 7423 */
7303 7424 mutex_exit(&dtrace_lock);
7304 7425 mutex_exit(&dtrace_provider_lock);
7305 7426 dtrace_enabling_matchall();
7306 7427
7307 7428 return (0);
7308 7429 }
7309 7430
7310 7431 mutex_exit(&dtrace_lock);
7311 7432 mutex_exit(&dtrace_provider_lock);
7312 7433
7313 7434 return (0);
7314 7435 }
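
/*
 * Illustrative sketch (hypothetical provider, not part of the
 * framework): an attach(9E) routine registers by handing its name,
 * attributes, privilege level and ops-vector to dtrace_register().
 * "example_attr" and "example_pops" are assumed to be defined elsewhere
 * in the provider, per <sys/dtrace.h>:
 *
 *	static dtrace_provider_id_t example_id;
 *
 *	static int
 *	example_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 *	{
 *		if (dtrace_register("example", &example_attr,
 *		    DTRACE_PRIV_KERNEL, NULL, &example_pops, NULL,
 *		    &example_id) != 0)
 *			return (DDI_FAILURE);
 *
 *		return (DDI_SUCCESS);
 *	}
 */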
7315 7436
7316 7437 /*
7317 7438 * Unregister the specified provider from the DTrace framework. This should
7318 7439 * generally be called by DTrace providers in their detach(9E) entry point.
7319 7440 */
7320 7441 int
7321 7442 dtrace_unregister(dtrace_provider_id_t id)
7322 7443 {
7323 7444 dtrace_provider_t *old = (dtrace_provider_t *)id;
7324 7445 dtrace_provider_t *prev = NULL;
7325 7446 int i, self = 0, noreap = 0;
7326 7447 dtrace_probe_t *probe, *first = NULL;
7327 7448
7328 7449 if (old->dtpv_pops.dtps_enable ==
7329 7450 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
7330 7451 /*
7331 7452 * If DTrace itself is the provider, we're called with locks
7332 7453 * already held.
7333 7454 */
7334 7455 ASSERT(old == dtrace_provider);
7335 7456 ASSERT(dtrace_devi != NULL);
7336 7457 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
7337 7458 ASSERT(MUTEX_HELD(&dtrace_lock));
7338 7459 self = 1;
7339 7460
7340 7461 if (dtrace_provider->dtpv_next != NULL) {
7341 7462 /*
7342 7463 * There's another provider here; return failure.
7343 7464 */
7344 7465 return (EBUSY);
7345 7466 }
7346 7467 } else {
7347 7468 mutex_enter(&dtrace_provider_lock);
7348 7469 mutex_enter(&mod_lock);
7349 7470 mutex_enter(&dtrace_lock);
7350 7471 }
7351 7472
7352 7473 /*
7353 7474 * If anyone has /dev/dtrace open, or if there are anonymous enabled
7354 7475 * probes, we refuse to let providers slither away, unless this
7355 7476 * provider has already been explicitly invalidated.
7356 7477 */
7357 7478 if (!old->dtpv_defunct &&
7358 7479 (dtrace_opens || (dtrace_anon.dta_state != NULL &&
7359 7480 dtrace_anon.dta_state->dts_necbs > 0))) {
7360 7481 if (!self) {
7361 7482 mutex_exit(&dtrace_lock);
7362 7483 mutex_exit(&mod_lock);
7363 7484 mutex_exit(&dtrace_provider_lock);
7364 7485 }
7365 7486 return (EBUSY);
7366 7487 }
7367 7488
7368 7489 /*
7369 7490 * Attempt to destroy the probes associated with this provider.
7370 7491 */
7371 7492 for (i = 0; i < dtrace_nprobes; i++) {
7372 7493 if ((probe = dtrace_probes[i]) == NULL)
7373 7494 continue;
7374 7495
7375 7496 if (probe->dtpr_provider != old)
7376 7497 continue;
7377 7498
7378 7499 if (probe->dtpr_ecb == NULL)
7379 7500 continue;
7380 7501
7381 7502 /*
7382 7503 * If we are trying to unregister a defunct provider, and the
7383 7504 * provider was made defunct within the interval dictated by
7384 7505 * dtrace_unregister_defunct_reap, we'll (asynchronously)
7385 7506 * attempt to reap our enablings. To denote that the provider
7386 7507 * should reattempt to unregister itself at some point in the
7387 7508 * future, we will return a differentiable error code (EAGAIN
7388 7509 * instead of EBUSY) in this case.
7389 7510 */
7390 7511 if (dtrace_gethrtime() - old->dtpv_defunct >
7391 7512 dtrace_unregister_defunct_reap)
7392 7513 noreap = 1;
7393 7514
7394 7515 if (!self) {
7395 7516 mutex_exit(&dtrace_lock);
7396 7517 mutex_exit(&mod_lock);
7397 7518 mutex_exit(&dtrace_provider_lock);
7398 7519 }
7399 7520
7400 7521 if (noreap)
7401 7522 return (EBUSY);
7402 7523
7403 7524 (void) taskq_dispatch(dtrace_taskq,
7404 7525 (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);
7405 7526
7406 7527 return (EAGAIN);
7407 7528 }
7408 7529
7409 7530 /*
7410 7531 * All of the probes for this provider are disabled; we can safely
7411 7532 * remove all of them from their hash chains and from the probe array.
7412 7533 */
7413 7534 for (i = 0; i < dtrace_nprobes; i++) {
7414 7535 if ((probe = dtrace_probes[i]) == NULL)
7415 7536 continue;
7416 7537
7417 7538 if (probe->dtpr_provider != old)
7418 7539 continue;
7419 7540
7420 7541 dtrace_probes[i] = NULL;
7421 7542
7422 7543 dtrace_hash_remove(dtrace_bymod, probe);
7423 7544 dtrace_hash_remove(dtrace_byfunc, probe);
7424 7545 dtrace_hash_remove(dtrace_byname, probe);
7425 7546
7426 7547 if (first == NULL) {
7427 7548 first = probe;
7428 7549 probe->dtpr_nextmod = NULL;
7429 7550 } else {
7430 7551 probe->dtpr_nextmod = first;
7431 7552 first = probe;
7432 7553 }
7433 7554 }
7434 7555
7435 7556 /*
7436 7557 * The provider's probes have been removed from the hash chains and
7437 7558 * from the probe array. Now issue a dtrace_sync() to be sure that
7438 7559 * everyone has cleared out from any probe array processing.
7439 7560 */
7440 7561 dtrace_sync();
7441 7562
7442 7563 for (probe = first; probe != NULL; probe = first) {
7443 7564 first = probe->dtpr_nextmod;
7444 7565
7445 7566 old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
7446 7567 probe->dtpr_arg);
7447 7568 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
7448 7569 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
7449 7570 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
7450 7571 vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
7451 7572 kmem_free(probe, sizeof (dtrace_probe_t));
7452 7573 }
7453 7574
7454 7575 if ((prev = dtrace_provider) == old) {
7455 7576 ASSERT(self || dtrace_devi == NULL);
7456 7577 ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
7457 7578 dtrace_provider = old->dtpv_next;
7458 7579 } else {
7459 7580 while (prev != NULL && prev->dtpv_next != old)
7460 7581 prev = prev->dtpv_next;
7461 7582
7462 7583 if (prev == NULL) {
7463 7584 panic("attempt to unregister non-existent "
7464 7585 "dtrace provider %p\n", (void *)id);
7465 7586 }
7466 7587
7467 7588 prev->dtpv_next = old->dtpv_next;
7468 7589 }
7469 7590
7470 7591 if (!self) {
7471 7592 mutex_exit(&dtrace_lock);
7472 7593 mutex_exit(&mod_lock);
7473 7594 mutex_exit(&dtrace_provider_lock);
7474 7595 }
7475 7596
7476 7597 kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
7477 7598 kmem_free(old, sizeof (dtrace_provider_t));
7478 7599
7479 7600 return (0);
7480 7601 }
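
/*
 * Illustrative counterpart to the registration sketch above: the
 * provider's detach(9E) routine unregisters, declining to detach if the
 * framework returns EBUSY (consumers still attached) or EAGAIN (reaping
 * scheduled; retry later):
 *
 *	static int
 *	example_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
 *	{
 *		if (dtrace_unregister(example_id) != 0)
 *			return (DDI_FAILURE);
 *
 *		return (DDI_SUCCESS);
 *	}
 */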
7481 7602
7482 7603 /*
7483 7604 * Invalidate the specified provider. All subsequent probe lookups for the
7484 7605 * specified provider will fail, but its probes will not be removed.
7485 7606 */
7486 7607 void
7487 7608 dtrace_invalidate(dtrace_provider_id_t id)
7488 7609 {
7489 7610 dtrace_provider_t *pvp = (dtrace_provider_t *)id;
7490 7611
7491 7612 ASSERT(pvp->dtpv_pops.dtps_enable !=
7492 7613 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
7493 7614
7494 7615 mutex_enter(&dtrace_provider_lock);
7495 7616 mutex_enter(&dtrace_lock);
7496 7617
7497 7618 pvp->dtpv_defunct = dtrace_gethrtime();
7498 7619
7499 7620 mutex_exit(&dtrace_lock);
7500 7621 mutex_exit(&dtrace_provider_lock);
7501 7622 }
7502 7623
7503 7624 /*
7504 7625 * Indicate whether or not DTrace has attached.
7505 7626 */
7506 7627 int
7507 7628 dtrace_attached(void)
7508 7629 {
7509 7630 /*
7510 7631 * dtrace_provider will be non-NULL iff the DTrace driver has
7511 7632 * attached. (It's non-NULL because DTrace is always itself a
7512 7633 * provider.)
7513 7634 */
7514 7635 return (dtrace_provider != NULL);
7515 7636 }
7516 7637
7517 7638 /*
7518 7639 * Remove all the unenabled probes for the given provider. This function is
7519 7640 * not unlike dtrace_unregister(), except that it doesn't remove the provider
7520 7641 * -- just as many of its associated probes as it can.
7521 7642 */
7522 7643 int
7523 7644 dtrace_condense(dtrace_provider_id_t id)
7524 7645 {
7525 7646 dtrace_provider_t *prov = (dtrace_provider_t *)id;
7526 7647 int i;
7527 7648 dtrace_probe_t *probe;
7528 7649
7529 7650 /*
7530 7651 * Make sure this isn't the dtrace provider itself.
7531 7652 */
7532 7653 ASSERT(prov->dtpv_pops.dtps_enable !=
7533 7654 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
7534 7655
7535 7656 mutex_enter(&dtrace_provider_lock);
7536 7657 mutex_enter(&dtrace_lock);
7537 7658
7538 7659 /*
7539 7660 * Attempt to destroy the probes associated with this provider.
7540 7661 */
7541 7662 for (i = 0; i < dtrace_nprobes; i++) {
7542 7663 if ((probe = dtrace_probes[i]) == NULL)
7543 7664 continue;
7544 7665
7545 7666 if (probe->dtpr_provider != prov)
7546 7667 continue;
7547 7668
7548 7669 if (probe->dtpr_ecb != NULL)
7549 7670 continue;
7550 7671
7551 7672 dtrace_probes[i] = NULL;
7552 7673
7553 7674 dtrace_hash_remove(dtrace_bymod, probe);
7554 7675 dtrace_hash_remove(dtrace_byfunc, probe);
7555 7676 dtrace_hash_remove(dtrace_byname, probe);
7556 7677
7557 7678 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
7558 7679 probe->dtpr_arg);
7559 7680 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
7560 7681 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
7561 7682 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
7562 7683 kmem_free(probe, sizeof (dtrace_probe_t));
7563 7684 vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
7564 7685 }
7565 7686
7566 7687 mutex_exit(&dtrace_lock);
7567 7688 mutex_exit(&dtrace_provider_lock);
7568 7689
7569 7690 return (0);
7570 7691 }
7571 7692
7572 7693 /*
7573 7694 * DTrace Probe Management Functions
7574 7695 *
7575 7696 * The functions in this section perform the DTrace probe management,
7576 7697 * including functions to create probes, look-up probes, and call into the
7577 7698 * providers to request that probes be provided. Some of these functions are
7578 7699 * in the Provider-to-Framework API; these functions can be identified by the
7579 7700 * fact that they are not declared "static".
7580 7701 */
7581 7702
7582 7703 /*
7583 7704 * Create a probe with the specified module name, function name, and name.
7584 7705 */
7585 7706 dtrace_id_t
7586 7707 dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
7587 7708 const char *func, const char *name, int aframes, void *arg)
7588 7709 {
7589 7710 dtrace_probe_t *probe, **probes;
7590 7711 dtrace_provider_t *provider = (dtrace_provider_t *)prov;
7591 7712 dtrace_id_t id;
7592 7713
7593 7714 if (provider == dtrace_provider) {
7594 7715 ASSERT(MUTEX_HELD(&dtrace_lock));
7595 7716 } else {
7596 7717 mutex_enter(&dtrace_lock);
7597 7718 }
7598 7719
7599 7720 id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
7600 7721 VM_BESTFIT | VM_SLEEP);
7601 7722 probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);
7602 7723
7603 7724 probe->dtpr_id = id;
7604 7725 probe->dtpr_gen = dtrace_probegen++;
7605 7726 probe->dtpr_mod = dtrace_strdup(mod);
7606 7727 probe->dtpr_func = dtrace_strdup(func);
7607 7728 probe->dtpr_name = dtrace_strdup(name);
7608 7729 probe->dtpr_arg = arg;
7609 7730 probe->dtpr_aframes = aframes;
7610 7731 probe->dtpr_provider = provider;
7611 7732
7612 7733 dtrace_hash_add(dtrace_bymod, probe);
7613 7734 dtrace_hash_add(dtrace_byfunc, probe);
7614 7735 dtrace_hash_add(dtrace_byname, probe);
7615 7736
7616 7737 if (id - 1 >= dtrace_nprobes) {
7617 7738 size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
7618 7739 size_t nsize = osize << 1;
7619 7740
7620 7741 if (nsize == 0) {
7621 7742 ASSERT(osize == 0);
7622 7743 ASSERT(dtrace_probes == NULL);
7623 7744 nsize = sizeof (dtrace_probe_t *);
7624 7745 }
7625 7746
7626 7747 probes = kmem_zalloc(nsize, KM_SLEEP);
7627 7748
7628 7749 if (dtrace_probes == NULL) {
7629 7750 ASSERT(osize == 0);
7630 7751 dtrace_probes = probes;
7631 7752 dtrace_nprobes = 1;
7632 7753 } else {
7633 7754 dtrace_probe_t **oprobes = dtrace_probes;
7634 7755
7635 7756 bcopy(oprobes, probes, osize);
7636 7757 dtrace_membar_producer();
7637 7758 dtrace_probes = probes;
7638 7759
7639 7760 dtrace_sync();
7640 7761
7641 7762 /*
7642 7763 * All CPUs are now seeing the new probes array; we can
7643 7764 * safely free the old array.
7644 7765 */
7645 7766 kmem_free(oprobes, osize);
7646 7767 dtrace_nprobes <<= 1;
7647 7768 }
7648 7769
7649 7770 ASSERT(id - 1 < dtrace_nprobes);
7650 7771 }
7651 7772
7652 7773 ASSERT(dtrace_probes[id - 1] == NULL);
7653 7774 dtrace_probes[id - 1] = probe;
7654 7775
7655 7776 if (provider != dtrace_provider)
7656 7777 mutex_exit(&dtrace_lock);
7657 7778
7658 7779 return (id);
7659 7780 }
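
/*
 * Illustrative sketch (hypothetical provider): a dtps_provide()
 * implementation typically checks for an existing probe with
 * dtrace_probe_lookup() before calling dtrace_probe_create(), so that
 * repeated provide requests don't create duplicate probes:
 *
 *	static void
 *	example_provide(void *arg, const dtrace_probedesc_t *desc)
 *	{
 *		if (dtrace_probe_lookup(example_id, NULL,
 *		    NULL, "tick") != 0)
 *			return;
 *
 *		(void) dtrace_probe_create(example_id, NULL,
 *		    NULL, "tick", 0, NULL);
 *	}
 */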
7660 7781
7661 7782 static dtrace_probe_t *
7662 7783 dtrace_probe_lookup_id(dtrace_id_t id)
7663 7784 {
7664 7785 ASSERT(MUTEX_HELD(&dtrace_lock));
7665 7786
7666 7787 if (id == 0 || id > dtrace_nprobes)
7667 7788 return (NULL);
7668 7789
7669 7790 return (dtrace_probes[id - 1]);
7670 7791 }
7671 7792
7672 7793 static int
7673 7794 dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
7674 7795 {
7675 7796 *((dtrace_id_t *)arg) = probe->dtpr_id;
7676 7797
7677 7798 return (DTRACE_MATCH_DONE);
7678 7799 }
7679 7800
7680 7801 /*
7681 7802 * Look up a probe based on provider and one or more of module name, function
7682 7803 * name and probe name.
7683 7804 */
7684 7805 dtrace_id_t
7685 7806 dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
7686 7807 const char *func, const char *name)
7687 7808 {
7688 7809 dtrace_probekey_t pkey;
7689 7810 dtrace_id_t id;
7690 7811 int match;
7691 7812
7692 7813 pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
7693 7814 pkey.dtpk_pmatch = &dtrace_match_string;
7694 7815 pkey.dtpk_mod = mod;
7695 7816 pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
7696 7817 pkey.dtpk_func = func;
7697 7818 pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
7698 7819 pkey.dtpk_name = name;
7699 7820 pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
7700 7821 pkey.dtpk_id = DTRACE_IDNONE;
7701 7822
7702 7823 mutex_enter(&dtrace_lock);
7703 7824 match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
7704 7825 dtrace_probe_lookup_match, &id);
7705 7826 mutex_exit(&dtrace_lock);
7706 7827
7707 7828 ASSERT(match == 1 || match == 0);
7708 7829 return (match ? id : 0);
7709 7830 }
7710 7831
7711 7832 /*
7712 7833 * Returns the probe argument associated with the specified probe.
7713 7834 */
7714 7835 void *
7715 7836 dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
7716 7837 {
7717 7838 dtrace_probe_t *probe;
7718 7839 void *rval = NULL;
7719 7840
7720 7841 mutex_enter(&dtrace_lock);
7721 7842
7722 7843 if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
7723 7844 probe->dtpr_provider == (dtrace_provider_t *)id)
7724 7845 rval = probe->dtpr_arg;
7725 7846
7726 7847 mutex_exit(&dtrace_lock);
7727 7848
7728 7849 return (rval);
7729 7850 }
7730 7851
7731 7852 /*
7732 7853 * Copy a probe into a probe description.
7733 7854 */
7734 7855 static void
7735 7856 dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
7736 7857 {
7737 7858 bzero(pdp, sizeof (dtrace_probedesc_t));
7738 7859 pdp->dtpd_id = prp->dtpr_id;
7739 7860
7740 7861 (void) strncpy(pdp->dtpd_provider,
7741 7862 prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1);
7742 7863
7743 7864 (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1);
7744 7865 (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1);
7745 7866 (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1);
7746 7867 }
7747 7868
7748 7869 /*
7749 7870 * Called to indicate that a probe -- or probes -- should be provided by a
7750 7871  * specified provider. If the specified description is NULL, the provider will
7751 7872 * be told to provide all of its probes. (This is done whenever a new
7752 7873 * consumer comes along, or whenever a retained enabling is to be matched.) If
7753 7874 * the specified description is non-NULL, the provider is given the
7754 7875 * opportunity to dynamically provide the specified probe, allowing providers
7755 7876 * to support the creation of probes on-the-fly. (So-called _autocreated_
7756 7877 * probes.) If the provider is NULL, the operations will be applied to all
7757 7878 * providers; if the provider is non-NULL the operations will only be applied
7758 7879 * to the specified provider. The dtrace_provider_lock must be held, and the
7759 7880 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
7760 7881 * will need to grab the dtrace_lock when it reenters the framework through
7761 7882 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
7762 7883 */
7763 7884 static void
7764 7885 dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
7765 7886 {
7766 7887 struct modctl *ctl;
7767 7888 int all = 0;
7768 7889
7769 7890 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
7770 7891
7771 7892 if (prv == NULL) {
7772 7893 all = 1;
7773 7894 prv = dtrace_provider;
7774 7895 }
7775 7896
7776 7897 do {
7777 7898 /*
7778 7899 * First, call the blanket provide operation.
7779 7900 */
7780 7901 prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);
7781 7902
7782 7903 /*
7783 7904 * Now call the per-module provide operation. We will grab
7784 7905 * mod_lock to prevent the list from being modified. Note
7785 7906 * that this also prevents the mod_busy bits from changing.
7786 7907 * (mod_busy can only be changed with mod_lock held.)
7787 7908 */
7788 7909 mutex_enter(&mod_lock);
7789 7910
7790 7911 ctl = &modules;
7791 7912 do {
7792 7913 if (ctl->mod_busy || ctl->mod_mp == NULL)
7793 7914 continue;
7794 7915
7795 7916 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
7796 7917
7797 7918 } while ((ctl = ctl->mod_next) != &modules);
7798 7919
7799 7920 mutex_exit(&mod_lock);
7800 7921 } while (all && (prv = prv->dtpv_next) != NULL);
7801 7922 }
7802 7923
7803 7924 /*
7804 7925 * Iterate over each probe, and call the Framework-to-Provider API function
7805 7926 * denoted by offs.
7806 7927 */
7807 7928 static void
7808 7929 dtrace_probe_foreach(uintptr_t offs)
7809 7930 {
7810 7931 dtrace_provider_t *prov;
7811 7932 void (*func)(void *, dtrace_id_t, void *);
7812 7933 dtrace_probe_t *probe;
7813 7934 dtrace_icookie_t cookie;
7814 7935 int i;
7815 7936
7816 7937 /*
7817 7938 * We disable interrupts to walk through the probe array. This is
7818 7939 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
7819 7940 * won't see stale data.
7820 7941 */
7821 7942 cookie = dtrace_interrupt_disable();
7822 7943
7823 7944 for (i = 0; i < dtrace_nprobes; i++) {
7824 7945 if ((probe = dtrace_probes[i]) == NULL)
7825 7946 continue;
7826 7947
7827 7948 if (probe->dtpr_ecb == NULL) {
7828 7949 /*
7829 7950 * This probe isn't enabled -- don't call the function.
7830 7951 */
7831 7952 continue;
7832 7953 }
7833 7954
7834 7955 prov = probe->dtpr_provider;
7835 7956 func = *((void(**)(void *, dtrace_id_t, void *))
7836 7957 ((uintptr_t)&prov->dtpv_pops + offs));
7837 7958
7838 7959 func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
7839 7960 }
7840 7961
7841 7962 dtrace_interrupt_enable(cookie);
7842 7963 }
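
/*
 * For example (illustrative only), suspending every enabled probe
 * amounts to:
 *
 *	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
 */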
7843 7964
7844 7965 static int
7845 7966 dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
7846 7967 {
7847 7968 dtrace_probekey_t pkey;
7848 7969 uint32_t priv;
7849 7970 uid_t uid;
7850 7971 zoneid_t zoneid;
7851 7972
7852 7973 ASSERT(MUTEX_HELD(&dtrace_lock));
7853 7974 dtrace_ecb_create_cache = NULL;
7854 7975
7855 7976 if (desc == NULL) {
7856 7977 /*
7857 7978 * If we're passed a NULL description, we're being asked to
7858 7979 * create an ECB with a NULL probe.
7859 7980 */
7860 7981 (void) dtrace_ecb_create_enable(NULL, enab);
7861 7982 return (0);
7862 7983 }
7863 7984
7864 7985 dtrace_probekey(desc, &pkey);
7865 7986 dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
7866 7987 &priv, &uid, &zoneid);
7867 7988
7868 7989 return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
7869 7990 enab));
7870 7991 }
7871 7992
7872 7993 /*
7873 7994 * DTrace Helper Provider Functions
7874 7995 */
7875 7996 static void
7876 7997 dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
7877 7998 {
7878 7999 attr->dtat_name = DOF_ATTR_NAME(dofattr);
7879 8000 attr->dtat_data = DOF_ATTR_DATA(dofattr);
7880 8001 attr->dtat_class = DOF_ATTR_CLASS(dofattr);
7881 8002 }
7882 8003
7883 8004 static void
7884 8005 dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
7885 8006 const dof_provider_t *dofprov, char *strtab)
7886 8007 {
7887 8008 hprov->dthpv_provname = strtab + dofprov->dofpv_name;
7888 8009 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
7889 8010 dofprov->dofpv_provattr);
7890 8011 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
7891 8012 dofprov->dofpv_modattr);
7892 8013 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
7893 8014 dofprov->dofpv_funcattr);
7894 8015 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
7895 8016 dofprov->dofpv_nameattr);
7896 8017 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
7897 8018 dofprov->dofpv_argsattr);
7898 8019 }
7899 8020
7900 8021 static void
7901 8022 dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
7902 8023 {
7903 8024 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
7904 8025 dof_hdr_t *dof = (dof_hdr_t *)daddr;
7905 8026 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
7906 8027 dof_provider_t *provider;
7907 8028 dof_probe_t *probe;
7908 8029 uint32_t *off, *enoff;
7909 8030 uint8_t *arg;
7910 8031 char *strtab;
7911 8032 uint_t i, nprobes;
7912 8033 dtrace_helper_provdesc_t dhpv;
7913 8034 dtrace_helper_probedesc_t dhpb;
7914 8035 dtrace_meta_t *meta = dtrace_meta_pid;
7915 8036 dtrace_mops_t *mops = &meta->dtm_mops;
7916 8037 void *parg;
7917 8038
7918 8039 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
7919 8040 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7920 8041 provider->dofpv_strtab * dof->dofh_secsize);
7921 8042 prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7922 8043 provider->dofpv_probes * dof->dofh_secsize);
7923 8044 arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7924 8045 provider->dofpv_prargs * dof->dofh_secsize);
7925 8046 off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7926 8047 provider->dofpv_proffs * dof->dofh_secsize);
7927 8048
7928 8049 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
7929 8050 off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
7930 8051 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
7931 8052 enoff = NULL;
7932 8053
7933 8054 /*
7934 8055 * See dtrace_helper_provider_validate().
7935 8056 */
7936 8057 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
7937 8058 provider->dofpv_prenoffs != DOF_SECT_NONE) {
7938 8059 enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
7939 8060 provider->dofpv_prenoffs * dof->dofh_secsize);
7940 8061 enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
7941 8062 }
7942 8063
7943 8064 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
7944 8065
7945 8066 /*
7946 8067 * Create the provider.
7947 8068 */
7948 8069 dtrace_dofprov2hprov(&dhpv, provider, strtab);
7949 8070
7950 8071 if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
7951 8072 return;
7952 8073
7953 8074 meta->dtm_count++;
7954 8075
7955 8076 /*
7956 8077 * Create the probes.
7957 8078 */
7958 8079 for (i = 0; i < nprobes; i++) {
7959 8080 probe = (dof_probe_t *)(uintptr_t)(daddr +
7960 8081 prb_sec->dofs_offset + i * prb_sec->dofs_entsize);
7961 8082
7962 8083 dhpb.dthpb_mod = dhp->dofhp_mod;
7963 8084 dhpb.dthpb_func = strtab + probe->dofpr_func;
7964 8085 dhpb.dthpb_name = strtab + probe->dofpr_name;
7965 8086 dhpb.dthpb_base = probe->dofpr_addr;
7966 8087 dhpb.dthpb_offs = off + probe->dofpr_offidx;
7967 8088 dhpb.dthpb_noffs = probe->dofpr_noffs;
7968 8089 if (enoff != NULL) {
7969 8090 dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
7970 8091 dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
7971 8092 } else {
7972 8093 dhpb.dthpb_enoffs = NULL;
7973 8094 dhpb.dthpb_nenoffs = 0;
7974 8095 }
7975 8096 dhpb.dthpb_args = arg + probe->dofpr_argidx;
7976 8097 dhpb.dthpb_nargc = probe->dofpr_nargc;
7977 8098 dhpb.dthpb_xargc = probe->dofpr_xargc;
7978 8099 dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
7979 8100 dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;
7980 8101
7981 8102 mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
7982 8103 }
7983 8104 }
7984 8105
7985 8106 static void
7986 8107 dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
7987 8108 {
7988 8109 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
7989 8110 dof_hdr_t *dof = (dof_hdr_t *)daddr;
7990 8111 int i;
7991 8112
7992 8113 ASSERT(MUTEX_HELD(&dtrace_meta_lock));
7993 8114
7994 8115 for (i = 0; i < dof->dofh_secnum; i++) {
7995 8116 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
7996 8117 dof->dofh_secoff + i * dof->dofh_secsize);
7997 8118
7998 8119 if (sec->dofs_type != DOF_SECT_PROVIDER)
7999 8120 continue;
8000 8121
8001 8122 dtrace_helper_provide_one(dhp, sec, pid);
8002 8123 }
8003 8124
8004 8125 /*
8005 8126 * We may have just created probes, so we must now rematch against
8006 8127 * any retained enablings. Note that this call will acquire both
8007 8128 * cpu_lock and dtrace_lock; the fact that we are holding
8008 8129 * dtrace_meta_lock now is what defines the ordering with respect to
8009 8130 * these three locks.
8010 8131 */
8011 8132 dtrace_enabling_matchall();
8012 8133 }
8013 8134
8014 8135 static void
8015 8136 dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
8016 8137 {
8017 8138 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8018 8139 dof_hdr_t *dof = (dof_hdr_t *)daddr;
8019 8140 dof_sec_t *str_sec;
8020 8141 dof_provider_t *provider;
8021 8142 char *strtab;
8022 8143 dtrace_helper_provdesc_t dhpv;
8023 8144 dtrace_meta_t *meta = dtrace_meta_pid;
8024 8145 dtrace_mops_t *mops = &meta->dtm_mops;
8025 8146
8026 8147 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
8027 8148 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8028 8149 provider->dofpv_strtab * dof->dofh_secsize);
8029 8150
8030 8151 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
8031 8152
8032 8153 /*
8033 8154 * Create the provider.
8034 8155 */
8035 8156 dtrace_dofprov2hprov(&dhpv, provider, strtab);
8036 8157
8037 8158 mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);
8038 8159
8039 8160 meta->dtm_count--;
8040 8161 }
8041 8162
8042 8163 static void
8043 8164 dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
8044 8165 {
8045 8166 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8046 8167 dof_hdr_t *dof = (dof_hdr_t *)daddr;
8047 8168 int i;
8048 8169
8049 8170 ASSERT(MUTEX_HELD(&dtrace_meta_lock));
8050 8171
8051 8172 for (i = 0; i < dof->dofh_secnum; i++) {
8052 8173 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
8053 8174 dof->dofh_secoff + i * dof->dofh_secsize);
8054 8175
8055 8176 if (sec->dofs_type != DOF_SECT_PROVIDER)
8056 8177 continue;
8057 8178
8058 8179 dtrace_helper_provider_remove_one(dhp, sec, pid);
8059 8180 }
8060 8181 }
8061 8182
8062 8183 /*
8063 8184 * DTrace Meta Provider-to-Framework API Functions
8064 8185 *
8065 8186 * These functions implement the Meta Provider-to-Framework API, as described
8066 8187 * in <sys/dtrace.h>.
8067 8188 */
8068 8189 int
8069 8190 dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
8070 8191 dtrace_meta_provider_id_t *idp)
8071 8192 {
8072 8193 dtrace_meta_t *meta;
8073 8194 dtrace_helpers_t *help, *next;
8074 8195 int i;
8075 8196
8076 8197 *idp = DTRACE_METAPROVNONE;
8077 8198
8078 8199 /*
8079 8200 * We strictly don't need the name, but we hold onto it for
8080 8201 * debuggability. All hail error queues!
8081 8202 */
8082 8203 if (name == NULL) {
8083 8204 cmn_err(CE_WARN, "failed to register meta-provider: "
8084 8205 "invalid name");
8085 8206 return (EINVAL);
8086 8207 }
8087 8208
8088 8209 if (mops == NULL ||
8089 8210 mops->dtms_create_probe == NULL ||
8090 8211 mops->dtms_provide_pid == NULL ||
8091 8212 mops->dtms_remove_pid == NULL) {
8092 8213 		cmn_err(CE_WARN, "failed to register meta-provider %s: "
8093 8214 "invalid ops", name);
8094 8215 return (EINVAL);
8095 8216 }
8096 8217
8097 8218 meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
8098 8219 meta->dtm_mops = *mops;
8099 8220 meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
8100 8221 (void) strcpy(meta->dtm_name, name);
8101 8222 meta->dtm_arg = arg;
8102 8223
8103 8224 mutex_enter(&dtrace_meta_lock);
8104 8225 mutex_enter(&dtrace_lock);
8105 8226
8106 8227 if (dtrace_meta_pid != NULL) {
8107 8228 mutex_exit(&dtrace_lock);
8108 8229 mutex_exit(&dtrace_meta_lock);
8109 8230 		cmn_err(CE_WARN, "failed to register meta-provider %s: "
8110 8231 "user-land meta-provider exists", name);
8111 8232 kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
8112 8233 kmem_free(meta, sizeof (dtrace_meta_t));
8113 8234 return (EINVAL);
8114 8235 }
8115 8236
8116 8237 dtrace_meta_pid = meta;
8117 8238 *idp = (dtrace_meta_provider_id_t)meta;
8118 8239
8119 8240 /*
8120 8241 * If there are providers and probes ready to go, pass them
8121 8242 * off to the new meta provider now.
8122 8243 */
8123 8244
8124 8245 help = dtrace_deferred_pid;
8125 8246 dtrace_deferred_pid = NULL;
8126 8247
8127 8248 mutex_exit(&dtrace_lock);
8128 8249
8129 8250 while (help != NULL) {
8130 8251 for (i = 0; i < help->dthps_nprovs; i++) {
8131 8252 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
8132 8253 help->dthps_pid);
8133 8254 }
8134 8255
8135 8256 next = help->dthps_next;
8136 8257 help->dthps_next = NULL;
8137 8258 help->dthps_prev = NULL;
8138 8259 help->dthps_deferred = 0;
8139 8260 help = next;
8140 8261 }
8141 8262
8142 8263 mutex_exit(&dtrace_meta_lock);
8143 8264
8144 8265 return (0);
8145 8266 }
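
/*
 * Illustrative sketch: a user-land meta-provider (fasttrap is the
 * canonical consumer of this interface) registers its meta-ops at load
 * time; "example_mops" is hypothetical:
 *
 *	static dtrace_meta_provider_id_t example_meta_id;
 *
 *	if (dtrace_meta_register("example", &example_mops, NULL,
 *	    &example_meta_id) != 0)
 *		return (DDI_FAILURE);
 */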
8146 8267
8147 8268 int
8148 8269 dtrace_meta_unregister(dtrace_meta_provider_id_t id)
8149 8270 {
8150 8271 dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;
8151 8272
8152 8273 mutex_enter(&dtrace_meta_lock);
8153 8274 mutex_enter(&dtrace_lock);
8154 8275
8155 8276 if (old == dtrace_meta_pid) {
8156 8277 pp = &dtrace_meta_pid;
8157 8278 } else {
8158 8279 panic("attempt to unregister non-existent "
8159 8280 "dtrace meta-provider %p\n", (void *)old);
8160 8281 }
8161 8282
8162 8283 if (old->dtm_count != 0) {
8163 8284 mutex_exit(&dtrace_lock);
8164 8285 mutex_exit(&dtrace_meta_lock);
8165 8286 return (EBUSY);
8166 8287 }
8167 8288
8168 8289 *pp = NULL;
8169 8290
8170 8291 mutex_exit(&dtrace_lock);
8171 8292 mutex_exit(&dtrace_meta_lock);
8172 8293
8173 8294 kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
8174 8295 kmem_free(old, sizeof (dtrace_meta_t));
8175 8296
8176 8297 return (0);
8177 8298 }
8178 8299
8179 8300
8180 8301 /*
8181 8302 * DTrace DIF Object Functions
8182 8303 */
8183 8304 static int
8184 8305 dtrace_difo_err(uint_t pc, const char *format, ...)
8185 8306 {
8186 8307 if (dtrace_err_verbose) {
8187 8308 va_list alist;
8188 8309
8189 8310 (void) uprintf("dtrace DIF object error: [%u]: ", pc);
8190 8311 va_start(alist, format);
8191 8312 (void) vuprintf(format, alist);
8192 8313 va_end(alist);
8193 8314 }
8194 8315
8195 8316 #ifdef DTRACE_ERRDEBUG
8196 8317 dtrace_errdebug(format);
8197 8318 #endif
8198 8319 return (1);
8199 8320 }
8200 8321
8201 8322 /*
8202 8323 * Validate a DTrace DIF object by checking the IR instructions. The following
8203 8324 * rules are currently enforced by dtrace_difo_validate():
8204 8325 *
8205 8326 * 1. Each instruction must have a valid opcode
8206 8327 * 2. Each register, string, variable, or subroutine reference must be valid
8207 8328 * 3. No instruction can modify register %r0 (must be zero)
8208 8329 * 4. All instruction reserved bits must be set to zero
8209 8330 * 5. The last instruction must be a "ret" instruction
8210 8331 * 6. All branch targets must reference a valid instruction _after_ the branch
8211 8332 */
8212 8333 static int
8213 8334 dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
8214 8335 cred_t *cr)
8215 8336 {
8216 8337 int err = 0, i;
8217 8338 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
8218 8339 int kcheckload;
8219 8340 uint_t pc;
8220 8341
8221 8342 kcheckload = cr == NULL ||
8222 8343 (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0;
8223 8344
8224 8345 dp->dtdo_destructive = 0;
8225 8346
8226 8347 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
8227 8348 dif_instr_t instr = dp->dtdo_buf[pc];
8228 8349
8229 8350 uint_t r1 = DIF_INSTR_R1(instr);
8230 8351 uint_t r2 = DIF_INSTR_R2(instr);
8231 8352 uint_t rd = DIF_INSTR_RD(instr);
8232 8353 uint_t rs = DIF_INSTR_RS(instr);
8233 8354 uint_t label = DIF_INSTR_LABEL(instr);
8234 8355 uint_t v = DIF_INSTR_VAR(instr);
8235 8356 uint_t subr = DIF_INSTR_SUBR(instr);
8236 8357 uint_t type = DIF_INSTR_TYPE(instr);
8237 8358 uint_t op = DIF_INSTR_OP(instr);
8238 8359
8239 8360 switch (op) {
8240 8361 case DIF_OP_OR:
8241 8362 case DIF_OP_XOR:
8242 8363 case DIF_OP_AND:
8243 8364 case DIF_OP_SLL:
8244 8365 case DIF_OP_SRL:
8245 8366 case DIF_OP_SRA:
8246 8367 case DIF_OP_SUB:
8247 8368 case DIF_OP_ADD:
8248 8369 case DIF_OP_MUL:
8249 8370 case DIF_OP_SDIV:
8250 8371 case DIF_OP_UDIV:
8251 8372 case DIF_OP_SREM:
8252 8373 case DIF_OP_UREM:
8253 8374 case DIF_OP_COPYS:
8254 8375 if (r1 >= nregs)
8255 8376 err += efunc(pc, "invalid register %u\n", r1);
8256 8377 if (r2 >= nregs)
8257 8378 err += efunc(pc, "invalid register %u\n", r2);
8258 8379 if (rd >= nregs)
8259 8380 err += efunc(pc, "invalid register %u\n", rd);
8260 8381 if (rd == 0)
8261 8382 err += efunc(pc, "cannot write to %r0\n");
8262 8383 break;
8263 8384 case DIF_OP_NOT:
8264 8385 case DIF_OP_MOV:
8265 8386 case DIF_OP_ALLOCS:
8266 8387 if (r1 >= nregs)
8267 8388 err += efunc(pc, "invalid register %u\n", r1);
8268 8389 if (r2 != 0)
8269 8390 err += efunc(pc, "non-zero reserved bits\n");
8270 8391 if (rd >= nregs)
8271 8392 err += efunc(pc, "invalid register %u\n", rd);
8272 8393 if (rd == 0)
8273 8394 err += efunc(pc, "cannot write to %r0\n");
8274 8395 break;
8275 8396 case DIF_OP_LDSB:
8276 8397 case DIF_OP_LDSH:
8277 8398 case DIF_OP_LDSW:
8278 8399 case DIF_OP_LDUB:
8279 8400 case DIF_OP_LDUH:
8280 8401 case DIF_OP_LDUW:
8281 8402 case DIF_OP_LDX:
8282 8403 if (r1 >= nregs)
8283 8404 err += efunc(pc, "invalid register %u\n", r1);
8284 8405 if (r2 != 0)
8285 8406 err += efunc(pc, "non-zero reserved bits\n");
8286 8407 if (rd >= nregs)
8287 8408 err += efunc(pc, "invalid register %u\n", rd);
8288 8409 if (rd == 0)
8289 8410 err += efunc(pc, "cannot write to %r0\n");
8290 8411 if (kcheckload)
8291 8412 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
8292 8413 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
8293 8414 break;
8294 8415 case DIF_OP_RLDSB:
8295 8416 case DIF_OP_RLDSH:
8296 8417 case DIF_OP_RLDSW:
8297 8418 case DIF_OP_RLDUB:
8298 8419 case DIF_OP_RLDUH:
8299 8420 case DIF_OP_RLDUW:
8300 8421 case DIF_OP_RLDX:
8301 8422 if (r1 >= nregs)
8302 8423 err += efunc(pc, "invalid register %u\n", r1);
8303 8424 if (r2 != 0)
8304 8425 err += efunc(pc, "non-zero reserved bits\n");
8305 8426 if (rd >= nregs)
8306 8427 err += efunc(pc, "invalid register %u\n", rd);
8307 8428 if (rd == 0)
8308 8429 err += efunc(pc, "cannot write to %r0\n");
8309 8430 break;
8310 8431 case DIF_OP_ULDSB:
8311 8432 case DIF_OP_ULDSH:
8312 8433 case DIF_OP_ULDSW:
8313 8434 case DIF_OP_ULDUB:
8314 8435 case DIF_OP_ULDUH:
8315 8436 case DIF_OP_ULDUW:
8316 8437 case DIF_OP_ULDX:
8317 8438 if (r1 >= nregs)
8318 8439 err += efunc(pc, "invalid register %u\n", r1);
8319 8440 if (r2 != 0)
8320 8441 err += efunc(pc, "non-zero reserved bits\n");
8321 8442 if (rd >= nregs)
8322 8443 err += efunc(pc, "invalid register %u\n", rd);
8323 8444 if (rd == 0)
8324 8445 err += efunc(pc, "cannot write to %r0\n");
8325 8446 break;
8326 8447 case DIF_OP_STB:
8327 8448 case DIF_OP_STH:
8328 8449 case DIF_OP_STW:
8329 8450 case DIF_OP_STX:
8330 8451 if (r1 >= nregs)
8331 8452 err += efunc(pc, "invalid register %u\n", r1);
8332 8453 if (r2 != 0)
8333 8454 err += efunc(pc, "non-zero reserved bits\n");
8334 8455 if (rd >= nregs)
8335 8456 err += efunc(pc, "invalid register %u\n", rd);
8336 8457 if (rd == 0)
8337 8458 err += efunc(pc, "cannot write to 0 address\n");
8338 8459 break;
8339 8460 case DIF_OP_CMP:
8340 8461 case DIF_OP_SCMP:
8341 8462 if (r1 >= nregs)
8342 8463 err += efunc(pc, "invalid register %u\n", r1);
8343 8464 if (r2 >= nregs)
8344 8465 err += efunc(pc, "invalid register %u\n", r2);
8345 8466 if (rd != 0)
8346 8467 err += efunc(pc, "non-zero reserved bits\n");
8347 8468 break;
8348 8469 case DIF_OP_TST:
8349 8470 if (r1 >= nregs)
8350 8471 err += efunc(pc, "invalid register %u\n", r1);
8351 8472 if (r2 != 0 || rd != 0)
8352 8473 err += efunc(pc, "non-zero reserved bits\n");
8353 8474 break;
8354 8475 case DIF_OP_BA:
8355 8476 case DIF_OP_BE:
8356 8477 case DIF_OP_BNE:
8357 8478 case DIF_OP_BG:
8358 8479 case DIF_OP_BGU:
8359 8480 case DIF_OP_BGE:
8360 8481 case DIF_OP_BGEU:
8361 8482 case DIF_OP_BL:
8362 8483 case DIF_OP_BLU:
8363 8484 case DIF_OP_BLE:
8364 8485 case DIF_OP_BLEU:
8365 8486 if (label >= dp->dtdo_len) {
8366 8487 err += efunc(pc, "invalid branch target %u\n",
8367 8488 label);
8368 8489 }
8369 8490 if (label <= pc) {
8370 8491 err += efunc(pc, "backward branch to %u\n",
8371 8492 label);
8372 8493 }
8373 8494 break;
8374 8495 case DIF_OP_RET:
8375 8496 if (r1 != 0 || r2 != 0)
8376 8497 err += efunc(pc, "non-zero reserved bits\n");
8377 8498 if (rd >= nregs)
8378 8499 err += efunc(pc, "invalid register %u\n", rd);
8379 8500 break;
8380 8501 case DIF_OP_NOP:
8381 8502 case DIF_OP_POPTS:
8382 8503 case DIF_OP_FLUSHTS:
8383 8504 if (r1 != 0 || r2 != 0 || rd != 0)
8384 8505 err += efunc(pc, "non-zero reserved bits\n");
8385 8506 break;
8386 8507 case DIF_OP_SETX:
8387 8508 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
8388 8509 err += efunc(pc, "invalid integer ref %u\n",
8389 8510 DIF_INSTR_INTEGER(instr));
8390 8511 }
8391 8512 if (rd >= nregs)
8392 8513 err += efunc(pc, "invalid register %u\n", rd);
8393 8514 if (rd == 0)
8394 8515 err += efunc(pc, "cannot write to %r0\n");
8395 8516 break;
8396 8517 case DIF_OP_SETS:
8397 8518 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
8398 8519 err += efunc(pc, "invalid string ref %u\n",
8399 8520 DIF_INSTR_STRING(instr));
8400 8521 }
8401 8522 if (rd >= nregs)
8402 8523 err += efunc(pc, "invalid register %u\n", rd);
8403 8524 if (rd == 0)
8404 8525 err += efunc(pc, "cannot write to %r0\n");
8405 8526 break;
8406 8527 case DIF_OP_LDGA:
8407 8528 case DIF_OP_LDTA:
8408 8529 if (r1 > DIF_VAR_ARRAY_MAX)
8409 8530 err += efunc(pc, "invalid array %u\n", r1);
8410 8531 if (r2 >= nregs)
8411 8532 err += efunc(pc, "invalid register %u\n", r2);
8412 8533 if (rd >= nregs)
8413 8534 err += efunc(pc, "invalid register %u\n", rd);
8414 8535 if (rd == 0)
8415 8536 err += efunc(pc, "cannot write to %r0\n");
8416 8537 break;
8417 8538 case DIF_OP_LDGS:
8418 8539 case DIF_OP_LDTS:
8419 8540 case DIF_OP_LDLS:
8420 8541 case DIF_OP_LDGAA:
8421 8542 case DIF_OP_LDTAA:
8422 8543 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
8423 8544 err += efunc(pc, "invalid variable %u\n", v);
8424 8545 if (rd >= nregs)
8425 8546 err += efunc(pc, "invalid register %u\n", rd);
8426 8547 if (rd == 0)
8427 8548 err += efunc(pc, "cannot write to %r0\n");
8428 8549 break;
8429 8550 case DIF_OP_STGS:
8430 8551 case DIF_OP_STTS:
8431 8552 case DIF_OP_STLS:
8432 8553 case DIF_OP_STGAA:
8433 8554 case DIF_OP_STTAA:
8434 8555 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
8435 8556 err += efunc(pc, "invalid variable %u\n", v);
8436 8557 if (rs >= nregs)
8437 8558 err += efunc(pc, "invalid register %u\n", rs);
8438 8559 break;
8439 8560 case DIF_OP_CALL:
8440 8561 if (subr > DIF_SUBR_MAX)
8441 8562 err += efunc(pc, "invalid subr %u\n", subr);
8442 8563 if (rd >= nregs)
8443 8564 err += efunc(pc, "invalid register %u\n", rd);
8444 8565 if (rd == 0)
8445 8566 err += efunc(pc, "cannot write to %r0\n");
8446 8567
8447 8568 if (subr == DIF_SUBR_COPYOUT ||
8448 8569 subr == DIF_SUBR_COPYOUTSTR) {
8449 8570 dp->dtdo_destructive = 1;
8450 8571 }
8572 +
8573 + if (subr == DIF_SUBR_GETF) {
8574 + /*
8575 + * If we have a getf() we need to record that
8576 + * in our state. Note that our state can be
8577 + * NULL if this is a helper -- but in that
8578 + * case, the call to getf() is itself illegal,
8579 + * and will be caught (slightly later) when
8580 + * the helper is validated.
8581 + */
8582 + if (vstate->dtvs_state != NULL)
8583 + vstate->dtvs_state->dts_getf++;
8584 + }
8585 +
8451 8586 break;
8452 8587 case DIF_OP_PUSHTR:
8453 8588 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
8454 8589 err += efunc(pc, "invalid ref type %u\n", type);
8455 8590 if (r2 >= nregs)
8456 8591 err += efunc(pc, "invalid register %u\n", r2);
8457 8592 if (rs >= nregs)
8458 8593 err += efunc(pc, "invalid register %u\n", rs);
8459 8594 break;
8460 8595 case DIF_OP_PUSHTV:
8461 8596 if (type != DIF_TYPE_CTF)
8462 8597 err += efunc(pc, "invalid val type %u\n", type);
8463 8598 if (r2 >= nregs)
8464 8599 err += efunc(pc, "invalid register %u\n", r2);
8465 8600 if (rs >= nregs)
8466 8601 err += efunc(pc, "invalid register %u\n", rs);
8467 8602 break;
8468 8603 default:
8469 8604 err += efunc(pc, "invalid opcode %u\n",
8470 8605 DIF_INSTR_OP(instr));
8471 8606 }
8472 8607 }
8473 8608
8474 8609 if (dp->dtdo_len != 0 &&
8475 8610 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
8476 8611 err += efunc(dp->dtdo_len - 1,
8477 8612 "expected 'ret' as last DIF instruction\n");
8478 8613 }
8479 8614
8480 8615 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) {
8481 8616 /*
8482 8617 * If we're not returning by reference, the size must be either
8483 8618 * 0 or the size of one of the base types.
8484 8619 */
8485 8620 switch (dp->dtdo_rtype.dtdt_size) {
8486 8621 case 0:
8487 8622 case sizeof (uint8_t):
8488 8623 case sizeof (uint16_t):
8489 8624 case sizeof (uint32_t):
8490 8625 case sizeof (uint64_t):
8491 8626 break;
8492 8627
8493 8628 default:
8494 8629 err += efunc(dp->dtdo_len - 1, "bad return size\n");
8495 8630 }
8496 8631 }
8497 8632
8498 8633 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
8499 8634 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
8500 8635 dtrace_diftype_t *vt, *et;
8501 8636 uint_t id, ndx;
8502 8637
8503 8638 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
8504 8639 v->dtdv_scope != DIFV_SCOPE_THREAD &&
8505 8640 v->dtdv_scope != DIFV_SCOPE_LOCAL) {
8506 8641 err += efunc(i, "unrecognized variable scope %d\n",
8507 8642 v->dtdv_scope);
8508 8643 break;
8509 8644 }
8510 8645
8511 8646 if (v->dtdv_kind != DIFV_KIND_ARRAY &&
8512 8647 v->dtdv_kind != DIFV_KIND_SCALAR) {
8513 8648 err += efunc(i, "unrecognized variable type %d\n",
8514 8649 v->dtdv_kind);
8515 8650 break;
8516 8651 }
8517 8652
8518 8653 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
8519 8654 err += efunc(i, "%d exceeds variable id limit\n", id);
8520 8655 break;
8521 8656 }
8522 8657
8523 8658 if (id < DIF_VAR_OTHER_UBASE)
8524 8659 continue;
8525 8660
8526 8661 /*
8527 8662 * For user-defined variables, we need to check that this
8528 8663 * definition is identical to any previous definition that we
8529 8664 * encountered.
8530 8665 */
8531 8666 ndx = id - DIF_VAR_OTHER_UBASE;
8532 8667
8533 8668 switch (v->dtdv_scope) {
8534 8669 case DIFV_SCOPE_GLOBAL:
8535 8670 if (ndx < vstate->dtvs_nglobals) {
8536 8671 dtrace_statvar_t *svar;
8537 8672
8538 8673 if ((svar = vstate->dtvs_globals[ndx]) != NULL)
8539 8674 existing = &svar->dtsv_var;
8540 8675 }
8541 8676
8542 8677 break;
8543 8678
8544 8679 case DIFV_SCOPE_THREAD:
8545 8680 if (ndx < vstate->dtvs_ntlocals)
8546 8681 existing = &vstate->dtvs_tlocals[ndx];
8547 8682 break;
8548 8683
8549 8684 case DIFV_SCOPE_LOCAL:
8550 8685 if (ndx < vstate->dtvs_nlocals) {
8551 8686 dtrace_statvar_t *svar;
8552 8687
8553 8688 if ((svar = vstate->dtvs_locals[ndx]) != NULL)
8554 8689 existing = &svar->dtsv_var;
8555 8690 }
8556 8691
8557 8692 break;
8558 8693 }
8559 8694
8560 8695 vt = &v->dtdv_type;
8561 8696
8562 8697 if (vt->dtdt_flags & DIF_TF_BYREF) {
8563 8698 if (vt->dtdt_size == 0) {
8564 8699 err += efunc(i, "zero-sized variable\n");
8565 8700 break;
8566 8701 }
8567 8702
8568 8703 if (v->dtdv_scope == DIFV_SCOPE_GLOBAL &&
8569 8704 vt->dtdt_size > dtrace_global_maxsize) {
8570 8705 err += efunc(i, "oversized by-ref global\n");
8571 8706 break;
8572 8707 }
8573 8708 }
8574 8709
8575 8710 if (existing == NULL || existing->dtdv_id == 0)
8576 8711 continue;
8577 8712
8578 8713 ASSERT(existing->dtdv_id == v->dtdv_id);
8579 8714 ASSERT(existing->dtdv_scope == v->dtdv_scope);
8580 8715
8581 8716 if (existing->dtdv_kind != v->dtdv_kind)
8582 8717 err += efunc(i, "%d changed variable kind\n", id);
8583 8718
8584 8719 et = &existing->dtdv_type;
8585 8720
8586 8721 if (vt->dtdt_flags != et->dtdt_flags) {
8587 8722 err += efunc(i, "%d changed variable type flags\n", id);
8588 8723 break;
8589 8724 }
8590 8725
8591 8726 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
8592 8727 err += efunc(i, "%d changed variable type size\n", id);
8593 8728 break;
8594 8729 }
8595 8730 }
8596 8731
8597 8732 return (err);
8598 8733 }
8599 8734
8600 8735 /*
8601 8736 * Validate a DTrace DIF object that is to be used as a helper. Helpers
8602 8737 * are much more constrained than normal DIFOs. Specifically, they may
8603 8738 * not:
8604 8739 *
8605 8740 * 1. Make calls to subroutines other than copyin(), copyinstr(),
8606 8741 *    alloca(), bcopy(), copyinto(), or miscellaneous string and byte-order routines
8607 8742 * 2. Access DTrace variables other than the args[] array, and the
8608 8743 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
8609 8744 * 3. Have thread-local variables.
8610 8745 * 4. Have dynamic variables.
8611 8746 */
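/*
 * For example (a sketch, not an exhaustive list): a helper may call
 * copyinstr() on one of its arguments and strjoin() the result with a
 * literal, but a DIFO that calls getf() or stores to a thread-local
 * variable such as self->x will be rejected below with "invalid subr"
 * or "illegal dynamic variable store", respectively.
 */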
8612 8747 static int
8613 8748 dtrace_difo_validate_helper(dtrace_difo_t *dp)
8614 8749 {
8615 8750 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
8616 8751 int err = 0;
8617 8752 uint_t pc;
8618 8753
8619 8754 for (pc = 0; pc < dp->dtdo_len; pc++) {
8620 8755 dif_instr_t instr = dp->dtdo_buf[pc];
8621 8756
8622 8757 uint_t v = DIF_INSTR_VAR(instr);
8623 8758 uint_t subr = DIF_INSTR_SUBR(instr);
8624 8759 uint_t op = DIF_INSTR_OP(instr);
8625 8760
8626 8761 switch (op) {
8627 8762 case DIF_OP_OR:
8628 8763 case DIF_OP_XOR:
8629 8764 case DIF_OP_AND:
8630 8765 case DIF_OP_SLL:
8631 8766 case DIF_OP_SRL:
8632 8767 case DIF_OP_SRA:
8633 8768 case DIF_OP_SUB:
8634 8769 case DIF_OP_ADD:
8635 8770 case DIF_OP_MUL:
8636 8771 case DIF_OP_SDIV:
8637 8772 case DIF_OP_UDIV:
8638 8773 case DIF_OP_SREM:
8639 8774 case DIF_OP_UREM:
8640 8775 case DIF_OP_COPYS:
8641 8776 case DIF_OP_NOT:
8642 8777 case DIF_OP_MOV:
8643 8778 case DIF_OP_RLDSB:
8644 8779 case DIF_OP_RLDSH:
8645 8780 case DIF_OP_RLDSW:
8646 8781 case DIF_OP_RLDUB:
8647 8782 case DIF_OP_RLDUH:
8648 8783 case DIF_OP_RLDUW:
8649 8784 case DIF_OP_RLDX:
8650 8785 case DIF_OP_ULDSB:
8651 8786 case DIF_OP_ULDSH:
8652 8787 case DIF_OP_ULDSW:
8653 8788 case DIF_OP_ULDUB:
8654 8789 case DIF_OP_ULDUH:
8655 8790 case DIF_OP_ULDUW:
8656 8791 case DIF_OP_ULDX:
8657 8792 case DIF_OP_STB:
8658 8793 case DIF_OP_STH:
8659 8794 case DIF_OP_STW:
8660 8795 case DIF_OP_STX:
8661 8796 case DIF_OP_ALLOCS:
8662 8797 case DIF_OP_CMP:
8663 8798 case DIF_OP_SCMP:
8664 8799 case DIF_OP_TST:
8665 8800 case DIF_OP_BA:
8666 8801 case DIF_OP_BE:
8667 8802 case DIF_OP_BNE:
8668 8803 case DIF_OP_BG:
8669 8804 case DIF_OP_BGU:
8670 8805 case DIF_OP_BGE:
8671 8806 case DIF_OP_BGEU:
8672 8807 case DIF_OP_BL:
8673 8808 case DIF_OP_BLU:
8674 8809 case DIF_OP_BLE:
8675 8810 case DIF_OP_BLEU:
8676 8811 case DIF_OP_RET:
8677 8812 case DIF_OP_NOP:
8678 8813 case DIF_OP_POPTS:
8679 8814 case DIF_OP_FLUSHTS:
8680 8815 case DIF_OP_SETX:
8681 8816 case DIF_OP_SETS:
8682 8817 case DIF_OP_LDGA:
8683 8818 case DIF_OP_LDLS:
8684 8819 case DIF_OP_STGS:
8685 8820 case DIF_OP_STLS:
8686 8821 case DIF_OP_PUSHTR:
8687 8822 case DIF_OP_PUSHTV:
8688 8823 break;
8689 8824
8690 8825 case DIF_OP_LDGS:
8691 8826 if (v >= DIF_VAR_OTHER_UBASE)
8692 8827 break;
8693 8828
8694 8829 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
8695 8830 break;
8696 8831
8697 8832 if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
8698 8833 v == DIF_VAR_PPID || v == DIF_VAR_TID ||
8699 8834 v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
8700 8835 v == DIF_VAR_UID || v == DIF_VAR_GID)
8701 8836 break;
8702 8837
8703 8838 err += efunc(pc, "illegal variable %u\n", v);
8704 8839 break;
8705 8840
8706 8841 case DIF_OP_LDTA:
8707 8842 case DIF_OP_LDTS:
8708 8843 case DIF_OP_LDGAA:
8709 8844 case DIF_OP_LDTAA:
8710 8845 err += efunc(pc, "illegal dynamic variable load\n");
8711 8846 break;
8712 8847
8713 8848 case DIF_OP_STTS:
8714 8849 case DIF_OP_STGAA:
8715 8850 case DIF_OP_STTAA:
8716 8851 err += efunc(pc, "illegal dynamic variable store\n");
8717 8852 break;
8718 8853
8719 8854 case DIF_OP_CALL:
8720 8855 if (subr == DIF_SUBR_ALLOCA ||
8721 8856 subr == DIF_SUBR_BCOPY ||
8722 8857 subr == DIF_SUBR_COPYIN ||
8723 8858 subr == DIF_SUBR_COPYINTO ||
8724 8859 subr == DIF_SUBR_COPYINSTR ||
8725 8860 subr == DIF_SUBR_INDEX ||
8726 8861 subr == DIF_SUBR_INET_NTOA ||
8727 8862 subr == DIF_SUBR_INET_NTOA6 ||
8728 8863 subr == DIF_SUBR_INET_NTOP ||
8729 8864 subr == DIF_SUBR_LLTOSTR ||
8730 8865 subr == DIF_SUBR_RINDEX ||
8731 8866 subr == DIF_SUBR_STRCHR ||
8732 8867 subr == DIF_SUBR_STRJOIN ||
8733 8868 subr == DIF_SUBR_STRRCHR ||
8734 8869 subr == DIF_SUBR_STRSTR ||
8735 8870 subr == DIF_SUBR_HTONS ||
8736 8871 subr == DIF_SUBR_HTONL ||
8737 8872 subr == DIF_SUBR_HTONLL ||
8738 8873 subr == DIF_SUBR_NTOHS ||
8739 8874 subr == DIF_SUBR_NTOHL ||
8740 8875 subr == DIF_SUBR_NTOHLL)
8741 8876 break;
8742 8877
8743 8878 err += efunc(pc, "invalid subr %u\n", subr);
8744 8879 break;
8745 8880
8746 8881 default:
8747 8882 err += efunc(pc, "invalid opcode %u\n",
8748 8883 DIF_INSTR_OP(instr));
8749 8884 }
8750 8885 }
8751 8886
8752 8887 return (err);
8753 8888 }
8754 8889
8755 8890 /*
8756 8891 * Returns 1 if the expression in the DIF object can be cached on a per-thread
8757 8892 * basis; 0 if not.
8758 8893 */
8759 8894 static int
8760 8895 dtrace_difo_cacheable(dtrace_difo_t *dp)
8761 8896 {
8762 8897 int i;
8763 8898
8764 8899 if (dp == NULL)
8765 8900 return (0);
8766 8901
8767 8902 for (i = 0; i < dp->dtdo_varlen; i++) {
8768 8903 dtrace_difv_t *v = &dp->dtdo_vartab[i];
8769 8904
8770 8905 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
8771 8906 continue;
8772 8907
8773 8908 switch (v->dtdv_id) {
8774 8909 case DIF_VAR_CURTHREAD:
8775 8910 case DIF_VAR_PID:
8776 8911 case DIF_VAR_TID:
8777 8912 case DIF_VAR_EXECNAME:
8778 8913 case DIF_VAR_ZONENAME:
8779 8914 break;
8780 8915
8781 8916 default:
8782 8917 return (0);
8783 8918 }
8784 8919 }
8785 8920
8786 8921 /*
8787 8922 * This DIF object may be cacheable. Now we need to look for any
8788 8923 * array loading instructions, any memory loading instructions, or
8789 8924 * any stores to thread-local variables.
8790 8925 */
8791 8926 for (i = 0; i < dp->dtdo_len; i++) {
8792 8927 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);
8793 8928
8794 8929 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
8795 8930 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
8796 8931 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
8797 8932 op == DIF_OP_LDGA || op == DIF_OP_STTS)
8798 8933 return (0);
8799 8934 }
8800 8935
8801 8936 return (1);
8802 8937 }
8803 8938
8804 8939 static void
8805 8940 dtrace_difo_hold(dtrace_difo_t *dp)
8806 8941 {
8807 8942 int i;
8808 8943
8809 8944 ASSERT(MUTEX_HELD(&dtrace_lock));
8810 8945
8811 8946 dp->dtdo_refcnt++;
8812 8947 ASSERT(dp->dtdo_refcnt != 0);
8813 8948
8814 8949 /*
8815 8950 * We need to check this DIF object for references to the variable
8816 8951 * DIF_VAR_VTIMESTAMP.
8817 8952 */
8818 8953 for (i = 0; i < dp->dtdo_varlen; i++) {
8819 8954 dtrace_difv_t *v = &dp->dtdo_vartab[i];
8820 8955
8821 8956 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
8822 8957 continue;
8823 8958
8824 8959 if (dtrace_vtime_references++ == 0)
8825 8960 dtrace_vtime_enable();
8826 8961 }
8827 8962 }
8828 8963
8829 8964 /*
8830 8965 * This routine calculates the dynamic variable chunksize for a given DIF
8831 8966 * object. The calculation is not fool-proof, and can probably be tricked by
8832 8967 * malicious DIF -- but it works for all compiler-generated DIF. Because this
8833 8968 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
8834 8969 * if a dynamic variable size exceeds the chunksize.
8835 8970 */
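/*
 * As a worked example (illustrative sizes only): for a scalar
 * thread-local store such as self->x = timestamp, the "stts"
 * contributes two zero-sized keys (thread and id), so ksize is zero
 * and the chunk works out to sizeof (dtrace_dynvar_t) plus one
 * additional dtrace_key_t plus the eight bytes of variable data,
 * rounded up to a multiple of sizeof (uint64_t).
 */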
8836 8971 static void
8837 8972 dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
8838 8973 {
8839 8974 uint64_t sval;
8840 8975 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
8841 8976 const dif_instr_t *text = dp->dtdo_buf;
8842 8977 uint_t pc, srd = 0;
8843 8978 uint_t ttop = 0;
8844 8979 size_t size, ksize;
8845 8980 uint_t id, i;
8846 8981
8847 8982 for (pc = 0; pc < dp->dtdo_len; pc++) {
8848 8983 dif_instr_t instr = text[pc];
8849 8984 uint_t op = DIF_INSTR_OP(instr);
8850 8985 uint_t rd = DIF_INSTR_RD(instr);
8851 8986 uint_t r1 = DIF_INSTR_R1(instr);
8852 8987 uint_t nkeys = 0;
8853 8988 uchar_t scope;
8854 8989
8855 8990 dtrace_key_t *key = tupregs;
8856 8991
8857 8992 switch (op) {
8858 8993 case DIF_OP_SETX:
8859 8994 sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
8860 8995 srd = rd;
8861 8996 continue;
8862 8997
8863 8998 case DIF_OP_STTS:
8864 8999 key = &tupregs[DIF_DTR_NREGS];
8865 9000 key[0].dttk_size = 0;
8866 9001 key[1].dttk_size = 0;
8867 9002 nkeys = 2;
8868 9003 scope = DIFV_SCOPE_THREAD;
8869 9004 break;
8870 9005
8871 9006 case DIF_OP_STGAA:
8872 9007 case DIF_OP_STTAA:
8873 9008 nkeys = ttop;
8874 9009
8875 9010 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
8876 9011 key[nkeys++].dttk_size = 0;
8877 9012
8878 9013 key[nkeys++].dttk_size = 0;
8879 9014
8880 9015 if (op == DIF_OP_STTAA) {
8881 9016 scope = DIFV_SCOPE_THREAD;
8882 9017 } else {
8883 9018 scope = DIFV_SCOPE_GLOBAL;
8884 9019 }
8885 9020
8886 9021 break;
8887 9022
8888 9023 case DIF_OP_PUSHTR:
8889 9024 if (ttop == DIF_DTR_NREGS)
8890 9025 return;
8891 9026
8892 9027 if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
8893 9028 /*
8894 9029 * If the register for the size of the "pushtr"
8895 9030 * is %r0 (or the value is 0) and the type is
8896 9031 * a string, we'll use the system-wide default
8897 9032 * string size.
8898 9033 */
8899 9034 tupregs[ttop++].dttk_size =
8900 9035 dtrace_strsize_default;
8901 9036 } else {
8902 9037 if (srd == 0)
8903 9038 return;
8904 9039
8905 9040 tupregs[ttop++].dttk_size = sval;
8906 9041 }
8907 9042
8908 9043 break;
8909 9044
8910 9045 case DIF_OP_PUSHTV:
8911 9046 if (ttop == DIF_DTR_NREGS)
8912 9047 return;
8913 9048
8914 9049 tupregs[ttop++].dttk_size = 0;
8915 9050 break;
8916 9051
8917 9052 case DIF_OP_FLUSHTS:
8918 9053 ttop = 0;
8919 9054 break;
8920 9055
8921 9056 case DIF_OP_POPTS:
8922 9057 if (ttop != 0)
8923 9058 ttop--;
8924 9059 break;
8925 9060 }
8926 9061
8927 9062 sval = 0;
8928 9063 srd = 0;
8929 9064
8930 9065 if (nkeys == 0)
8931 9066 continue;
8932 9067
8933 9068 /*
8934 9069 * We have a dynamic variable allocation; calculate its size.
8935 9070 */
8936 9071 for (ksize = 0, i = 0; i < nkeys; i++)
8937 9072 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
8938 9073
8939 9074 size = sizeof (dtrace_dynvar_t);
8940 9075 size += sizeof (dtrace_key_t) * (nkeys - 1);
8941 9076 size += ksize;
8942 9077
8943 9078 /*
8944 9079 * Now we need to determine the size of the stored data.
8945 9080 */
8946 9081 id = DIF_INSTR_VAR(instr);
8947 9082
8948 9083 for (i = 0; i < dp->dtdo_varlen; i++) {
8949 9084 dtrace_difv_t *v = &dp->dtdo_vartab[i];
8950 9085
8951 9086 if (v->dtdv_id == id && v->dtdv_scope == scope) {
8952 9087 size += v->dtdv_type.dtdt_size;
8953 9088 break;
8954 9089 }
8955 9090 }
8956 9091
8957 9092 if (i == dp->dtdo_varlen)
8958 9093 return;
8959 9094
8960 9095 /*
8961 9096 * We have the size. If this is larger than the chunk size
8962 9097 * for our dynamic variable state, reset the chunk size.
8963 9098 */
8964 9099 size = P2ROUNDUP(size, sizeof (uint64_t));
8965 9100
8966 9101 if (size > vstate->dtvs_dynvars.dtds_chunksize)
8967 9102 vstate->dtvs_dynvars.dtds_chunksize = size;
8968 9103 }
8969 9104 }
8970 9105
8971 9106 static void
8972 9107 dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
8973 9108 {
8974 9109 int i, oldsvars, osz, nsz, otlocals, ntlocals;
8975 9110 uint_t id;
8976 9111
8977 9112 ASSERT(MUTEX_HELD(&dtrace_lock));
8978 9113 ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);
8979 9114
8980 9115 for (i = 0; i < dp->dtdo_varlen; i++) {
8981 9116 dtrace_difv_t *v = &dp->dtdo_vartab[i];
8982 9117 dtrace_statvar_t *svar, ***svarp;
8983 9118 size_t dsize = 0;
8984 9119 uint8_t scope = v->dtdv_scope;
8985 9120 int *np;
8986 9121
8987 9122 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
8988 9123 continue;
8989 9124
8990 9125 id -= DIF_VAR_OTHER_UBASE;
8991 9126
8992 9127 switch (scope) {
8993 9128 case DIFV_SCOPE_THREAD:
8994 9129 while (id >= (otlocals = vstate->dtvs_ntlocals)) {
8995 9130 dtrace_difv_t *tlocals;
8996 9131
8997 9132 if ((ntlocals = (otlocals << 1)) == 0)
8998 9133 ntlocals = 1;
8999 9134
9000 9135 osz = otlocals * sizeof (dtrace_difv_t);
9001 9136 nsz = ntlocals * sizeof (dtrace_difv_t);
9002 9137
9003 9138 tlocals = kmem_zalloc(nsz, KM_SLEEP);
9004 9139
9005 9140 if (osz != 0) {
9006 9141 bcopy(vstate->dtvs_tlocals,
9007 9142 tlocals, osz);
9008 9143 kmem_free(vstate->dtvs_tlocals, osz);
9009 9144 }
9010 9145
9011 9146 vstate->dtvs_tlocals = tlocals;
9012 9147 vstate->dtvs_ntlocals = ntlocals;
9013 9148 }
9014 9149
9015 9150 vstate->dtvs_tlocals[id] = *v;
9016 9151 continue;
9017 9152
9018 9153 case DIFV_SCOPE_LOCAL:
9019 9154 np = &vstate->dtvs_nlocals;
9020 9155 svarp = &vstate->dtvs_locals;
9021 9156
9022 9157 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
9023 9158 dsize = NCPU * (v->dtdv_type.dtdt_size +
9024 9159 sizeof (uint64_t));
9025 9160 else
9026 9161 dsize = NCPU * sizeof (uint64_t);
9027 9162
9028 9163 break;
9029 9164
9030 9165 case DIFV_SCOPE_GLOBAL:
9031 9166 np = &vstate->dtvs_nglobals;
9032 9167 svarp = &vstate->dtvs_globals;
9033 9168
9034 9169 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
9035 9170 dsize = v->dtdv_type.dtdt_size +
9036 9171 sizeof (uint64_t);
9037 9172
9038 9173 break;
9039 9174
9040 9175 default:
9041 9176 ASSERT(0);
9042 9177 }
9043 9178
9044 9179 while (id >= (oldsvars = *np)) {
9045 9180 dtrace_statvar_t **statics;
9046 9181 int newsvars, oldsize, newsize;
9047 9182
9048 9183 if ((newsvars = (oldsvars << 1)) == 0)
9049 9184 newsvars = 1;
9050 9185
9051 9186 oldsize = oldsvars * sizeof (dtrace_statvar_t *);
9052 9187 newsize = newsvars * sizeof (dtrace_statvar_t *);
9053 9188
9054 9189 statics = kmem_zalloc(newsize, KM_SLEEP);
9055 9190
9056 9191 if (oldsize != 0) {
9057 9192 bcopy(*svarp, statics, oldsize);
9058 9193 kmem_free(*svarp, oldsize);
9059 9194 }
9060 9195
9061 9196 *svarp = statics;
9062 9197 *np = newsvars;
9063 9198 }
9064 9199
9065 9200 if ((svar = (*svarp)[id]) == NULL) {
9066 9201 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
9067 9202 svar->dtsv_var = *v;
9068 9203
9069 9204 if ((svar->dtsv_size = dsize) != 0) {
9070 9205 svar->dtsv_data = (uint64_t)(uintptr_t)
9071 9206 kmem_zalloc(dsize, KM_SLEEP);
9072 9207 }
9073 9208
9074 9209 (*svarp)[id] = svar;
9075 9210 }
9076 9211
9077 9212 svar->dtsv_refcnt++;
9078 9213 }
9079 9214
9080 9215 dtrace_difo_chunksize(dp, vstate);
9081 9216 dtrace_difo_hold(dp);
9082 9217 }
9083 9218
9084 9219 static dtrace_difo_t *
9085 9220 dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9086 9221 {
9087 9222 dtrace_difo_t *new;
9088 9223 size_t sz;
9089 9224
9090 9225 ASSERT(dp->dtdo_buf != NULL);
9091 9226 ASSERT(dp->dtdo_refcnt != 0);
9092 9227
9093 9228 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
9094 9229
9095 9230 ASSERT(dp->dtdo_buf != NULL);
9096 9231 sz = dp->dtdo_len * sizeof (dif_instr_t);
9097 9232 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
9098 9233 bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
9099 9234 new->dtdo_len = dp->dtdo_len;
9100 9235
9101 9236 if (dp->dtdo_strtab != NULL) {
9102 9237 ASSERT(dp->dtdo_strlen != 0);
9103 9238 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
9104 9239 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
9105 9240 new->dtdo_strlen = dp->dtdo_strlen;
9106 9241 }
9107 9242
9108 9243 if (dp->dtdo_inttab != NULL) {
9109 9244 ASSERT(dp->dtdo_intlen != 0);
9110 9245 sz = dp->dtdo_intlen * sizeof (uint64_t);
9111 9246 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
9112 9247 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
9113 9248 new->dtdo_intlen = dp->dtdo_intlen;
9114 9249 }
9115 9250
9116 9251 if (dp->dtdo_vartab != NULL) {
9117 9252 ASSERT(dp->dtdo_varlen != 0);
9118 9253 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
9119 9254 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
9120 9255 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
9121 9256 new->dtdo_varlen = dp->dtdo_varlen;
9122 9257 }
9123 9258
9124 9259 dtrace_difo_init(new, vstate);
9125 9260 return (new);
9126 9261 }
9127 9262
9128 9263 static void
9129 9264 dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9130 9265 {
9131 9266 int i;
9132 9267
9133 9268 ASSERT(dp->dtdo_refcnt == 0);
9134 9269
9135 9270 for (i = 0; i < dp->dtdo_varlen; i++) {
9136 9271 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9137 9272 dtrace_statvar_t *svar, **svarp;
9138 9273 uint_t id;
9139 9274 uint8_t scope = v->dtdv_scope;
9140 9275 int *np;
9141 9276
9142 9277 switch (scope) {
9143 9278 case DIFV_SCOPE_THREAD:
9144 9279 continue;
9145 9280
9146 9281 case DIFV_SCOPE_LOCAL:
9147 9282 np = &vstate->dtvs_nlocals;
9148 9283 svarp = vstate->dtvs_locals;
9149 9284 break;
9150 9285
9151 9286 case DIFV_SCOPE_GLOBAL:
9152 9287 np = &vstate->dtvs_nglobals;
9153 9288 svarp = vstate->dtvs_globals;
9154 9289 break;
9155 9290
9156 9291 default:
9157 9292 ASSERT(0);
9158 9293 }
9159 9294
9160 9295 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
9161 9296 continue;
9162 9297
9163 9298 id -= DIF_VAR_OTHER_UBASE;
9164 9299 ASSERT(id < *np);
9165 9300
9166 9301 svar = svarp[id];
9167 9302 ASSERT(svar != NULL);
9168 9303 ASSERT(svar->dtsv_refcnt > 0);
9169 9304
9170 9305 if (--svar->dtsv_refcnt > 0)
9171 9306 continue;
9172 9307
9173 9308 if (svar->dtsv_size != 0) {
9174 9309 ASSERT(svar->dtsv_data != NULL);
9175 9310 kmem_free((void *)(uintptr_t)svar->dtsv_data,
9176 9311 svar->dtsv_size);
9177 9312 }
9178 9313
9179 9314 kmem_free(svar, sizeof (dtrace_statvar_t));
9180 9315 svarp[id] = NULL;
9181 9316 }
9182 9317
9183 9318 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
9184 9319 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
9185 9320 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
9186 9321 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
9187 9322
9188 9323 kmem_free(dp, sizeof (dtrace_difo_t));
9189 9324 }
9190 9325
9191 9326 static void
9192 9327 dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9193 9328 {
9194 9329 int i;
9195 9330
9196 9331 ASSERT(MUTEX_HELD(&dtrace_lock));
9197 9332 ASSERT(dp->dtdo_refcnt != 0);
9198 9333
9199 9334 for (i = 0; i < dp->dtdo_varlen; i++) {
9200 9335 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9201 9336
9202 9337 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
9203 9338 continue;
9204 9339
9205 9340 ASSERT(dtrace_vtime_references > 0);
9206 9341 if (--dtrace_vtime_references == 0)
9207 9342 dtrace_vtime_disable();
9208 9343 }
9209 9344
9210 9345 if (--dp->dtdo_refcnt == 0)
9211 9346 dtrace_difo_destroy(dp, vstate);
9212 9347 }
9213 9348
9214 9349 /*
9215 9350 * DTrace Format Functions
9216 9351 */
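/*
 * Note that format handles are 1-based: dtrace_format_add() returns
 * the format's index plus one so that zero can denote failure (or the
 * absence of a format), and callers index dts_formats at format - 1.
 */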
9217 9352 static uint16_t
9218 9353 dtrace_format_add(dtrace_state_t *state, char *str)
9219 9354 {
9220 9355 char *fmt, **new;
9221 9356 uint16_t ndx, len = strlen(str) + 1;
9222 9357
9223 9358 fmt = kmem_zalloc(len, KM_SLEEP);
9224 9359 bcopy(str, fmt, len);
9225 9360
9226 9361 for (ndx = 0; ndx < state->dts_nformats; ndx++) {
9227 9362 if (state->dts_formats[ndx] == NULL) {
9228 9363 state->dts_formats[ndx] = fmt;
9229 9364 return (ndx + 1);
9230 9365 }
9231 9366 }
9232 9367
9233 9368 if (state->dts_nformats == USHRT_MAX) {
9234 9369 /*
9235 9370 * This is only likely if a denial-of-service attack is being
9236 9371 * attempted. As such, it's okay to fail silently here.
9237 9372 */
9238 9373 kmem_free(fmt, len);
9239 9374 return (0);
9240 9375 }
9241 9376
9242 9377 /*
9243 9378 * For simplicity, we always resize the formats array to be exactly the
9244 9379 * number of formats.
9245 9380 */
9246 9381 ndx = state->dts_nformats++;
9247 9382 new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);
9248 9383
9249 9384 if (state->dts_formats != NULL) {
9250 9385 ASSERT(ndx != 0);
9251 9386 bcopy(state->dts_formats, new, ndx * sizeof (char *));
9252 9387 kmem_free(state->dts_formats, ndx * sizeof (char *));
9253 9388 }
9254 9389
9255 9390 state->dts_formats = new;
9256 9391 state->dts_formats[ndx] = fmt;
9257 9392
9258 9393 return (ndx + 1);
9259 9394 }
9260 9395
9261 9396 static void
9262 9397 dtrace_format_remove(dtrace_state_t *state, uint16_t format)
9263 9398 {
9264 9399 char *fmt;
9265 9400
9266 9401 ASSERT(state->dts_formats != NULL);
9267 9402 ASSERT(format <= state->dts_nformats);
9268 9403 ASSERT(state->dts_formats[format - 1] != NULL);
9269 9404
9270 9405 fmt = state->dts_formats[format - 1];
9271 9406 kmem_free(fmt, strlen(fmt) + 1);
9272 9407 state->dts_formats[format - 1] = NULL;
9273 9408 }
9274 9409
9275 9410 static void
9276 9411 dtrace_format_destroy(dtrace_state_t *state)
9277 9412 {
9278 9413 int i;
9279 9414
9280 9415 if (state->dts_nformats == 0) {
9281 9416 ASSERT(state->dts_formats == NULL);
9282 9417 return;
9283 9418 }
9284 9419
9285 9420 ASSERT(state->dts_formats != NULL);
9286 9421
9287 9422 for (i = 0; i < state->dts_nformats; i++) {
9288 9423 char *fmt = state->dts_formats[i];
9289 9424
9290 9425 if (fmt == NULL)
9291 9426 continue;
9292 9427
9293 9428 kmem_free(fmt, strlen(fmt) + 1);
9294 9429 }
9295 9430
9296 9431 kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
9297 9432 state->dts_nformats = 0;
9298 9433 state->dts_formats = NULL;
9299 9434 }
9300 9435
9301 9436 /*
9302 9437 * DTrace Predicate Functions
9303 9438 */
9304 9439 static dtrace_predicate_t *
9305 9440 dtrace_predicate_create(dtrace_difo_t *dp)
9306 9441 {
9307 9442 dtrace_predicate_t *pred;
9308 9443
9309 9444 ASSERT(MUTEX_HELD(&dtrace_lock));
9310 9445 ASSERT(dp->dtdo_refcnt != 0);
9311 9446
9312 9447 pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
9313 9448 pred->dtp_difo = dp;
9314 9449 pred->dtp_refcnt = 1;
9315 9450
9316 9451 if (!dtrace_difo_cacheable(dp))
9317 9452 return (pred);
9318 9453
9319 9454 if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
9320 9455 /*
9321 9456 * This is only theoretically possible -- we have had 2^32
9322 9457 * cacheable predicates on this machine. We cannot allow any
9323 9458 * more predicates to become cacheable: as unlikely as it is,
9324 9459 * there may be a thread caching a (now stale) predicate cache
9325 9460 * ID. (N.B.: the temptation is being successfully resisted to
9326 9461 * have this cmn_err() "Holy shit -- we executed this code!")
9327 9462 */
9328 9463 return (pred);
9329 9464 }
9330 9465
9331 9466 pred->dtp_cacheid = dtrace_predcache_id++;
9332 9467
9333 9468 return (pred);
9334 9469 }
9335 9470
9336 9471 static void
9337 9472 dtrace_predicate_hold(dtrace_predicate_t *pred)
9338 9473 {
9339 9474 ASSERT(MUTEX_HELD(&dtrace_lock));
9340 9475 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
9341 9476 ASSERT(pred->dtp_refcnt > 0);
9342 9477
9343 9478 pred->dtp_refcnt++;
9344 9479 }
9345 9480
9346 9481 static void
9347 9482 dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
9348 9483 {
9349 9484 dtrace_difo_t *dp = pred->dtp_difo;
9350 9485
9351 9486 ASSERT(MUTEX_HELD(&dtrace_lock));
9352 9487 ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
9353 9488 ASSERT(pred->dtp_refcnt > 0);
9354 9489
9355 9490 if (--pred->dtp_refcnt == 0) {
9356 9491 dtrace_difo_release(pred->dtp_difo, vstate);
9357 9492 kmem_free(pred, sizeof (dtrace_predicate_t));
9358 9493 }
9359 9494 }
9360 9495
9361 9496 /*
9362 9497 * DTrace Action Description Functions
9363 9498 */
9364 9499 static dtrace_actdesc_t *
9365 9500 dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
9366 9501 uint64_t uarg, uint64_t arg)
9367 9502 {
9368 9503 dtrace_actdesc_t *act;
9369 9504
9370 9505 ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
9371 9506 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));
9372 9507
9373 9508 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
9374 9509 act->dtad_kind = kind;
9375 9510 act->dtad_ntuple = ntuple;
9376 9511 act->dtad_uarg = uarg;
9377 9512 act->dtad_arg = arg;
9378 9513 act->dtad_refcnt = 1;
9379 9514
9380 9515 return (act);
9381 9516 }
9382 9517
9383 9518 static void
9384 9519 dtrace_actdesc_hold(dtrace_actdesc_t *act)
9385 9520 {
9386 9521 ASSERT(act->dtad_refcnt >= 1);
9387 9522 act->dtad_refcnt++;
9388 9523 }
9389 9524
9390 9525 static void
9391 9526 dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
9392 9527 {
9393 9528 dtrace_actkind_t kind = act->dtad_kind;
9394 9529 dtrace_difo_t *dp;
9395 9530
9396 9531 ASSERT(act->dtad_refcnt >= 1);
9397 9532
9398 9533 if (--act->dtad_refcnt != 0)
9399 9534 return;
9400 9535
9401 9536 if ((dp = act->dtad_difo) != NULL)
9402 9537 dtrace_difo_release(dp, vstate);
9403 9538
9404 9539 if (DTRACEACT_ISPRINTFLIKE(kind)) {
9405 9540 char *str = (char *)(uintptr_t)act->dtad_arg;
9406 9541
9407 9542 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
9408 9543 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));
9409 9544
9410 9545 if (str != NULL)
9411 9546 kmem_free(str, strlen(str) + 1);
9412 9547 }
9413 9548
9414 9549 kmem_free(act, sizeof (dtrace_actdesc_t));
9415 9550 }
9416 9551
9417 9552 /*
9418 9553 * DTrace ECB Functions
9419 9554 */
9420 9555 static dtrace_ecb_t *
9421 9556 dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
9422 9557 {
9423 9558 dtrace_ecb_t *ecb;
9424 9559 dtrace_epid_t epid;
9425 9560
9426 9561 ASSERT(MUTEX_HELD(&dtrace_lock));
9427 9562
9428 9563 ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
9429 9564 ecb->dte_predicate = NULL;
9430 9565 ecb->dte_probe = probe;
9431 9566
9432 9567 /*
9433 9568 * The default size is the size of the default action: recording
9434 9569 * the header.
9435 9570 */
9436 9571 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t);
9437 9572 ecb->dte_alignment = sizeof (dtrace_epid_t);
9438 9573
9439 9574 epid = state->dts_epid++;
9440 9575
9441 9576 if (epid - 1 >= state->dts_necbs) {
9442 9577 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
9443 9578 int necbs = state->dts_necbs << 1;
9444 9579
9445 9580 ASSERT(epid == state->dts_necbs + 1);
9446 9581
9447 9582 if (necbs == 0) {
9448 9583 ASSERT(oecbs == NULL);
9449 9584 necbs = 1;
9450 9585 }
9451 9586
9452 9587 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);
9453 9588
9454 9589 if (oecbs != NULL)
9455 9590 bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));
9456 9591
9457 9592 dtrace_membar_producer();
9458 9593 state->dts_ecbs = ecbs;
9459 9594
9460 9595 if (oecbs != NULL) {
9461 9596 /*
9462 9597 * If this state is active, we must dtrace_sync()
9463 9598 * before we can free the old dts_ecbs array: we're
9464 9599 * coming in hot, and there may be active ring
9465 9600 * buffer processing (which indexes into the dts_ecbs
9466 9601 * array) on another CPU.
9467 9602 */
9468 9603 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
9469 9604 dtrace_sync();
9470 9605
9471 9606 kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
9472 9607 }
9473 9608
9474 9609 dtrace_membar_producer();
9475 9610 state->dts_necbs = necbs;
9476 9611 }
9477 9612
9478 9613 ecb->dte_state = state;
9479 9614
9480 9615 ASSERT(state->dts_ecbs[epid - 1] == NULL);
9481 9616 dtrace_membar_producer();
9482 9617 state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;
9483 9618
9484 9619 return (ecb);
9485 9620 }
9486 9621
9487 9622 static int
9488 9623 dtrace_ecb_enable(dtrace_ecb_t *ecb)
9489 9624 {
9490 9625 dtrace_probe_t *probe = ecb->dte_probe;
9491 9626
9492 9627 ASSERT(MUTEX_HELD(&cpu_lock));
9493 9628 ASSERT(MUTEX_HELD(&dtrace_lock));
9494 9629 ASSERT(ecb->dte_next == NULL);
9495 9630
9496 9631 if (probe == NULL) {
9497 9632 /*
9498 9633 * This is the NULL probe -- there's nothing to do.
9499 9634 */
9500 9635 return (0);
9501 9636 }
9502 9637
9503 9638 if (probe->dtpr_ecb == NULL) {
9504 9639 dtrace_provider_t *prov = probe->dtpr_provider;
9505 9640
9506 9641 /*
9507 9642 * We're the first ECB on this probe.
9508 9643 */
9509 9644 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;
9510 9645
9511 9646 if (ecb->dte_predicate != NULL)
9512 9647 probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
9513 9648
9514 9649 return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
9515 9650 probe->dtpr_id, probe->dtpr_arg));
9516 9651 } else {
9517 9652 /*
9518 9653 * This probe is already active. Swing the last pointer to
9519 9654 * point to the new ECB, and issue a dtrace_sync() to assure
9520 9655 * that all CPUs have seen the change.
9521 9656 */
9522 9657 ASSERT(probe->dtpr_ecb_last != NULL);
9523 9658 probe->dtpr_ecb_last->dte_next = ecb;
9524 9659 probe->dtpr_ecb_last = ecb;
9525 9660 probe->dtpr_predcache = 0;
9526 9661
9527 9662 dtrace_sync();
9528 9663 return (0);
9529 9664 }
9530 9665 }
9531 9666
9532 9667 static void
9533 9668 dtrace_ecb_resize(dtrace_ecb_t *ecb)
9534 9669 {
9535 9670 dtrace_action_t *act;
9536 9671 uint32_t curneeded = UINT32_MAX;
9537 9672 uint32_t aggbase = UINT32_MAX;
9538 9673
9539 9674 /*
9540 9675 * If we record anything, we always record the dtrace_rechdr_t. (And
9541 9676 * we always record it first.)
9542 9677 */
9543 9678 ecb->dte_size = sizeof (dtrace_rechdr_t);
9544 9679 ecb->dte_alignment = sizeof (dtrace_epid_t);
9545 9680
9546 9681 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
9547 9682 dtrace_recdesc_t *rec = &act->dta_rec;
9548 9683 ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1);
9549 9684
9550 9685 ecb->dte_alignment = MAX(ecb->dte_alignment,
9551 9686 rec->dtrd_alignment);
9552 9687
9553 9688 if (DTRACEACT_ISAGG(act->dta_kind)) {
9554 9689 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
9555 9690
9556 9691 ASSERT(rec->dtrd_size != 0);
9557 9692 ASSERT(agg->dtag_first != NULL);
9558 9693 ASSERT(act->dta_prev->dta_intuple);
9559 9694 ASSERT(aggbase != UINT32_MAX);
9560 9695 ASSERT(curneeded != UINT32_MAX);
9561 9696
9562 9697 agg->dtag_base = aggbase;
9563 9698
9564 9699 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
9565 9700 rec->dtrd_offset = curneeded;
9566 9701 curneeded += rec->dtrd_size;
9567 9702 ecb->dte_needed = MAX(ecb->dte_needed, curneeded);
9568 9703
9569 9704 aggbase = UINT32_MAX;
9570 9705 curneeded = UINT32_MAX;
9571 9706 } else if (act->dta_intuple) {
9572 9707 if (curneeded == UINT32_MAX) {
9573 9708 /*
9574 9709 * This is the first record in a tuple. Align
9575 9710 * curneeded to be at offset 4 in an 8-byte
9576 9711 * aligned block.
9577 9712 */
9578 9713 ASSERT(act->dta_prev == NULL ||
9579 9714 !act->dta_prev->dta_intuple);
9580 9715 ASSERT3U(aggbase, ==, UINT32_MAX);
9581 9716 curneeded = P2PHASEUP(ecb->dte_size,
9582 9717 sizeof (uint64_t), sizeof (dtrace_aggid_t));
9583 9718
9584 9719 aggbase = curneeded - sizeof (dtrace_aggid_t);
9585 9720 ASSERT(IS_P2ALIGNED(aggbase,
9586 9721 sizeof (uint64_t)));
9587 9722 }
9588 9723 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
9589 9724 rec->dtrd_offset = curneeded;
9590 9725 curneeded += rec->dtrd_size;
9591 9726 } else {
9592 9727 /* tuples must be followed by an aggregation */
9593 9728 ASSERT(act->dta_prev == NULL ||
9594 9729 !act->dta_prev->dta_intuple);
9595 9730
9596 9731 ecb->dte_size = P2ROUNDUP(ecb->dte_size,
9597 9732 rec->dtrd_alignment);
9598 9733 rec->dtrd_offset = ecb->dte_size;
9599 9734 ecb->dte_size += rec->dtrd_size;
9600 9735 ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size);
9601 9736 }
9602 9737 }
9603 9738
9604 9739 if ((act = ecb->dte_action) != NULL &&
9605 9740 !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
9606 9741 ecb->dte_size == sizeof (dtrace_rechdr_t)) {
9607 9742 /*
9608 9743 * If the size is still sizeof (dtrace_rechdr_t), then all
9609 9744 * actions store no data; set the size to 0.
9610 9745 */
9611 9746 ecb->dte_size = 0;
9612 9747 }
9613 9748
9614 9749 ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t));
9615 9750 ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t)));
9616 9751 ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed,
9617 9752 ecb->dte_needed);
9618 9753 }
9619 9754
9620 9755 static dtrace_action_t *
9621 9756 dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
9622 9757 {
9623 9758 dtrace_aggregation_t *agg;
9624 9759 size_t size = sizeof (uint64_t);
9625 9760 int ntuple = desc->dtad_ntuple;
9626 9761 dtrace_action_t *act;
9627 9762 dtrace_recdesc_t *frec;
9628 9763 dtrace_aggid_t aggid;
9629 9764 dtrace_state_t *state = ecb->dte_state;
9630 9765
9631 9766 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
9632 9767 agg->dtag_ecb = ecb;
9633 9768
9634 9769 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));
9635 9770
9636 9771 switch (desc->dtad_kind) {
9637 9772 case DTRACEAGG_MIN:
9638 9773 agg->dtag_initial = INT64_MAX;
9639 9774 agg->dtag_aggregate = dtrace_aggregate_min;
9640 9775 break;
9641 9776
9642 9777 case DTRACEAGG_MAX:
9643 9778 agg->dtag_initial = INT64_MIN;
9644 9779 agg->dtag_aggregate = dtrace_aggregate_max;
9645 9780 break;
9646 9781
9647 9782 case DTRACEAGG_COUNT:
9648 9783 agg->dtag_aggregate = dtrace_aggregate_count;
9649 9784 break;
9650 9785
9651 9786 case DTRACEAGG_QUANTIZE:
9652 9787 agg->dtag_aggregate = dtrace_aggregate_quantize;
9653 9788 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
9654 9789 sizeof (uint64_t);
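		/*
		 * On a 64-bit quantity this is 63 power-of-two buckets
		 * in each direction plus one for zero:
		 * (63 * 2 + 1) * sizeof (uint64_t) = 1016 bytes.
		 */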
9655 9790 break;
9656 9791
9657 9792 case DTRACEAGG_LQUANTIZE: {
9658 9793 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
9659 9794 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);
9660 9795
9661 9796 agg->dtag_initial = desc->dtad_arg;
9662 9797 agg->dtag_aggregate = dtrace_aggregate_lquantize;
9663 9798
9664 9799 if (step == 0 || levels == 0)
9665 9800 goto err;
9666 9801
9667 9802 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
9668 9803 break;
9669 9804 }
9670 9805
9671 9806 case DTRACEAGG_LLQUANTIZE: {
9672 9807 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
9673 9808 uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
9674 9809 uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
9675 9810 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
9676 9811 int64_t v;
9677 9812
9678 9813 agg->dtag_initial = desc->dtad_arg;
9679 9814 agg->dtag_aggregate = dtrace_aggregate_llquantize;
9680 9815
9681 9816 if (factor < 2 || low >= high || nsteps < factor)
9682 9817 goto err;
9683 9818
9684 9819 /*
9685 9820 * Now check that the number of steps evenly divides a power
9686 9821 * of the factor. (This assures both integer bucket size and
9687 9822 * linearity within each magnitude.)
9688 9823 */
9689 9824 for (v = factor; v < nsteps; v *= factor)
9690 9825 continue;
9691 9826
9692 9827 if ((v % nsteps) || (nsteps % factor))
9693 9828 goto err;
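		/*
		 * For example, factor = 10 and nsteps = 20 is accepted:
		 * v reaches 100, and 100 % 20 == 0 and 20 % 10 == 0.
		 * By contrast, factor = 10 and nsteps = 30 is rejected,
		 * since 100 % 30 != 0.
		 */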
9694 9829
9695 9830 size = (dtrace_aggregate_llquantize_bucket(factor,
9696 9831 low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
9697 9832 break;
9698 9833 }
9699 9834
9700 9835 case DTRACEAGG_AVG:
9701 9836 agg->dtag_aggregate = dtrace_aggregate_avg;
9702 9837 size = sizeof (uint64_t) * 2;
9703 9838 break;
9704 9839
9705 9840 case DTRACEAGG_STDDEV:
9706 9841 agg->dtag_aggregate = dtrace_aggregate_stddev;
9707 9842 size = sizeof (uint64_t) * 4;
9708 9843 break;
9709 9844
9710 9845 case DTRACEAGG_SUM:
9711 9846 agg->dtag_aggregate = dtrace_aggregate_sum;
9712 9847 break;
9713 9848
9714 9849 default:
9715 9850 goto err;
9716 9851 }
9717 9852
9718 9853 agg->dtag_action.dta_rec.dtrd_size = size;
9719 9854
9720 9855 if (ntuple == 0)
9721 9856 goto err;
9722 9857
9723 9858 /*
9724 9859 * We must make sure that we have enough actions for the n-tuple.
9725 9860 */
9726 9861 for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
9727 9862 if (DTRACEACT_ISAGG(act->dta_kind))
9728 9863 break;
9729 9864
9730 9865 if (--ntuple == 0) {
9731 9866 /*
9732 9867 * This is the action with which our n-tuple begins.
9733 9868 */
9734 9869 agg->dtag_first = act;
9735 9870 goto success;
9736 9871 }
9737 9872 }
9738 9873
9739 9874 /*
9740 9875 * This n-tuple is short by ntuple elements. Return failure.
9741 9876 */
9742 9877 ASSERT(ntuple != 0);
9743 9878 err:
9744 9879 kmem_free(agg, sizeof (dtrace_aggregation_t));
9745 9880 return (NULL);
9746 9881
9747 9882 success:
9748 9883 /*
9749 9884 * If the last action in the tuple has a size of zero, it's actually
9750 9885 * an expression argument for the aggregating action.
9751 9886 */
9752 9887 ASSERT(ecb->dte_action_last != NULL);
9753 9888 act = ecb->dte_action_last;
9754 9889
9755 9890 if (act->dta_kind == DTRACEACT_DIFEXPR) {
9756 9891 ASSERT(act->dta_difo != NULL);
9757 9892
9758 9893 if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
9759 9894 agg->dtag_hasarg = 1;
9760 9895 }
9761 9896
9762 9897 /*
9763 9898 * We need to allocate an id for this aggregation.
9764 9899 */
9765 9900 aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
9766 9901 VM_BESTFIT | VM_SLEEP);
9767 9902
9768 9903 if (aggid - 1 >= state->dts_naggregations) {
9769 9904 dtrace_aggregation_t **oaggs = state->dts_aggregations;
9770 9905 dtrace_aggregation_t **aggs;
9771 9906 int naggs = state->dts_naggregations << 1;
9772 9907 int onaggs = state->dts_naggregations;
9773 9908
9774 9909 ASSERT(aggid == state->dts_naggregations + 1);
9775 9910
9776 9911 if (naggs == 0) {
9777 9912 ASSERT(oaggs == NULL);
9778 9913 naggs = 1;
9779 9914 }
9780 9915
9781 9916 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);
9782 9917
9783 9918 if (oaggs != NULL) {
9784 9919 bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
9785 9920 kmem_free(oaggs, onaggs * sizeof (*aggs));
9786 9921 }
9787 9922
9788 9923 state->dts_aggregations = aggs;
9789 9924 state->dts_naggregations = naggs;
9790 9925 }
9791 9926
9792 9927 ASSERT(state->dts_aggregations[aggid - 1] == NULL);
9793 9928 state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;
9794 9929
9795 9930 frec = &agg->dtag_first->dta_rec;
9796 9931 if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
9797 9932 frec->dtrd_alignment = sizeof (dtrace_aggid_t);
9798 9933
9799 9934 for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
9800 9935 ASSERT(!act->dta_intuple);
9801 9936 act->dta_intuple = 1;
9802 9937 }
9803 9938
9804 9939 return (&agg->dtag_action);
9805 9940 }
9806 9941
9807 9942 static void
9808 9943 dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
9809 9944 {
9810 9945 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
9811 9946 dtrace_state_t *state = ecb->dte_state;
9812 9947 dtrace_aggid_t aggid = agg->dtag_id;
9813 9948
9814 9949 ASSERT(DTRACEACT_ISAGG(act->dta_kind));
9815 9950 vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);
9816 9951
9817 9952 ASSERT(state->dts_aggregations[aggid - 1] == agg);
9818 9953 state->dts_aggregations[aggid - 1] = NULL;
9819 9954
9820 9955 kmem_free(agg, sizeof (dtrace_aggregation_t));
9821 9956 }
9822 9957
9823 9958 static int
9824 9959 dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
9825 9960 {
9826 9961 dtrace_action_t *action, *last;
9827 9962 dtrace_difo_t *dp = desc->dtad_difo;
9828 9963 uint32_t size = 0, align = sizeof (uint8_t), mask;
9829 9964 uint16_t format = 0;
9830 9965 dtrace_recdesc_t *rec;
9831 9966 dtrace_state_t *state = ecb->dte_state;
9832 9967 dtrace_optval_t *opt = state->dts_options, nframes, strsize;
9833 9968 uint64_t arg = desc->dtad_arg;
9834 9969
9835 9970 ASSERT(MUTEX_HELD(&dtrace_lock));
9836 9971 ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);
9837 9972
9838 9973 if (DTRACEACT_ISAGG(desc->dtad_kind)) {
9839 9974 /*
9840 9975 * If this is an aggregating action, there must be neither
9841 9976 * a speculate nor a commit on the action chain.
9842 9977 */
9843 9978 dtrace_action_t *act;
9844 9979
9845 9980 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
9846 9981 if (act->dta_kind == DTRACEACT_COMMIT)
9847 9982 return (EINVAL);
9848 9983
9849 9984 if (act->dta_kind == DTRACEACT_SPECULATE)
9850 9985 return (EINVAL);
9851 9986 }
9852 9987
9853 9988 action = dtrace_ecb_aggregation_create(ecb, desc);
9854 9989
9855 9990 if (action == NULL)
9856 9991 return (EINVAL);
9857 9992 } else {
9858 9993 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
9859 9994 (desc->dtad_kind == DTRACEACT_DIFEXPR &&
9860 9995 dp != NULL && dp->dtdo_destructive)) {
9861 9996 state->dts_destructive = 1;
9862 9997 }
9863 9998
9864 9999 switch (desc->dtad_kind) {
9865 10000 case DTRACEACT_PRINTF:
9866 10001 case DTRACEACT_PRINTA:
9867 10002 case DTRACEACT_SYSTEM:
9868 10003 case DTRACEACT_FREOPEN:
9869 10004 case DTRACEACT_DIFEXPR:
9870 10005 /*
9871 10006 * We know that our arg is a string -- turn it into a
9872 10007 * format.
9873 10008 */
9874 10009 if (arg == NULL) {
9875 10010 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
9876 10011 desc->dtad_kind == DTRACEACT_DIFEXPR);
9877 10012 format = 0;
9878 10013 } else {
9879 10014 ASSERT(arg != NULL);
9880 10015 ASSERT(arg > KERNELBASE);
9881 10016 format = dtrace_format_add(state,
9882 10017 (char *)(uintptr_t)arg);
9883 10018 }
9884 10019
9885 10020 /*FALLTHROUGH*/
9886 10021 case DTRACEACT_LIBACT:
9887 10022 case DTRACEACT_TRACEMEM:
9888 10023 case DTRACEACT_TRACEMEM_DYNSIZE:
9889 10024 if (dp == NULL)
9890 10025 return (EINVAL);
9891 10026
9892 10027 if ((size = dp->dtdo_rtype.dtdt_size) != 0)
9893 10028 break;
9894 10029
9895 10030 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
9896 10031 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
9897 10032 return (EINVAL);
9898 10033
9899 10034 size = opt[DTRACEOPT_STRSIZE];
9900 10035 }
9901 10036
9902 10037 break;
9903 10038
9904 10039 case DTRACEACT_STACK:
9905 10040 if ((nframes = arg) == 0) {
9906 10041 nframes = opt[DTRACEOPT_STACKFRAMES];
9907 10042 ASSERT(nframes > 0);
9908 10043 arg = nframes;
9909 10044 }
9910 10045
9911 10046 size = nframes * sizeof (pc_t);
9912 10047 break;
9913 10048
9914 10049 case DTRACEACT_JSTACK:
9915 10050 if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
9916 10051 strsize = opt[DTRACEOPT_JSTACKSTRSIZE];
9917 10052
9918 10053 if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
9919 10054 nframes = opt[DTRACEOPT_JSTACKFRAMES];
9920 10055
9921 10056 arg = DTRACE_USTACK_ARG(nframes, strsize);
9922 10057
9923 10058 /*FALLTHROUGH*/
9924 10059 case DTRACEACT_USTACK:
9925 10060 if (desc->dtad_kind != DTRACEACT_JSTACK &&
9926 10061 (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
9927 10062 strsize = DTRACE_USTACK_STRSIZE(arg);
9928 10063 nframes = opt[DTRACEOPT_USTACKFRAMES];
9929 10064 ASSERT(nframes > 0);
9930 10065 arg = DTRACE_USTACK_ARG(nframes, strsize);
9931 10066 }
9932 10067
9933 10068 /*
9934 10069 * Save a slot for the pid.
9935 10070 */
9936 10071 size = (nframes + 1) * sizeof (uint64_t);
9937 10072 size += DTRACE_USTACK_STRSIZE(arg);
9938 10073 size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));
9939 10074
9940 10075 break;
9941 10076
9942 10077 case DTRACEACT_SYM:
9943 10078 case DTRACEACT_MOD:
9944 10079 if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
9945 10080 sizeof (uint64_t)) ||
9946 10081 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
9947 10082 return (EINVAL);
9948 10083 break;
9949 10084
9950 10085 case DTRACEACT_USYM:
9951 10086 case DTRACEACT_UMOD:
9952 10087 case DTRACEACT_UADDR:
9953 10088 if (dp == NULL ||
9954 10089 (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
9955 10090 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
9956 10091 return (EINVAL);
9957 10092
9958 10093 /*
9959 10094 * We have a slot for the pid, plus a slot for the
9960 10095 * argument. To keep things simple (aligned with
9961 10096 * bitness-neutral sizing), we store each as a 64-bit
9962 10097 * quantity.
9963 10098 */
9964 10099 size = 2 * sizeof (uint64_t);
9965 10100 break;
9966 10101
9967 10102 case DTRACEACT_STOP:
9968 10103 case DTRACEACT_BREAKPOINT:
9969 10104 case DTRACEACT_PANIC:
9970 10105 break;
9971 10106
9972 10107 case DTRACEACT_CHILL:
9973 10108 case DTRACEACT_DISCARD:
9974 10109 case DTRACEACT_RAISE:
9975 10110 if (dp == NULL)
9976 10111 return (EINVAL);
9977 10112 break;
9978 10113
9979 10114 case DTRACEACT_EXIT:
9980 10115 if (dp == NULL ||
9981 10116 (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
9982 10117 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
9983 10118 return (EINVAL);
9984 10119 break;
9985 10120
9986 10121 case DTRACEACT_SPECULATE:
9987 10122 if (ecb->dte_size > sizeof (dtrace_rechdr_t))
9988 10123 return (EINVAL);
9989 10124
9990 10125 if (dp == NULL)
9991 10126 return (EINVAL);
9992 10127
9993 10128 state->dts_speculates = 1;
9994 10129 break;
9995 10130
9996 10131 case DTRACEACT_COMMIT: {
9997 10132 dtrace_action_t *act = ecb->dte_action;
9998 10133
9999 10134 for (; act != NULL; act = act->dta_next) {
10000 10135 if (act->dta_kind == DTRACEACT_COMMIT)
10001 10136 return (EINVAL);
10002 10137 }
10003 10138
10004 10139 if (dp == NULL)
10005 10140 return (EINVAL);
10006 10141 break;
10007 10142 }
10008 10143
10009 10144 default:
10010 10145 return (EINVAL);
10011 10146 }
10012 10147
10013 10148 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
10014 10149 /*
10015 10150 * If this is a data-storing action or a speculate,
10016 10151 * we must be sure that there isn't a commit on the
10017 10152 * action chain.
10018 10153 */
10019 10154 dtrace_action_t *act = ecb->dte_action;
10020 10155
10021 10156 for (; act != NULL; act = act->dta_next) {
10022 10157 if (act->dta_kind == DTRACEACT_COMMIT)
10023 10158 return (EINVAL);
10024 10159 }
10025 10160 }
10026 10161
10027 10162 action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
10028 10163 action->dta_rec.dtrd_size = size;
10029 10164 }
10030 10165
10031 10166 action->dta_refcnt = 1;
10032 10167 rec = &action->dta_rec;
10033 10168 size = rec->dtrd_size;
10034 10169
10035 10170 for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
10036 10171 if (!(size & mask)) {
10037 10172 align = mask + 1;
10038 10173 break;
10039 10174 }
10040 10175 }
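	/*
	 * Editor's note: the loop above computes the record's alignment as
	 * the largest power of two (at most sizeof (uint64_t)) that evenly
	 * divides its size; e.g., a 12-byte record yields align = 4, while
	 * a zero-sized record leaves the prior default alignment in place.
	 */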
10041 10176
10042 10177 action->dta_kind = desc->dtad_kind;
10043 10178
10044 10179 if ((action->dta_difo = dp) != NULL)
10045 10180 dtrace_difo_hold(dp);
10046 10181
10047 10182 rec->dtrd_action = action->dta_kind;
10048 10183 rec->dtrd_arg = arg;
10049 10184 rec->dtrd_uarg = desc->dtad_uarg;
10050 10185 rec->dtrd_alignment = (uint16_t)align;
10051 10186 rec->dtrd_format = format;
10052 10187
10053 10188 if ((last = ecb->dte_action_last) != NULL) {
10054 10189 ASSERT(ecb->dte_action != NULL);
10055 10190 action->dta_prev = last;
10056 10191 last->dta_next = action;
10057 10192 } else {
10058 10193 ASSERT(ecb->dte_action == NULL);
10059 10194 ecb->dte_action = action;
10060 10195 }
10061 10196
10062 10197 ecb->dte_action_last = action;
10063 10198
10064 10199 return (0);
10065 10200 }
10066 10201
10067 10202 static void
10068 10203 dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
10069 10204 {
10070 10205 dtrace_action_t *act = ecb->dte_action, *next;
10071 10206 dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
10072 10207 dtrace_difo_t *dp;
10073 10208 uint16_t format;
10074 10209
10075 10210 if (act != NULL && act->dta_refcnt > 1) {
10076 10211 ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
10077 10212 act->dta_refcnt--;
10078 10213 } else {
10079 10214 for (; act != NULL; act = next) {
10080 10215 next = act->dta_next;
10081 10216 ASSERT(next != NULL || act == ecb->dte_action_last);
10082 10217 ASSERT(act->dta_refcnt == 1);
10083 10218
10084 10219 if ((format = act->dta_rec.dtrd_format) != 0)
10085 10220 dtrace_format_remove(ecb->dte_state, format);
10086 10221
10087 10222 if ((dp = act->dta_difo) != NULL)
10088 10223 dtrace_difo_release(dp, vstate);
10089 10224
10090 10225 if (DTRACEACT_ISAGG(act->dta_kind)) {
10091 10226 dtrace_ecb_aggregation_destroy(ecb, act);
10092 10227 } else {
10093 10228 kmem_free(act, sizeof (dtrace_action_t));
10094 10229 }
10095 10230 }
10096 10231 }
10097 10232
10098 10233 ecb->dte_action = NULL;
10099 10234 ecb->dte_action_last = NULL;
10100 10235 ecb->dte_size = 0;
10101 10236 }
10102 10237
10103 10238 static void
10104 10239 dtrace_ecb_disable(dtrace_ecb_t *ecb)
10105 10240 {
10106 10241 /*
10107 10242 * We disable the ECB by removing it from its probe.
10108 10243 */
10109 10244 dtrace_ecb_t *pecb, *prev = NULL;
10110 10245 dtrace_probe_t *probe = ecb->dte_probe;
10111 10246
10112 10247 ASSERT(MUTEX_HELD(&dtrace_lock));
10113 10248
10114 10249 if (probe == NULL) {
10115 10250 /*
10116 10251 * This is the NULL probe; there is nothing to disable.
10117 10252 */
10118 10253 return;
10119 10254 }
10120 10255
10121 10256 for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
10122 10257 if (pecb == ecb)
10123 10258 break;
10124 10259 prev = pecb;
10125 10260 }
10126 10261
10127 10262 ASSERT(pecb != NULL);
10128 10263
10129 10264 if (prev == NULL) {
10130 10265 probe->dtpr_ecb = ecb->dte_next;
10131 10266 } else {
10132 10267 prev->dte_next = ecb->dte_next;
10133 10268 }
10134 10269
10135 10270 if (ecb == probe->dtpr_ecb_last) {
10136 10271 ASSERT(ecb->dte_next == NULL);
10137 10272 probe->dtpr_ecb_last = prev;
10138 10273 }
10139 10274
10140 10275 /*
10141 10276 * The ECB has been disconnected from the probe; now sync to assure
10142 10277 * that all CPUs have seen the change before returning.
10143 10278 */
10144 10279 dtrace_sync();
10145 10280
10146 10281 if (probe->dtpr_ecb == NULL) {
10147 10282 /*
10148 10283 * That was the last ECB on the probe; clear the predicate
10149 10284 * cache ID for the probe, disable it and sync one more time
10150 10285 * to assure that we'll never hit it again.
10151 10286 */
10152 10287 dtrace_provider_t *prov = probe->dtpr_provider;
10153 10288
10154 10289 ASSERT(ecb->dte_next == NULL);
10155 10290 ASSERT(probe->dtpr_ecb_last == NULL);
10156 10291 probe->dtpr_predcache = DTRACE_CACHEIDNONE;
10157 10292 prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
10158 10293 probe->dtpr_id, probe->dtpr_arg);
10159 10294 dtrace_sync();
10160 10295 } else {
10161 10296 /*
10162 10297 * There is at least one ECB remaining on the probe. If there
10163 10298 * is _exactly_ one, set the probe's predicate cache ID to be
10164 10299 * the predicate cache ID of the remaining ECB.
10165 10300 */
10166 10301 ASSERT(probe->dtpr_ecb_last != NULL);
10167 10302 ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);
10168 10303
10169 10304 if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
10170 10305 dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;
10171 10306
10172 10307 ASSERT(probe->dtpr_ecb->dte_next == NULL);
10173 10308
10174 10309 if (p != NULL)
10175 10310 probe->dtpr_predcache = p->dtp_cacheid;
10176 10311 }
10177 10312
10178 10313 ecb->dte_next = NULL;
10179 10314 }
10180 10315 }
10181 10316
10182 10317 static void
10183 10318 dtrace_ecb_destroy(dtrace_ecb_t *ecb)
10184 10319 {
10185 10320 dtrace_state_t *state = ecb->dte_state;
10186 10321 dtrace_vstate_t *vstate = &state->dts_vstate;
10187 10322 dtrace_predicate_t *pred;
10188 10323 dtrace_epid_t epid = ecb->dte_epid;
10189 10324
10190 10325 ASSERT(MUTEX_HELD(&dtrace_lock));
10191 10326 ASSERT(ecb->dte_next == NULL);
10192 10327 ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);
10193 10328
10194 10329 if ((pred = ecb->dte_predicate) != NULL)
10195 10330 dtrace_predicate_release(pred, vstate);
10196 10331
10197 10332 dtrace_ecb_action_remove(ecb);
10198 10333
10199 10334 ASSERT(state->dts_ecbs[epid - 1] == ecb);
10200 10335 state->dts_ecbs[epid - 1] = NULL;
10201 10336
10202 10337 kmem_free(ecb, sizeof (dtrace_ecb_t));
10203 10338 }
10204 10339
10205 10340 static dtrace_ecb_t *
10206 10341 dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
10207 10342 dtrace_enabling_t *enab)
10208 10343 {
10209 10344 dtrace_ecb_t *ecb;
10210 10345 dtrace_predicate_t *pred;
10211 10346 dtrace_actdesc_t *act;
10212 10347 dtrace_provider_t *prov;
10213 10348 dtrace_ecbdesc_t *desc = enab->dten_current;
10214 10349
10215 10350 ASSERT(MUTEX_HELD(&dtrace_lock));
10216 10351 ASSERT(state != NULL);
10217 10352
10218 10353 ecb = dtrace_ecb_add(state, probe);
10219 10354 ecb->dte_uarg = desc->dted_uarg;
10220 10355
10221 10356 if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
10222 10357 dtrace_predicate_hold(pred);
10223 10358 ecb->dte_predicate = pred;
10224 10359 }
10225 10360
10226 10361 if (probe != NULL) {
10227 10362 /*
10228 10363 * If the provider shows more leg than the consumer is old
10229 10364 * enough to see, we need to enable the appropriate implicit
10230 10365 * predicate bits to prevent the ecb from activating at
10231 10366 * revealing times.
10232 10367 *
10233 10368 * Providers specifying DTRACE_PRIV_USER at register time
10234 10369 * are stating that they need the /proc-style privilege
10235 10370 * model to be enforced, and this is what DTRACE_COND_OWNER
10236 10371 * and DTRACE_COND_ZONEOWNER will then do at probe time.
10237 10372 */
10238 10373 prov = probe->dtpr_provider;
10239 10374 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
10240 10375 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
10241 10376 ecb->dte_cond |= DTRACE_COND_OWNER;
10242 10377
10243 10378 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
10244 10379 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
10245 10380 ecb->dte_cond |= DTRACE_COND_ZONEOWNER;
10246 10381
10247 10382 /*
10248 10383 * If the provider shows us kernel innards and the user
10249 10384 * is lacking sufficient privilege, enable the
10250 10385 * DTRACE_COND_USERMODE implicit predicate.
10251 10386 */
10252 10387 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
10253 10388 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
10254 10389 ecb->dte_cond |= DTRACE_COND_USERMODE;
10255 10390 }
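	/*
	 * Editor's note (illustrative): a provider that registered with
	 * DTRACE_PRIV_USER, when enabled by a consumer lacking
	 * DTRACE_CRV_ALLPROC visibility, thus picks up DTRACE_COND_OWNER --
	 * and at probe time the ECB will fire only for processes that the
	 * consumer could otherwise observe under the /proc privilege model.
	 */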
10256 10391
10257 10392 if (dtrace_ecb_create_cache != NULL) {
10258 10393 /*
10259 10394 * If we have a cached ecb, we'll use its action list instead
10260 10395 * of creating our own (saving both time and space).
10261 10396 */
10262 10397 dtrace_ecb_t *cached = dtrace_ecb_create_cache;
10263 10398 dtrace_action_t *act = cached->dte_action;
10264 10399
10265 10400 if (act != NULL) {
10266 10401 ASSERT(act->dta_refcnt > 0);
10267 10402 act->dta_refcnt++;
10268 10403 ecb->dte_action = act;
10269 10404 ecb->dte_action_last = cached->dte_action_last;
10270 10405 ecb->dte_needed = cached->dte_needed;
10271 10406 ecb->dte_size = cached->dte_size;
10272 10407 ecb->dte_alignment = cached->dte_alignment;
10273 10408 }
10274 10409
10275 10410 return (ecb);
10276 10411 }
10277 10412
10278 10413 for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
10279 10414 if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
10280 10415 dtrace_ecb_destroy(ecb);
10281 10416 return (NULL);
10282 10417 }
10283 10418 }
10284 10419
10285 10420 dtrace_ecb_resize(ecb);
10286 10421
10287 10422 return (dtrace_ecb_create_cache = ecb);
10288 10423 }
10289 10424
10290 10425 static int
10291 10426 dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
10292 10427 {
10293 10428 dtrace_ecb_t *ecb;
10294 10429 dtrace_enabling_t *enab = arg;
10295 10430 dtrace_state_t *state = enab->dten_vstate->dtvs_state;
10296 10431
10297 10432 ASSERT(state != NULL);
10298 10433
10299 10434 if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
10300 10435 /*
10301 10436 * This probe was created in a generation for which this
10302 10437 * enabling has previously created ECBs; we don't want to
10303 10438 * enable it again, so just kick out.
10304 10439 */
10305 10440 return (DTRACE_MATCH_NEXT);
10306 10441 }
10307 10442
10308 10443 if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
10309 10444 return (DTRACE_MATCH_DONE);
10310 10445
10311 10446 if (dtrace_ecb_enable(ecb) < 0)
10312 10447 return (DTRACE_MATCH_FAIL);
10313 10448
10314 10449 return (DTRACE_MATCH_NEXT);
10315 10450 }
10316 10451
10317 10452 static dtrace_ecb_t *
10318 10453 dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
10319 10454 {
10320 10455 dtrace_ecb_t *ecb;
10321 10456
10322 10457 ASSERT(MUTEX_HELD(&dtrace_lock));
10323 10458
10324 10459 if (id == 0 || id > state->dts_necbs)
10325 10460 return (NULL);
10326 10461
10327 10462 ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
10328 10463 ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);
10329 10464
10330 10465 return (state->dts_ecbs[id - 1]);
10331 10466 }
10332 10467
10333 10468 static dtrace_aggregation_t *
10334 10469 dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
10335 10470 {
10336 10471 dtrace_aggregation_t *agg;
10337 10472
10338 10473 ASSERT(MUTEX_HELD(&dtrace_lock));
10339 10474
10340 10475 if (id == 0 || id > state->dts_naggregations)
10341 10476 return (NULL);
10342 10477
10343 10478 ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
10344 10479 ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
10345 10480 agg->dtag_id == id);
10346 10481
10347 10482 return (state->dts_aggregations[id - 1]);
10348 10483 }
10349 10484
10350 10485 /*
10351 10486 * DTrace Buffer Functions
10352 10487 *
10353 10488 * The following functions manipulate DTrace buffers. Most of these functions
10354 10489 * are called in the context of establishing or processing consumer state;
10355 10490 * exceptions are explicitly noted.
10356 10491 */
10357 10492
10358 10493 /*
10359 10494 * Note: called from cross call context. This function switches the two
10360 10495 * buffers on a given CPU. The atomicity of this operation is assured by
10361 10496 * disabling interrupts while the actual switch takes place; the disabling of
10362 10497 * interrupts serializes the execution with any execution of dtrace_probe() on
10363 10498 * the same CPU.
10364 10499 */
10365 10500 static void
10366 10501 dtrace_buffer_switch(dtrace_buffer_t *buf)
10367 10502 {
10368 10503 caddr_t tomax = buf->dtb_tomax;
10369 10504 caddr_t xamot = buf->dtb_xamot;
10370 10505 dtrace_icookie_t cookie;
10371 10506 hrtime_t now;
10372 10507
10373 10508 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
10374 10509 ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
10375 10510
10376 10511 cookie = dtrace_interrupt_disable();
10377 10512 now = dtrace_gethrtime();
10378 10513 buf->dtb_tomax = xamot;
10379 10514 buf->dtb_xamot = tomax;
10380 10515 buf->dtb_xamot_drops = buf->dtb_drops;
10381 10516 buf->dtb_xamot_offset = buf->dtb_offset;
10382 10517 buf->dtb_xamot_errors = buf->dtb_errors;
10383 10518 buf->dtb_xamot_flags = buf->dtb_flags;
10384 10519 buf->dtb_offset = 0;
10385 10520 buf->dtb_drops = 0;
10386 10521 buf->dtb_errors = 0;
10387 10522 buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
10388 10523 buf->dtb_interval = now - buf->dtb_switched;
10389 10524 buf->dtb_switched = now;
10390 10525 dtrace_interrupt_enable(cookie);
10391 10526 }
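/*
 * Editor's sketch (illustrative, not part of this change): a consumer-
 * driven snapshot switches a given CPU's buffers by cross calling into
 * dtrace_buffer_switch() on that CPU, via something like:
 *
 *	dtrace_xcall(cpu, (dtrace_xcall_t)dtrace_buffer_switch, buf);
 *
 * The interrupt disabling above is what serializes the switch against any
 * concurrent dtrace_probe() on that CPU.
 */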
10392 10527
10393 10528 /*
10394 10529 * Note: called from cross call context. This function activates a buffer
10395 10530 * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation
10396 10531 * is guaranteed by the disabling of interrupts.
10397 10532 */
10398 10533 static void
10399 10534 dtrace_buffer_activate(dtrace_state_t *state)
10400 10535 {
10401 10536 dtrace_buffer_t *buf;
10402 10537 dtrace_icookie_t cookie = dtrace_interrupt_disable();
10403 10538
10404 10539 buf = &state->dts_buffer[CPU->cpu_id];
10405 10540
10406 10541 if (buf->dtb_tomax != NULL) {
10407 10542 /*
10408 10543 * We might like to assert that the buffer is marked inactive,
10409 10544 		 * but this isn't necessarily true:  the CPU that processes
10410 10545 		 * the BEGIN probe has its buffer activated manually.  In this
10411 10546 		 * case, we take the (harmless) action of re-clearing the
10412 10547 		 * INACTIVE bit.
10413 10548 */
10414 10549 buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
10415 10550 }
10416 10551
10417 10552 dtrace_interrupt_enable(cookie);
10418 10553 }
10419 10554
10420 10555 static int
10421 10556 dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
10422 10557 processorid_t cpu, int *factor)
10423 10558 {
10424 10559 cpu_t *cp;
10425 10560 dtrace_buffer_t *buf;
10426 10561 int allocated = 0, desired = 0;
10427 10562
10428 10563 ASSERT(MUTEX_HELD(&cpu_lock));
10429 10564 ASSERT(MUTEX_HELD(&dtrace_lock));
10430 10565
10431 10566 *factor = 1;
10432 10567
10433 10568 if (size > dtrace_nonroot_maxsize &&
10434 10569 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
10435 10570 return (EFBIG);
10436 10571
10437 10572 cp = cpu_list;
10438 10573
10439 10574 do {
10440 10575 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
10441 10576 continue;
10442 10577
10443 10578 buf = &bufs[cp->cpu_id];
10444 10579
10445 10580 /*
10446 10581 * If there is already a buffer allocated for this CPU, it
10447 10582 * is only possible that this is a DR event. In this case,
10448 10583 * the buffer size must match our specified size.
10449 10584 */
10450 10585 if (buf->dtb_tomax != NULL) {
10451 10586 ASSERT(buf->dtb_size == size);
10452 10587 continue;
10453 10588 }
10454 10589
10455 10590 ASSERT(buf->dtb_xamot == NULL);
10456 10591
10457 10592 if ((buf->dtb_tomax = kmem_zalloc(size,
10458 10593 KM_NOSLEEP | KM_NORMALPRI)) == NULL)
10459 10594 goto err;
10460 10595
10461 10596 buf->dtb_size = size;
10462 10597 buf->dtb_flags = flags;
10463 10598 buf->dtb_offset = 0;
10464 10599 buf->dtb_drops = 0;
10465 10600
10466 10601 if (flags & DTRACEBUF_NOSWITCH)
10467 10602 continue;
10468 10603
10469 10604 if ((buf->dtb_xamot = kmem_zalloc(size,
10470 10605 KM_NOSLEEP | KM_NORMALPRI)) == NULL)
10471 10606 goto err;
10472 10607 } while ((cp = cp->cpu_next) != cpu_list);
10473 10608
10474 10609 return (0);
10475 10610
10476 10611 err:
10477 10612 cp = cpu_list;
10478 10613
10479 10614 do {
10480 10615 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
10481 10616 continue;
10482 10617
10483 10618 buf = &bufs[cp->cpu_id];
10484 10619 desired += 2;
10485 10620
10486 10621 if (buf->dtb_xamot != NULL) {
10487 10622 ASSERT(buf->dtb_tomax != NULL);
10488 10623 ASSERT(buf->dtb_size == size);
10489 10624 kmem_free(buf->dtb_xamot, size);
10490 10625 allocated++;
10491 10626 }
10492 10627
10493 10628 if (buf->dtb_tomax != NULL) {
10494 10629 ASSERT(buf->dtb_size == size);
10495 10630 kmem_free(buf->dtb_tomax, size);
10496 10631 allocated++;
10497 10632 }
10498 10633
10499 10634 buf->dtb_tomax = NULL;
10500 10635 buf->dtb_xamot = NULL;
10501 10636 buf->dtb_size = 0;
10502 10637 } while ((cp = cp->cpu_next) != cpu_list);
10503 10638
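	/*
	 * Editor's note (an assumption about the caller, for context):
	 * *factor conservatively estimates how much smaller the request
	 * must be to succeed -- buffers desired over buffers actually
	 * allocated -- so on ENOMEM a caller can divide its buffer size
	 * by this factor and retry.
	 */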
10504 10639 *factor = desired / (allocated > 0 ? allocated : 1);
10505 10640
10506 10641 return (ENOMEM);
10507 10642 }
10508 10643
10509 10644 /*
10510 10645 * Note: called from probe context. This function just increments the drop
10511 10646 * count on a buffer. It has been made a function to allow for the
10512 10647 * possibility of understanding the source of mysterious drop counts. (A
10513 10648 * problem for which one may be particularly disappointed that DTrace cannot
10514 10649 * be used to understand DTrace.)
10515 10650 */
10516 10651 static void
10517 10652 dtrace_buffer_drop(dtrace_buffer_t *buf)
10518 10653 {
10519 10654 buf->dtb_drops++;
10520 10655 }
10521 10656
10522 10657 /*
10523 10658 * Note: called from probe context. This function is called to reserve space
10524 10659 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the
10525 10660 * mstate. Returns the new offset in the buffer, or a negative value if an
10526 10661 * error has occurred.
10527 10662 */
10528 10663 static intptr_t
10529 10664 dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
10530 10665 dtrace_state_t *state, dtrace_mstate_t *mstate)
10531 10666 {
10532 10667 intptr_t offs = buf->dtb_offset, soffs;
10533 10668 intptr_t woffs;
10534 10669 caddr_t tomax;
10535 10670 size_t total;
10536 10671
10537 10672 if (buf->dtb_flags & DTRACEBUF_INACTIVE)
10538 10673 return (-1);
10539 10674
10540 10675 if ((tomax = buf->dtb_tomax) == NULL) {
10541 10676 dtrace_buffer_drop(buf);
10542 10677 return (-1);
10543 10678 }
10544 10679
10545 10680 if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
10546 10681 while (offs & (align - 1)) {
10547 10682 /*
10548 10683 * Assert that our alignment is off by a number which
10549 10684 * is itself sizeof (uint32_t) aligned.
10550 10685 */
10551 10686 ASSERT(!((align - (offs & (align - 1))) &
10552 10687 (sizeof (uint32_t) - 1)));
10553 10688 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
10554 10689 offs += sizeof (uint32_t);
10555 10690 }
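		/*
		 * Editor's note: the padding stored above consists of
		 * 4-byte DTRACE_EPIDNONE words, which the consumer
		 * recognizes and skips; e.g., an 8-byte-aligned record
		 * reserved at offset 4 is preceded by a single pad word.
		 */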
10556 10691
10557 10692 if ((soffs = offs + needed) > buf->dtb_size) {
10558 10693 dtrace_buffer_drop(buf);
10559 10694 return (-1);
10560 10695 }
10561 10696
10562 10697 if (mstate == NULL)
10563 10698 return (offs);
10564 10699
10565 10700 mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
10566 10701 mstate->dtms_scratch_size = buf->dtb_size - soffs;
10567 10702 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
10568 10703
10569 10704 return (offs);
10570 10705 }
10571 10706
10572 10707 if (buf->dtb_flags & DTRACEBUF_FILL) {
10573 10708 if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
10574 10709 (buf->dtb_flags & DTRACEBUF_FULL))
10575 10710 return (-1);
10576 10711 goto out;
10577 10712 }
10578 10713
10579 10714 total = needed + (offs & (align - 1));
10580 10715
10581 10716 /*
10582 10717 * For a ring buffer, life is quite a bit more complicated. Before
10583 10718 * we can store any padding, we need to adjust our wrapping offset.
10584 10719 * (If we've never before wrapped or we're not about to, no adjustment
10585 10720 * is required.)
10586 10721 */
10587 10722 if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
10588 10723 offs + total > buf->dtb_size) {
10589 10724 woffs = buf->dtb_xamot_offset;
10590 10725
10591 10726 if (offs + total > buf->dtb_size) {
10592 10727 /*
10593 10728 * We can't fit in the end of the buffer. First, a
10594 10729 * sanity check that we can fit in the buffer at all.
10595 10730 */
10596 10731 if (total > buf->dtb_size) {
10597 10732 dtrace_buffer_drop(buf);
10598 10733 return (-1);
10599 10734 }
10600 10735
10601 10736 /*
10602 10737 * We're going to be storing at the top of the buffer,
10603 10738 * so now we need to deal with the wrapped offset. We
10604 10739 * only reset our wrapped offset to 0 if it is
10605 10740 * currently greater than the current offset. If it
10606 10741 * is less than the current offset, it is because a
10607 10742 * previous allocation induced a wrap -- but the
10608 10743 * allocation didn't subsequently take the space due
10609 10744 * to an error or false predicate evaluation. In this
10610 10745 * case, we'll just leave the wrapped offset alone: if
10611 10746 * the wrapped offset hasn't been advanced far enough
10612 10747 * for this allocation, it will be adjusted in the
10613 10748 * lower loop.
10614 10749 */
10615 10750 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
10616 10751 if (woffs >= offs)
10617 10752 woffs = 0;
10618 10753 } else {
10619 10754 woffs = 0;
10620 10755 }
10621 10756
10622 10757 /*
10623 10758 * Now we know that we're going to be storing to the
10624 10759 * top of the buffer and that there is room for us
10625 10760 * there. We need to clear the buffer from the current
10626 10761 * offset to the end (there may be old gunk there).
10627 10762 */
10628 10763 while (offs < buf->dtb_size)
10629 10764 tomax[offs++] = 0;
10630 10765
10631 10766 /*
10632 10767 * We need to set our offset to zero. And because we
10633 10768 * are wrapping, we need to set the bit indicating as
10634 10769 * much. We can also adjust our needed space back
10635 10770 * down to the space required by the ECB -- we know
10636 10771 * that the top of the buffer is aligned.
10637 10772 */
10638 10773 offs = 0;
10639 10774 total = needed;
10640 10775 buf->dtb_flags |= DTRACEBUF_WRAPPED;
10641 10776 } else {
10642 10777 /*
10643 10778 * There is room for us in the buffer, so we simply
10644 10779 * need to check the wrapped offset.
10645 10780 */
10646 10781 if (woffs < offs) {
10647 10782 /*
10648 10783 * The wrapped offset is less than the offset.
10649 10784 * This can happen if we allocated buffer space
10650 10785 * that induced a wrap, but then we didn't
10651 10786 * subsequently take the space due to an error
10652 10787 * or false predicate evaluation. This is
10653 10788 * okay; we know that _this_ allocation isn't
10654 10789 * going to induce a wrap. We still can't
10655 10790 * reset the wrapped offset to be zero,
10656 10791 * however: the space may have been trashed in
10657 10792 * the previous failed probe attempt. But at
10658 10793 * least the wrapped offset doesn't need to
10659 10794 * be adjusted at all...
10660 10795 */
10661 10796 goto out;
10662 10797 }
10663 10798 }
10664 10799
10665 10800 while (offs + total > woffs) {
10666 10801 dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
10667 10802 size_t size;
10668 10803
10669 10804 if (epid == DTRACE_EPIDNONE) {
10670 10805 size = sizeof (uint32_t);
10671 10806 } else {
10672 10807 ASSERT3U(epid, <=, state->dts_necbs);
10673 10808 ASSERT(state->dts_ecbs[epid - 1] != NULL);
10674 10809
10675 10810 size = state->dts_ecbs[epid - 1]->dte_size;
10676 10811 }
10677 10812
10678 10813 ASSERT(woffs + size <= buf->dtb_size);
10679 10814 ASSERT(size != 0);
10680 10815
10681 10816 if (woffs + size == buf->dtb_size) {
10682 10817 /*
10683 10818 * We've reached the end of the buffer; we want
10684 10819 * to set the wrapped offset to 0 and break
10685 10820 * out. However, if the offs is 0, then we're
10686 10821 * in a strange edge-condition: the amount of
10687 10822 * space that we want to reserve plus the size
10688 10823 * of the record that we're overwriting is
10689 10824 * greater than the size of the buffer. This
10690 10825 * is problematic because if we reserve the
10691 10826 * space but subsequently don't consume it (due
10692 10827 * to a failed predicate or error) the wrapped
10693 10828 * offset will be 0 -- yet the EPID at offset 0
10694 10829 * will not be committed. This situation is
10695 10830 * relatively easy to deal with: if we're in
10696 10831 * this case, the buffer is indistinguishable
10697 10832 * from one that hasn't wrapped; we need only
10698 10833 * finish the job by clearing the wrapped bit,
10699 10834 * explicitly setting the offset to be 0, and
10700 10835 * zero'ing out the old data in the buffer.
10701 10836 */
10702 10837 if (offs == 0) {
10703 10838 buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
10704 10839 buf->dtb_offset = 0;
10705 10840 woffs = total;
10706 10841
10707 10842 while (woffs < buf->dtb_size)
10708 10843 tomax[woffs++] = 0;
10709 10844 }
10710 10845
10711 10846 woffs = 0;
10712 10847 break;
10713 10848 }
10714 10849
10715 10850 woffs += size;
10716 10851 }
10717 10852
10718 10853 /*
10719 10854 * We have a wrapped offset. It may be that the wrapped offset
10720 10855 * has become zero -- that's okay.
10721 10856 */
10722 10857 buf->dtb_xamot_offset = woffs;
10723 10858 }
10724 10859
10725 10860 out:
10726 10861 /*
10727 10862 * Now we can plow the buffer with any necessary padding.
10728 10863 */
10729 10864 while (offs & (align - 1)) {
10730 10865 /*
10731 10866 * Assert that our alignment is off by a number which
10732 10867 * is itself sizeof (uint32_t) aligned.
10733 10868 */
10734 10869 ASSERT(!((align - (offs & (align - 1))) &
10735 10870 (sizeof (uint32_t) - 1)));
10736 10871 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
10737 10872 offs += sizeof (uint32_t);
10738 10873 }
10739 10874
10740 10875 if (buf->dtb_flags & DTRACEBUF_FILL) {
10741 10876 if (offs + needed > buf->dtb_size - state->dts_reserve) {
10742 10877 buf->dtb_flags |= DTRACEBUF_FULL;
10743 10878 return (-1);
10744 10879 }
10745 10880 }
10746 10881
10747 10882 if (mstate == NULL)
10748 10883 return (offs);
10749 10884
10750 10885 /*
10751 10886 * For ring buffers and fill buffers, the scratch space is always
10752 10887 * the inactive buffer.
10753 10888 */
10754 10889 mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
10755 10890 mstate->dtms_scratch_size = buf->dtb_size;
10756 10891 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
10757 10892
10758 10893 return (offs);
10759 10894 }
10760 10895
10761 10896 static void
10762 10897 dtrace_buffer_polish(dtrace_buffer_t *buf)
10763 10898 {
10764 10899 ASSERT(buf->dtb_flags & DTRACEBUF_RING);
10765 10900 ASSERT(MUTEX_HELD(&dtrace_lock));
10766 10901
10767 10902 if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
10768 10903 return;
10769 10904
10770 10905 /*
10771 10906 * We need to polish the ring buffer. There are three cases:
10772 10907 *
10773 10908 * - The first (and presumably most common) is that there is no gap
10774 10909 * between the buffer offset and the wrapped offset. In this case,
10775 10910 * there is nothing in the buffer that isn't valid data; we can
10776 10911 * mark the buffer as polished and return.
10777 10912 *
10778 10913 * - The second (less common than the first but still more common
10779 10914 * than the third) is that there is a gap between the buffer offset
10780 10915 * and the wrapped offset, and the wrapped offset is larger than the
10781 10916 * buffer offset. This can happen because of an alignment issue, or
10782 10917 * can happen because of a call to dtrace_buffer_reserve() that
10783 10918 * didn't subsequently consume the buffer space. In this case,
10784 10919 * we need to zero the data from the buffer offset to the wrapped
10785 10920 * offset.
10786 10921 *
10787 10922 * - The third (and least common) is that there is a gap between the
10788 10923 * buffer offset and the wrapped offset, but the wrapped offset is
10789 10924 * _less_ than the buffer offset. This can only happen because a
10790 10925 * call to dtrace_buffer_reserve() induced a wrap, but the space
10791 10926 * was not subsequently consumed. In this case, we need to zero the
10792 10927 * space from the offset to the end of the buffer _and_ from the
10793 10928 * top of the buffer to the wrapped offset.
10794 10929 */
10795 10930 if (buf->dtb_offset < buf->dtb_xamot_offset) {
10796 10931 bzero(buf->dtb_tomax + buf->dtb_offset,
10797 10932 buf->dtb_xamot_offset - buf->dtb_offset);
10798 10933 }
10799 10934
10800 10935 if (buf->dtb_offset > buf->dtb_xamot_offset) {
10801 10936 bzero(buf->dtb_tomax + buf->dtb_offset,
10802 10937 buf->dtb_size - buf->dtb_offset);
10803 10938 bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
10804 10939 }
10805 10940 }
10806 10941
10807 10942 /*
10808 10943 * This routine determines if data generated at the specified time has likely
10809 10944 * been entirely consumed at user-level. This routine is called to determine
10810 10945 * if an ECB on a defunct probe (but for an active enabling) can be safely
10811 10946 * disabled and destroyed.
10812 10947 */
10813 10948 static int
10814 10949 dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when)
10815 10950 {
10816 10951 int i;
10817 10952
10818 10953 for (i = 0; i < NCPU; i++) {
10819 10954 dtrace_buffer_t *buf = &bufs[i];
10820 10955
10821 10956 if (buf->dtb_size == 0)
10822 10957 continue;
10823 10958
10824 10959 if (buf->dtb_flags & DTRACEBUF_RING)
10825 10960 return (0);
10826 10961
10827 10962 if (!buf->dtb_switched && buf->dtb_offset != 0)
10828 10963 return (0);
10829 10964
10830 10965 if (buf->dtb_switched - buf->dtb_interval < when)
10831 10966 return (0);
10832 10967 }
10833 10968
10834 10969 return (1);
10835 10970 }
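/*
 * Editor's note: because dtb_interval is the time between the last two
 * switches, dtb_switched - dtb_interval is the time of the next-to-last
 * switch.  Requiring that to be at or after "when" means the buffers have
 * been switched -- and therefore offered to user-level -- twice since the
 * data in question was generated.
 */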
10836 10971
10837 10972 static void
10838 10973 dtrace_buffer_free(dtrace_buffer_t *bufs)
10839 10974 {
10840 10975 int i;
10841 10976
10842 10977 for (i = 0; i < NCPU; i++) {
10843 10978 dtrace_buffer_t *buf = &bufs[i];
10844 10979
10845 10980 if (buf->dtb_tomax == NULL) {
10846 10981 ASSERT(buf->dtb_xamot == NULL);
10847 10982 ASSERT(buf->dtb_size == 0);
10848 10983 continue;
10849 10984 }
10850 10985
10851 10986 if (buf->dtb_xamot != NULL) {
10852 10987 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
10853 10988 kmem_free(buf->dtb_xamot, buf->dtb_size);
10854 10989 }
10855 10990
10856 10991 kmem_free(buf->dtb_tomax, buf->dtb_size);
10857 10992 buf->dtb_size = 0;
10858 10993 buf->dtb_tomax = NULL;
10859 10994 buf->dtb_xamot = NULL;
10860 10995 }
10861 10996 }
10862 10997
10863 10998 /*
10864 10999 * DTrace Enabling Functions
10865 11000 */
10866 11001 static dtrace_enabling_t *
10867 11002 dtrace_enabling_create(dtrace_vstate_t *vstate)
10868 11003 {
10869 11004 dtrace_enabling_t *enab;
10870 11005
10871 11006 enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
10872 11007 enab->dten_vstate = vstate;
10873 11008
10874 11009 return (enab);
10875 11010 }
10876 11011
10877 11012 static void
10878 11013 dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
10879 11014 {
10880 11015 dtrace_ecbdesc_t **ndesc;
10881 11016 size_t osize, nsize;
10882 11017
10883 11018 /*
10884 11019 * We can't add to enablings after we've enabled them, or after we've
10885 11020 * retained them.
10886 11021 */
10887 11022 ASSERT(enab->dten_probegen == 0);
10888 11023 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
10889 11024
10890 11025 if (enab->dten_ndesc < enab->dten_maxdesc) {
10891 11026 enab->dten_desc[enab->dten_ndesc++] = ecb;
10892 11027 return;
10893 11028 }
10894 11029
10895 11030 osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
10896 11031
10897 11032 if (enab->dten_maxdesc == 0) {
10898 11033 enab->dten_maxdesc = 1;
10899 11034 } else {
10900 11035 enab->dten_maxdesc <<= 1;
10901 11036 }
10902 11037
10903 11038 ASSERT(enab->dten_ndesc < enab->dten_maxdesc);
10904 11039
10905 11040 nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
10906 11041 ndesc = kmem_zalloc(nsize, KM_SLEEP);
10907 11042 bcopy(enab->dten_desc, ndesc, osize);
10908 11043 kmem_free(enab->dten_desc, osize);
10909 11044
10910 11045 enab->dten_desc = ndesc;
10911 11046 enab->dten_desc[enab->dten_ndesc++] = ecb;
10912 11047 }
10913 11048
10914 11049 static void
10915 11050 dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
10916 11051 dtrace_probedesc_t *pd)
10917 11052 {
10918 11053 dtrace_ecbdesc_t *new;
10919 11054 dtrace_predicate_t *pred;
10920 11055 dtrace_actdesc_t *act;
10921 11056
10922 11057 /*
10923 11058 * We're going to create a new ECB description that matches the
10924 11059 * specified ECB in every way, but has the specified probe description.
10925 11060 */
10926 11061 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
10927 11062
10928 11063 if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
10929 11064 dtrace_predicate_hold(pred);
10930 11065
10931 11066 for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
10932 11067 dtrace_actdesc_hold(act);
10933 11068
10934 11069 new->dted_action = ecb->dted_action;
10935 11070 new->dted_pred = ecb->dted_pred;
10936 11071 new->dted_probe = *pd;
10937 11072 new->dted_uarg = ecb->dted_uarg;
10938 11073
10939 11074 dtrace_enabling_add(enab, new);
10940 11075 }
10941 11076
10942 11077 static void
10943 11078 dtrace_enabling_dump(dtrace_enabling_t *enab)
10944 11079 {
10945 11080 int i;
10946 11081
10947 11082 for (i = 0; i < enab->dten_ndesc; i++) {
10948 11083 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;
10949 11084
10950 11085 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
10951 11086 desc->dtpd_provider, desc->dtpd_mod,
10952 11087 desc->dtpd_func, desc->dtpd_name);
10953 11088 }
10954 11089 }
10955 11090
10956 11091 static void
10957 11092 dtrace_enabling_destroy(dtrace_enabling_t *enab)
10958 11093 {
10959 11094 int i;
10960 11095 dtrace_ecbdesc_t *ep;
10961 11096 dtrace_vstate_t *vstate = enab->dten_vstate;
10962 11097
10963 11098 ASSERT(MUTEX_HELD(&dtrace_lock));
10964 11099
10965 11100 for (i = 0; i < enab->dten_ndesc; i++) {
10966 11101 dtrace_actdesc_t *act, *next;
10967 11102 dtrace_predicate_t *pred;
10968 11103
10969 11104 ep = enab->dten_desc[i];
10970 11105
10971 11106 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
10972 11107 dtrace_predicate_release(pred, vstate);
10973 11108
10974 11109 for (act = ep->dted_action; act != NULL; act = next) {
10975 11110 next = act->dtad_next;
10976 11111 dtrace_actdesc_release(act, vstate);
10977 11112 }
10978 11113
10979 11114 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
10980 11115 }
10981 11116
10982 11117 kmem_free(enab->dten_desc,
10983 11118 enab->dten_maxdesc * sizeof (dtrace_enabling_t *));
10984 11119
10985 11120 /*
10986 11121 * If this was a retained enabling, decrement the dts_nretained count
10987 11122 * and take it off of the dtrace_retained list.
10988 11123 */
10989 11124 if (enab->dten_prev != NULL || enab->dten_next != NULL ||
10990 11125 dtrace_retained == enab) {
10991 11126 ASSERT(enab->dten_vstate->dtvs_state != NULL);
10992 11127 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
10993 11128 enab->dten_vstate->dtvs_state->dts_nretained--;
10994 11129 dtrace_retained_gen++;
10995 11130 }
10996 11131
10997 11132 if (enab->dten_prev == NULL) {
10998 11133 if (dtrace_retained == enab) {
10999 11134 dtrace_retained = enab->dten_next;
11000 11135
11001 11136 if (dtrace_retained != NULL)
11002 11137 dtrace_retained->dten_prev = NULL;
11003 11138 }
11004 11139 } else {
11005 11140 ASSERT(enab != dtrace_retained);
11006 11141 ASSERT(dtrace_retained != NULL);
11007 11142 enab->dten_prev->dten_next = enab->dten_next;
11008 11143 }
11009 11144
11010 11145 if (enab->dten_next != NULL) {
11011 11146 ASSERT(dtrace_retained != NULL);
11012 11147 enab->dten_next->dten_prev = enab->dten_prev;
11013 11148 }
11014 11149
11015 11150 kmem_free(enab, sizeof (dtrace_enabling_t));
11016 11151 }
11017 11152
11018 11153 static int
11019 11154 dtrace_enabling_retain(dtrace_enabling_t *enab)
11020 11155 {
11021 11156 dtrace_state_t *state;
11022 11157
11023 11158 ASSERT(MUTEX_HELD(&dtrace_lock));
11024 11159 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
11025 11160 ASSERT(enab->dten_vstate != NULL);
11026 11161
11027 11162 state = enab->dten_vstate->dtvs_state;
11028 11163 ASSERT(state != NULL);
11029 11164
11030 11165 /*
11031 11166 * We only allow each state to retain dtrace_retain_max enablings.
11032 11167 */
11033 11168 if (state->dts_nretained >= dtrace_retain_max)
11034 11169 return (ENOSPC);
11035 11170
11036 11171 state->dts_nretained++;
11037 11172 dtrace_retained_gen++;
11038 11173
11039 11174 if (dtrace_retained == NULL) {
11040 11175 dtrace_retained = enab;
11041 11176 return (0);
11042 11177 }
11043 11178
11044 11179 enab->dten_next = dtrace_retained;
11045 11180 dtrace_retained->dten_prev = enab;
11046 11181 dtrace_retained = enab;
11047 11182
11048 11183 return (0);
11049 11184 }
11050 11185
11051 11186 static int
11052 11187 dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
11053 11188 dtrace_probedesc_t *create)
11054 11189 {
11055 11190 dtrace_enabling_t *new, *enab;
11056 11191 int found = 0, err = ENOENT;
11057 11192
11058 11193 ASSERT(MUTEX_HELD(&dtrace_lock));
11059 11194 ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
11060 11195 ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
11061 11196 ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
11062 11197 ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);
11063 11198
11064 11199 new = dtrace_enabling_create(&state->dts_vstate);
11065 11200
11066 11201 /*
11067 11202 * Iterate over all retained enablings, looking for enablings that
11068 11203 * match the specified state.
11069 11204 */
11070 11205 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11071 11206 int i;
11072 11207
11073 11208 /*
11074 11209 * dtvs_state can only be NULL for helper enablings -- and
11075 11210 * helper enablings can't be retained.
11076 11211 */
11077 11212 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11078 11213
11079 11214 if (enab->dten_vstate->dtvs_state != state)
11080 11215 continue;
11081 11216
11082 11217 /*
11083 11218 * Now iterate over each probe description; we're looking for
11084 11219 * an exact match to the specified probe description.
11085 11220 */
11086 11221 for (i = 0; i < enab->dten_ndesc; i++) {
11087 11222 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11088 11223 dtrace_probedesc_t *pd = &ep->dted_probe;
11089 11224
11090 11225 if (strcmp(pd->dtpd_provider, match->dtpd_provider))
11091 11226 continue;
11092 11227
11093 11228 if (strcmp(pd->dtpd_mod, match->dtpd_mod))
11094 11229 continue;
11095 11230
11096 11231 if (strcmp(pd->dtpd_func, match->dtpd_func))
11097 11232 continue;
11098 11233
11099 11234 if (strcmp(pd->dtpd_name, match->dtpd_name))
11100 11235 continue;
11101 11236
11102 11237 /*
11103 11238 * We have a winning probe! Add it to our growing
11104 11239 * enabling.
11105 11240 */
11106 11241 found = 1;
11107 11242 dtrace_enabling_addlike(new, ep, create);
11108 11243 }
11109 11244 }
11110 11245
11111 11246 if (!found || (err = dtrace_enabling_retain(new)) != 0) {
11112 11247 dtrace_enabling_destroy(new);
11113 11248 return (err);
11114 11249 }
11115 11250
11116 11251 return (0);
11117 11252 }
11118 11253
11119 11254 static void
11120 11255 dtrace_enabling_retract(dtrace_state_t *state)
11121 11256 {
11122 11257 dtrace_enabling_t *enab, *next;
11123 11258
11124 11259 ASSERT(MUTEX_HELD(&dtrace_lock));
11125 11260
11126 11261 /*
11127 11262 	 * Iterate over all retained enablings, destroying those retained
11128 11263 	 * for the specified state.

11129 11264 */
11130 11265 for (enab = dtrace_retained; enab != NULL; enab = next) {
11131 11266 next = enab->dten_next;
11132 11267
11133 11268 /*
11134 11269 * dtvs_state can only be NULL for helper enablings -- and
11135 11270 * helper enablings can't be retained.
11136 11271 */
11137 11272 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11138 11273
11139 11274 if (enab->dten_vstate->dtvs_state == state) {
11140 11275 ASSERT(state->dts_nretained > 0);
11141 11276 dtrace_enabling_destroy(enab);
11142 11277 }
11143 11278 }
11144 11279
11145 11280 ASSERT(state->dts_nretained == 0);
11146 11281 }
11147 11282
11148 11283 static int
11149 11284 dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
11150 11285 {
11151 11286 int i = 0;
11152 11287 int total_matched = 0, matched = 0;
11153 11288
11154 11289 ASSERT(MUTEX_HELD(&cpu_lock));
11155 11290 ASSERT(MUTEX_HELD(&dtrace_lock));
11156 11291
11157 11292 for (i = 0; i < enab->dten_ndesc; i++) {
11158 11293 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11159 11294
11160 11295 enab->dten_current = ep;
11161 11296 enab->dten_error = 0;
11162 11297
11163 11298 /*
11164 11299 * If a provider failed to enable a probe then get out and
11165 11300 * let the consumer know we failed.
11166 11301 */
11167 11302 if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
11168 11303 return (EBUSY);
11169 11304
11170 11305 total_matched += matched;
11171 11306
11172 11307 if (enab->dten_error != 0) {
11173 11308 /*
11174 11309 * If we get an error half-way through enabling the
11175 11310 * probes, we kick out -- perhaps with some number of
11176 11311 * them enabled. Leaving enabled probes enabled may
11177 11312 * be slightly confusing for user-level, but we expect
11178 11313 * that no one will attempt to actually drive on in
11179 11314 * the face of such errors. If this is an anonymous
11180 11315 * enabling (indicated with a NULL nmatched pointer),
11181 11316 * we cmn_err() a message. We aren't expecting to
11182 11317 			 * get such an error -- insofar as it can exist at all,
11183 11318 			 * it would be a result of corrupted DOF in the driver
11184 11319 * properties.
11185 11320 */
11186 11321 if (nmatched == NULL) {
11187 11322 cmn_err(CE_WARN, "dtrace_enabling_match() "
11188 11323 "error on %p: %d", (void *)ep,
11189 11324 enab->dten_error);
11190 11325 }
11191 11326
11192 11327 return (enab->dten_error);
11193 11328 }
11194 11329 }
11195 11330
11196 11331 enab->dten_probegen = dtrace_probegen;
11197 11332 if (nmatched != NULL)
11198 11333 *nmatched = total_matched;
11199 11334
11200 11335 return (0);
11201 11336 }
11202 11337
11203 11338 static void
11204 11339 dtrace_enabling_matchall(void)
11205 11340 {
11206 11341 dtrace_enabling_t *enab;
11207 11342
11208 11343 mutex_enter(&cpu_lock);
11209 11344 mutex_enter(&dtrace_lock);
11210 11345
11211 11346 /*
11212 11347 * Iterate over all retained enablings to see if any probes match
11213 11348 * against them. We only perform this operation on enablings for which
11214 11349 * we have sufficient permissions by virtue of being in the global zone
11215 11350 * or in the same zone as the DTrace client. Because we can be called
11216 11351 * after dtrace_detach() has been called, we cannot assert that there
11217 11352 * are retained enablings. We can safely load from dtrace_retained,
11218 11353 * however: the taskq_destroy() at the end of dtrace_detach() will
11219 11354 * block pending our completion.
11220 11355 */
11221 11356 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11222 11357 dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred;
11223 11358 cred_t *cr = dcr->dcr_cred;
11224 11359 zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0;
11225 11360
11226 11361 if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL &&
11227 11362 (zone == GLOBAL_ZONEID || getzoneid() == zone)))
11228 11363 (void) dtrace_enabling_match(enab, NULL);
11229 11364 }
11230 11365
11231 11366 mutex_exit(&dtrace_lock);
11232 11367 mutex_exit(&cpu_lock);
11233 11368 }
11234 11369
11235 11370 /*
11236 11371 * If an enabling is to be enabled without having matched probes (that is, if
11237 11372 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
11238 11373 * enabling must be _primed_ by creating an ECB for every ECB description.
11239 11374 * This must be done to assure that we know the number of speculations, the
11240 11375 * number of aggregations, the minimum buffer size needed, etc. before we
11241 11376 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually
11242 11377  * enabling any probes, we create ECBs for every ECB description, but with a
11243 11378 * NULL probe -- which is exactly what this function does.
11244 11379 */
11245 11380 static void
11246 11381 dtrace_enabling_prime(dtrace_state_t *state)
11247 11382 {
11248 11383 dtrace_enabling_t *enab;
11249 11384 int i;
11250 11385
11251 11386 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11252 11387 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11253 11388
11254 11389 if (enab->dten_vstate->dtvs_state != state)
11255 11390 continue;
11256 11391
11257 11392 /*
11258 11393 * We don't want to prime an enabling more than once, lest
11259 11394 * we allow a malicious user to induce resource exhaustion.
11260 11395 * (The ECBs that result from priming an enabling aren't
11261 11396 * leaked -- but they also aren't deallocated until the
11262 11397 * consumer state is destroyed.)
11263 11398 */
11264 11399 if (enab->dten_primed)
11265 11400 continue;
11266 11401
11267 11402 for (i = 0; i < enab->dten_ndesc; i++) {
11268 11403 enab->dten_current = enab->dten_desc[i];
11269 11404 (void) dtrace_probe_enable(NULL, enab);
11270 11405 }
11271 11406
11272 11407 enab->dten_primed = 1;
11273 11408 }
11274 11409 }
11275 11410
11276 11411 /*
11277 11412 * Called to indicate that probes should be provided due to retained
11278 11413 * enablings. This is implemented in terms of dtrace_probe_provide(), but it
11279 11414 * must take an initial lap through the enabling calling the dtps_provide()
11280 11415 * entry point explicitly to allow for autocreated probes.
11281 11416 */
11282 11417 static void
11283 11418 dtrace_enabling_provide(dtrace_provider_t *prv)
11284 11419 {
11285 11420 int i, all = 0;
11286 11421 dtrace_probedesc_t desc;
11287 11422 dtrace_genid_t gen;
11288 11423
11289 11424 ASSERT(MUTEX_HELD(&dtrace_lock));
11290 11425 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
11291 11426
11292 11427 if (prv == NULL) {
11293 11428 all = 1;
11294 11429 prv = dtrace_provider;
11295 11430 }
11296 11431
11297 11432 do {
11298 11433 dtrace_enabling_t *enab;
11299 11434 void *parg = prv->dtpv_arg;
11300 11435
11301 11436 retry:
11302 11437 gen = dtrace_retained_gen;
11303 11438 for (enab = dtrace_retained; enab != NULL;
11304 11439 enab = enab->dten_next) {
11305 11440 for (i = 0; i < enab->dten_ndesc; i++) {
11306 11441 desc = enab->dten_desc[i]->dted_probe;
11307 11442 mutex_exit(&dtrace_lock);
11308 11443 prv->dtpv_pops.dtps_provide(parg, &desc);
11309 11444 mutex_enter(&dtrace_lock);
11310 11445 /*
11311 11446 * Process the retained enablings again if
11312 11447 * they have changed while we weren't holding
11313 11448 * dtrace_lock.
11314 11449 */
11315 11450 if (gen != dtrace_retained_gen)
11316 11451 goto retry;
11317 11452 }
11318 11453 }
11319 11454 } while (all && (prv = prv->dtpv_next) != NULL);
11320 11455
11321 11456 mutex_exit(&dtrace_lock);
11322 11457 dtrace_probe_provide(NULL, all ? NULL : prv);
11323 11458 mutex_enter(&dtrace_lock);
11324 11459 }
11325 11460
11326 11461 /*
11327 11462 * Called to reap ECBs that are attached to probes from defunct providers.
11328 11463 */
11329 11464 static void
11330 11465 dtrace_enabling_reap(void)
11331 11466 {
11332 11467 dtrace_provider_t *prov;
11333 11468 dtrace_probe_t *probe;
11334 11469 dtrace_ecb_t *ecb;
11335 11470 hrtime_t when;
11336 11471 int i;
11337 11472
11338 11473 mutex_enter(&cpu_lock);
11339 11474 mutex_enter(&dtrace_lock);
11340 11475
11341 11476 for (i = 0; i < dtrace_nprobes; i++) {
11342 11477 if ((probe = dtrace_probes[i]) == NULL)
11343 11478 continue;
11344 11479
11345 11480 if (probe->dtpr_ecb == NULL)
11346 11481 continue;
11347 11482
11348 11483 prov = probe->dtpr_provider;
11349 11484
11350 11485 if ((when = prov->dtpv_defunct) == 0)
11351 11486 continue;
11352 11487
11353 11488 /*
11354 11489 * We have ECBs on a defunct provider: we want to reap these
11355 11490 * ECBs to allow the provider to unregister. The destruction
11356 11491 * of these ECBs must be done carefully: if we destroy the ECB
11357 11492 * and the consumer later wishes to consume an EPID that
11358 11493 * corresponds to the destroyed ECB (and if the EPID metadata
11359 11494 * has not been previously consumed), the consumer will abort
11360 11495 * processing on the unknown EPID. To reduce (but not, sadly,
11361 11496 * eliminate) the possibility of this, we will only destroy an
11362 11497 * ECB for a defunct provider if, for the state that
11363 11498 * corresponds to the ECB:
11364 11499 *
11365 11500 * (a) There is no speculative tracing (which can effectively
11366 11501 * cache an EPID for an arbitrary amount of time).
11367 11502 *
11368 11503 * (b) The principal buffers have been switched twice since the
11369 11504 * provider became defunct.
11370 11505 *
11371 11506 * (c) The aggregation buffers are of zero size or have been
11372 11507 * switched twice since the provider became defunct.
11373 11508 *
11374 11509 * We use dts_speculates to determine (a) and call a function
11375 11510 * (dtrace_buffer_consumed()) to determine (b) and (c). Note
11376 11511 * that as soon as we've been unable to destroy one of the ECBs
11377 11512 * associated with the probe, we quit trying -- reaping is only
11378 11513 		 * fruitful inasmuch as we can destroy all ECBs associated
11379 11514 * with the defunct provider's probes.
11380 11515 */
11381 11516 while ((ecb = probe->dtpr_ecb) != NULL) {
11382 11517 dtrace_state_t *state = ecb->dte_state;
11383 11518 dtrace_buffer_t *buf = state->dts_buffer;
11384 11519 dtrace_buffer_t *aggbuf = state->dts_aggbuffer;
11385 11520
11386 11521 if (state->dts_speculates)
11387 11522 break;
11388 11523
11389 11524 if (!dtrace_buffer_consumed(buf, when))
11390 11525 break;
11391 11526
11392 11527 if (!dtrace_buffer_consumed(aggbuf, when))
11393 11528 break;
11394 11529
11395 11530 dtrace_ecb_disable(ecb);
11396 11531 ASSERT(probe->dtpr_ecb != ecb);
11397 11532 dtrace_ecb_destroy(ecb);
11398 11533 }
11399 11534 }
11400 11535
11401 11536 mutex_exit(&dtrace_lock);
11402 11537 mutex_exit(&cpu_lock);
11403 11538 }
11404 11539
11405 11540 /*
11406 11541 * DTrace DOF Functions
11407 11542 */
11408 11543 /*ARGSUSED*/
11409 11544 static void
11410 11545 dtrace_dof_error(dof_hdr_t *dof, const char *str)
11411 11546 {
11412 11547 if (dtrace_err_verbose)
11413 11548 cmn_err(CE_WARN, "failed to process DOF: %s", str);
11414 11549
11415 11550 #ifdef DTRACE_ERRDEBUG
11416 11551 dtrace_errdebug(str);
11417 11552 #endif
11418 11553 }
11419 11554
11420 11555 /*
11421 11556 * Create DOF out of a currently enabled state. Right now, we only create
11422 11557 * DOF containing the run-time options -- but this could be expanded to create
11423 11558 * complete DOF representing the enabled state.
11424 11559 */
11425 11560 static dof_hdr_t *
11426 11561 dtrace_dof_create(dtrace_state_t *state)
11427 11562 {
11428 11563 dof_hdr_t *dof;
11429 11564 dof_sec_t *sec;
11430 11565 dof_optdesc_t *opt;
11431 11566 int i, len = sizeof (dof_hdr_t) +
11432 11567 roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
11433 11568 sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
11434 11569
11435 11570 ASSERT(MUTEX_HELD(&dtrace_lock));
11436 11571
11437 11572 dof = kmem_zalloc(len, KM_SLEEP);
11438 11573 dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
11439 11574 dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
11440 11575 dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
11441 11576 dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
11442 11577
11443 11578 dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
11444 11579 dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
11445 11580 dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
11446 11581 dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
11447 11582 dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
11448 11583 dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
11449 11584
11450 11585 dof->dofh_flags = 0;
11451 11586 dof->dofh_hdrsize = sizeof (dof_hdr_t);
11452 11587 dof->dofh_secsize = sizeof (dof_sec_t);
11453 11588 dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */
11454 11589 dof->dofh_secoff = sizeof (dof_hdr_t);
11455 11590 dof->dofh_loadsz = len;
11456 11591 dof->dofh_filesz = len;
11457 11592 dof->dofh_pad = 0;
11458 11593
11459 11594 /*
11460 11595 * Fill in the option section header...
11461 11596 */
11462 11597 sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
11463 11598 sec->dofs_type = DOF_SECT_OPTDESC;
11464 11599 sec->dofs_align = sizeof (uint64_t);
11465 11600 sec->dofs_flags = DOF_SECF_LOAD;
11466 11601 sec->dofs_entsize = sizeof (dof_optdesc_t);
11467 11602
11468 11603 opt = (dof_optdesc_t *)((uintptr_t)sec +
11469 11604 roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
11470 11605
11471 11606 sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
11472 11607 sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
11473 11608
11474 11609 for (i = 0; i < DTRACEOPT_MAX; i++) {
11475 11610 opt[i].dofo_option = i;
11476 11611 opt[i].dofo_strtab = DOF_SECIDX_NONE;
11477 11612 opt[i].dofo_value = state->dts_options[i];
11478 11613 }
11479 11614
11480 11615 return (dof);
11481 11616 }
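/*
 * Editor's sketch (assumes only the layout constructed above): a reader of
 * the generated DOF can recover the run-time options by walking the section
 * headers looking for DOF_SECT_OPTDESC, e.g.:
 *
 *	dof_sec_t *s = (dof_sec_t *)((uintptr_t)dof + dof->dofh_secoff);
 *	uint_t n;
 *
 *	for (n = 0; n < dof->dofh_secnum; n++, s = (dof_sec_t *)
 *	    ((uintptr_t)s + dof->dofh_secsize)) {
 *		if (s->dofs_type == DOF_SECT_OPTDESC) {
 *			dof_optdesc_t *o = (dof_optdesc_t *)
 *			    ((uintptr_t)dof + s->dofs_offset);
 *			size_t nopts = s->dofs_size / s->dofs_entsize;
 *			...
 *		}
 *	}
 */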
11482 11617
11483 11618 static dof_hdr_t *
11484 11619 dtrace_dof_copyin(uintptr_t uarg, int *errp)
11485 11620 {
11486 11621 dof_hdr_t hdr, *dof;
11487 11622
11488 11623 ASSERT(!MUTEX_HELD(&dtrace_lock));
11489 11624
11490 11625 /*
11491 11626 * First, we're going to copyin() the sizeof (dof_hdr_t).
11492 11627 */
11493 11628 if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) {
11494 11629 dtrace_dof_error(NULL, "failed to copyin DOF header");
11495 11630 *errp = EFAULT;
11496 11631 return (NULL);
11497 11632 }
11498 11633
11499 11634 /*
11500 11635 * Now we'll allocate the entire DOF and copy it in -- provided
11501 11636 * that the length isn't outrageous.
11502 11637 */
11503 11638 if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
11504 11639 dtrace_dof_error(&hdr, "load size exceeds maximum");
11505 11640 *errp = E2BIG;
11506 11641 return (NULL);
11507 11642 }
11508 11643
11509 11644 if (hdr.dofh_loadsz < sizeof (hdr)) {
11510 11645 dtrace_dof_error(&hdr, "invalid load size");
11511 11646 *errp = EINVAL;
11512 11647 return (NULL);
11513 11648 }
11514 11649
11515 11650 dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);
11516 11651
11517 11652 if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
11518 11653 dof->dofh_loadsz != hdr.dofh_loadsz) {
11519 11654 kmem_free(dof, hdr.dofh_loadsz);
11520 11655 *errp = EFAULT;
11521 11656 return (NULL);
11522 11657 }
11523 11658
11524 11659 return (dof);
11525 11660 }
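/*
 * Editor's note: the dofh_loadsz re-check above guards against a
 * time-of-check-to-time-of-use race -- a user thread could change the
 * header's load size between the first copyin() of the header and the
 * second copyin() of the full buffer, so the copied-in value must match
 * the one that sized the allocation.
 */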
11526 11661
11527 11662 static dof_hdr_t *
11528 11663 dtrace_dof_property(const char *name)
11529 11664 {
11530 11665 uchar_t *buf;
11531 11666 uint64_t loadsz;
11532 11667 unsigned int len, i;
11533 11668 dof_hdr_t *dof;
11534 11669
11535 11670 /*
11536 11671 	 * Unfortunately, arrays of values in .conf files are always (and
11537 11672 * only) interpreted to be integer arrays. We must read our DOF
11538 11673 * as an integer array, and then squeeze it into a byte array.
11539 11674 */
11540 11675 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
11541 11676 (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
11542 11677 return (NULL);
11543 11678
11544 11679 for (i = 0; i < len; i++)
11545 11680 buf[i] = (uchar_t)(((int *)buf)[i]);
11546 11681
11547 11682 if (len < sizeof (dof_hdr_t)) {
11548 11683 ddi_prop_free(buf);
11549 11684 dtrace_dof_error(NULL, "truncated header");
11550 11685 return (NULL);
11551 11686 }
11552 11687
11553 11688 if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
11554 11689 ddi_prop_free(buf);
11555 11690 dtrace_dof_error(NULL, "truncated DOF");
11556 11691 return (NULL);
11557 11692 }
11558 11693
11559 11694 if (loadsz >= dtrace_dof_maxsize) {
11560 11695 ddi_prop_free(buf);
11561 11696 dtrace_dof_error(NULL, "oversized DOF");
11562 11697 return (NULL);
11563 11698 }
11564 11699
11565 11700 dof = kmem_alloc(loadsz, KM_SLEEP);
11566 11701 bcopy(buf, dof, loadsz);
11567 11702 ddi_prop_free(buf);
11568 11703
11569 11704 return (dof);
11570 11705 }
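
The in-place squeeze above is safe because the read cursor always stays ahead of the write cursor: byte i is written from the int that begins at byte 4*i, so the compaction never clobbers data it has yet to read. A self-contained sketch with made-up property data:

	#include <stdio.h>

	int
	main(void)
	{
		int prop[] = { 0x7f, 0x44, 0x4f, 0x46 };  /* hypothetical .conf values */
		unsigned char *buf = (unsigned char *)prop;
		unsigned int i, len = sizeof (prop) / sizeof (prop[0]);

		/*
		 * buf[i] is written from ((int *)buf)[i], which starts at
		 * byte 4 * i, so unread ints are never overwritten.
		 */
		for (i = 0; i < len; i++)
			buf[i] = (unsigned char)(((int *)buf)[i]);

		for (i = 0; i < len; i++)
			(void) printf("%02x ", buf[i]);	/* 7f 44 4f 46 */
		(void) printf("\n");

		return (0);
	}
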
11571 11706
11572 11707 static void
11573 11708 dtrace_dof_destroy(dof_hdr_t *dof)
11574 11709 {
11575 11710 kmem_free(dof, dof->dofh_loadsz);
11576 11711 }
11577 11712
11578 11713 /*
11579 11714 * Return the dof_sec_t pointer corresponding to a given section index. If the
11580 11715 * index is not valid, dtrace_dof_error() is called and NULL is returned. If
11581 11716 * a type other than DOF_SECT_NONE is specified, the header is checked against
11582 11717 * this type and NULL is returned if the types do not match.
11583 11718 */
11584 11719 static dof_sec_t *
11585 11720 dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
11586 11721 {
11587 11722 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
11588 11723 ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);
11589 11724
11590 11725 if (i >= dof->dofh_secnum) {
11591 11726 dtrace_dof_error(dof, "referenced section index is invalid");
11592 11727 return (NULL);
11593 11728 }
11594 11729
11595 11730 if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
11596 11731 dtrace_dof_error(dof, "referenced section is not loadable");
11597 11732 return (NULL);
11598 11733 }
11599 11734
11600 11735 if (type != DOF_SECT_NONE && type != sec->dofs_type) {
11601 11736 dtrace_dof_error(dof, "referenced section is the wrong type");
11602 11737 return (NULL);
11603 11738 }
11604 11739
11605 11740 return (sec);
11606 11741 }
11607 11742
11608 11743 static dtrace_probedesc_t *
11609 11744 dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
11610 11745 {
11611 11746 dof_probedesc_t *probe;
11612 11747 dof_sec_t *strtab;
11613 11748 uintptr_t daddr = (uintptr_t)dof;
11614 11749 uintptr_t str;
11615 11750 size_t size;
11616 11751
11617 11752 if (sec->dofs_type != DOF_SECT_PROBEDESC) {
11618 11753 dtrace_dof_error(dof, "invalid probe section");
11619 11754 return (NULL);
11620 11755 }
11621 11756
11622 11757 if (sec->dofs_align != sizeof (dof_secidx_t)) {
11623 11758 dtrace_dof_error(dof, "bad alignment in probe description");
11624 11759 return (NULL);
11625 11760 }
11626 11761
11627 11762 if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
11628 11763 dtrace_dof_error(dof, "truncated probe description");
11629 11764 return (NULL);
11630 11765 }
11631 11766
11632 11767 probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
11633 11768 strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);
11634 11769
11635 11770 if (strtab == NULL)
11636 11771 return (NULL);
11637 11772
11638 11773 str = daddr + strtab->dofs_offset;
11639 11774 size = strtab->dofs_size;
11640 11775
11641 11776 if (probe->dofp_provider >= strtab->dofs_size) {
11642 11777 dtrace_dof_error(dof, "corrupt probe provider");
11643 11778 return (NULL);
11644 11779 }
11645 11780
11646 11781 (void) strncpy(desc->dtpd_provider,
11647 11782 (char *)(str + probe->dofp_provider),
11648 11783 MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));
11649 11784
11650 11785 if (probe->dofp_mod >= strtab->dofs_size) {
11651 11786 dtrace_dof_error(dof, "corrupt probe module");
11652 11787 return (NULL);
11653 11788 }
11654 11789
11655 11790 (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
11656 11791 MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));
11657 11792
11658 11793 if (probe->dofp_func >= strtab->dofs_size) {
11659 11794 dtrace_dof_error(dof, "corrupt probe function");
11660 11795 return (NULL);
11661 11796 }
11662 11797
11663 11798 (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
11664 11799 MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));
11665 11800
11666 11801 if (probe->dofp_name >= strtab->dofs_size) {
11667 11802 dtrace_dof_error(dof, "corrupt probe name");
11668 11803 return (NULL);
11669 11804 }
11670 11805
11671 11806 (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
11672 11807 MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));
11673 11808
11674 11809 return (desc);
11675 11810 }
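
Each probe-description field is extracted with strncpy() bounded by both the destination width and the bytes remaining in the string table, after rejecting offsets at or past the table's end; NUL termination comes from the zero-filled destination plus the length cap one byte short of the field. A user-level equivalent of that bounded pull (NAMELEN and the table contents are hypothetical):

	#include <stdio.h>
	#include <string.h>

	#define	MIN(a, b)	((a) < (b) ? (a) : (b))
	#define	NAMELEN		16

	/*
	 * Copy the string at 'off' in a string table of 'size' bytes into a
	 * zeroed, fixed-width field, never reading or writing out of bounds.
	 */
	static int
	strtab_pull(char dst[NAMELEN], const char *strtab, size_t size,
	    size_t off)
	{
		if (off >= size)
			return (-1);		/* corrupt offset */

		(void) strncpy(dst, strtab + off, MIN(NAMELEN - 1, size - off));
		return (0);
	}

	int
	main(void)
	{
		const char tab[] = "\0syscall\0read\0entry";
		char buf[NAMELEN] = { 0 };

		if (strtab_pull(buf, tab, sizeof (tab), 1) == 0)
			(void) printf("%s\n", buf);	/* prints "syscall" */
		return (0);
	}
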
11676 11811
11677 11812 static dtrace_difo_t *
11678 11813 dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11679 11814 cred_t *cr)
11680 11815 {
11681 11816 dtrace_difo_t *dp;
11682 11817 size_t ttl = 0;
11683 11818 dof_difohdr_t *dofd;
11684 11819 uintptr_t daddr = (uintptr_t)dof;
11685 11820 size_t max = dtrace_difo_maxsize;
11686 11821 int i, l, n;
11687 11822
11688 11823 static const struct {
11689 11824 int section;
11690 11825 int bufoffs;
11691 11826 int lenoffs;
11692 11827 int entsize;
11693 11828 int align;
11694 11829 const char *msg;
11695 11830 } difo[] = {
11696 11831 { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
11697 11832 offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
11698 11833 sizeof (dif_instr_t), "multiple DIF sections" },
11699 11834
11700 11835 { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
11701 11836 offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
11702 11837 sizeof (uint64_t), "multiple integer tables" },
11703 11838
11704 11839 { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
11705 11840 offsetof(dtrace_difo_t, dtdo_strlen), 0,
11706 11841 sizeof (char), "multiple string tables" },
11707 11842
11708 11843 { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
11709 11844 offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
11710 11845 sizeof (uint_t), "multiple variable tables" },
11711 11846
11712 11847 	{ DOF_SECT_NONE, 0, 0, 0, 0, NULL }
11713 11848 };
11714 11849
11715 11850 if (sec->dofs_type != DOF_SECT_DIFOHDR) {
11716 11851 dtrace_dof_error(dof, "invalid DIFO header section");
11717 11852 return (NULL);
11718 11853 }
11719 11854
11720 11855 if (sec->dofs_align != sizeof (dof_secidx_t)) {
11721 11856 dtrace_dof_error(dof, "bad alignment in DIFO header");
11722 11857 return (NULL);
11723 11858 }
11724 11859
11725 11860 if (sec->dofs_size < sizeof (dof_difohdr_t) ||
11726 11861 sec->dofs_size % sizeof (dof_secidx_t)) {
11727 11862 dtrace_dof_error(dof, "bad size in DIFO header");
11728 11863 return (NULL);
11729 11864 }
11730 11865
11731 11866 dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
11732 11867 n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;
11733 11868
11734 11869 dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
11735 11870 dp->dtdo_rtype = dofd->dofd_rtype;
11736 11871
11737 11872 for (l = 0; l < n; l++) {
11738 11873 dof_sec_t *subsec;
11739 11874 void **bufp;
11740 11875 uint32_t *lenp;
11741 11876
11742 11877 if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
11743 11878 dofd->dofd_links[l])) == NULL)
11744 11879 goto err; /* invalid section link */
11745 11880
11746 11881 if (ttl + subsec->dofs_size > max) {
11747 11882 dtrace_dof_error(dof, "exceeds maximum size");
11748 11883 goto err;
11749 11884 }
11750 11885
11751 11886 ttl += subsec->dofs_size;
11752 11887
11753 11888 for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
11754 11889 if (subsec->dofs_type != difo[i].section)
11755 11890 continue;
11756 11891
11757 11892 if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
11758 11893 dtrace_dof_error(dof, "section not loaded");
11759 11894 goto err;
11760 11895 }
11761 11896
11762 11897 if (subsec->dofs_align != difo[i].align) {
11763 11898 dtrace_dof_error(dof, "bad alignment");
11764 11899 goto err;
11765 11900 }
11766 11901
11767 11902 bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
11768 11903 lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);
11769 11904
11770 11905 if (*bufp != NULL) {
11771 11906 dtrace_dof_error(dof, difo[i].msg);
11772 11907 goto err;
11773 11908 }
11774 11909
11775 11910 if (difo[i].entsize != subsec->dofs_entsize) {
11776 11911 dtrace_dof_error(dof, "entry size mismatch");
11777 11912 goto err;
11778 11913 }
11779 11914
11780 11915 if (subsec->dofs_entsize != 0 &&
11781 11916 (subsec->dofs_size % subsec->dofs_entsize) != 0) {
11782 11917 dtrace_dof_error(dof, "corrupt entry size");
11783 11918 goto err;
11784 11919 }
11785 11920
11786 11921 *lenp = subsec->dofs_size;
11787 11922 *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
11788 11923 bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
11789 11924 *bufp, subsec->dofs_size);
11790 11925
11791 11926 if (subsec->dofs_entsize != 0)
11792 11927 *lenp /= subsec->dofs_entsize;
11793 11928
11794 11929 break;
11795 11930 }
11796 11931
11797 11932 /*
11798 11933 * If we encounter a loadable DIFO sub-section that is not
11799 11934 * known to us, assume this is a broken program and fail.
11800 11935 */
11801 11936 if (difo[i].section == DOF_SECT_NONE &&
11802 11937 (subsec->dofs_flags & DOF_SECF_LOAD)) {
11803 11938 dtrace_dof_error(dof, "unrecognized DIFO subsection");
11804 11939 goto err;
11805 11940 }
11806 11941 }
11807 11942
11808 11943 if (dp->dtdo_buf == NULL) {
11809 11944 /*
11810 11945 * We can't have a DIF object without DIF text.
11811 11946 */
11812 11947 dtrace_dof_error(dof, "missing DIF text");
11813 11948 goto err;
11814 11949 }
11815 11950
11816 11951 /*
11817 11952 * Before we validate the DIF object, run through the variable table
11818 11953 	 * looking for the strings -- if any of their sizes are 0, we'll set
11819 11954 * their size to be the system-wide default string size. Note that
11820 11955 * this should _not_ happen if the "strsize" option has been set --
11821 11956 * in this case, the compiler should have set the size to reflect the
11822 11957 * setting of the option.
11823 11958 */
11824 11959 for (i = 0; i < dp->dtdo_varlen; i++) {
11825 11960 dtrace_difv_t *v = &dp->dtdo_vartab[i];
11826 11961 dtrace_diftype_t *t = &v->dtdv_type;
11827 11962
11828 11963 if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
11829 11964 continue;
11830 11965
11831 11966 if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
11832 11967 t->dtdt_size = dtrace_strsize_default;
11833 11968 }
11834 11969
11835 11970 if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
11836 11971 goto err;
11837 11972
11838 11973 dtrace_difo_init(dp, vstate);
11839 11974 return (dp);
11840 11975
11841 11976 err:
11842 11977 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
11843 11978 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
11844 11979 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
11845 11980 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
11846 11981
11847 11982 kmem_free(dp, sizeof (dtrace_difo_t));
11848 11983 return (NULL);
11849 11984 }
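
The static difo[] table above lets a single loop populate any of four (buffer, length) pairs in the dtrace_difo_t: each row records where its pair lives via offsetof(), and the loop patches through those offsets. A cut-down sketch of the same table-driven technique, with invented section tags:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef struct parsed {
		void		*p_text;
		uint32_t	p_textlen;
		void		*p_strs;
		uint32_t	p_strslen;
	} parsed_t;

	static const struct {
		int	section;	/* section type tag */
		size_t	bufoffs;	/* where the buffer pointer lives */
		size_t	lenoffs;	/* where the length lives */
	} tab[] = {
		{ 1, offsetof(parsed_t, p_text), offsetof(parsed_t, p_textlen) },
		{ 2, offsetof(parsed_t, p_strs), offsetof(parsed_t, p_strslen) },
		{ 0, 0, 0 }
	};

	static void
	store(parsed_t *dp, int section, void *buf, uint32_t len)
	{
		int i;

		for (i = 0; tab[i].section != 0; i++) {
			if (tab[i].section != section)
				continue;

			/* Same trick as dtrace_dof_difo(): field addresses. */
			*(void **)((uintptr_t)dp + tab[i].bufoffs) = buf;
			*(uint32_t *)((uintptr_t)dp + tab[i].lenoffs) = len;
			return;
		}
	}

	int
	main(void)
	{
		parsed_t p = { 0 };
		static char text[] = "insns";

		store(&p, 1, text, sizeof (text));
		(void) printf("%s (%u bytes)\n", (char *)p.p_text, p.p_textlen);
		return (0);
	}
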
11850 11985
11851 11986 static dtrace_predicate_t *
11852 11987 dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11853 11988 cred_t *cr)
11854 11989 {
11855 11990 dtrace_difo_t *dp;
11856 11991
11857 11992 if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
11858 11993 return (NULL);
11859 11994
11860 11995 return (dtrace_predicate_create(dp));
11861 11996 }
11862 11997
11863 11998 static dtrace_actdesc_t *
11864 11999 dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11865 12000 cred_t *cr)
11866 12001 {
11867 12002 dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
11868 12003 dof_actdesc_t *desc;
11869 12004 dof_sec_t *difosec;
11870 12005 size_t offs;
11871 12006 uintptr_t daddr = (uintptr_t)dof;
11872 12007 uint64_t arg;
11873 12008 dtrace_actkind_t kind;
11874 12009
11875 12010 if (sec->dofs_type != DOF_SECT_ACTDESC) {
11876 12011 dtrace_dof_error(dof, "invalid action section");
11877 12012 return (NULL);
11878 12013 }
11879 12014
11880 12015 if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
11881 12016 dtrace_dof_error(dof, "truncated action description");
11882 12017 return (NULL);
11883 12018 }
11884 12019
11885 12020 if (sec->dofs_align != sizeof (uint64_t)) {
11886 12021 dtrace_dof_error(dof, "bad alignment in action description");
11887 12022 return (NULL);
11888 12023 }
11889 12024
11890 12025 if (sec->dofs_size < sec->dofs_entsize) {
11891 12026 dtrace_dof_error(dof, "section entry size exceeds total size");
11892 12027 return (NULL);
11893 12028 }
11894 12029
11895 12030 if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
11896 12031 dtrace_dof_error(dof, "bad entry size in action description");
11897 12032 return (NULL);
11898 12033 }
11899 12034
11900 12035 if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
11901 12036 dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
11902 12037 return (NULL);
11903 12038 }
11904 12039
11905 12040 for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
11906 12041 desc = (dof_actdesc_t *)(daddr +
11907 12042 (uintptr_t)sec->dofs_offset + offs);
11908 12043 kind = (dtrace_actkind_t)desc->dofa_kind;
11909 12044
11910 12045 if ((DTRACEACT_ISPRINTFLIKE(kind) &&
11911 12046 (kind != DTRACEACT_PRINTA ||
11912 12047 desc->dofa_strtab != DOF_SECIDX_NONE)) ||
11913 12048 (kind == DTRACEACT_DIFEXPR &&
11914 12049 desc->dofa_strtab != DOF_SECIDX_NONE)) {
11915 12050 dof_sec_t *strtab;
11916 12051 char *str, *fmt;
11917 12052 uint64_t i;
11918 12053
11919 12054 /*
11920 12055 * The argument to these actions is an index into the
11921 12056 * DOF string table. For printf()-like actions, this
11922 12057 * is the format string. For print(), this is the
11923 12058 * CTF type of the expression result.
11924 12059 */
11925 12060 if ((strtab = dtrace_dof_sect(dof,
11926 12061 DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
11927 12062 goto err;
11928 12063
11929 12064 str = (char *)((uintptr_t)dof +
11930 12065 (uintptr_t)strtab->dofs_offset);
11931 12066
11932 12067 for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
11933 12068 if (str[i] == '\0')
11934 12069 break;
11935 12070 }
11936 12071
11937 12072 if (i >= strtab->dofs_size) {
11938 12073 dtrace_dof_error(dof, "bogus format string");
11939 12074 goto err;
11940 12075 }
11941 12076
11942 12077 if (i == desc->dofa_arg) {
11943 12078 dtrace_dof_error(dof, "empty format string");
11944 12079 goto err;
11945 12080 }
11946 12081
11947 12082 i -= desc->dofa_arg;
11948 12083 fmt = kmem_alloc(i + 1, KM_SLEEP);
11949 12084 bcopy(&str[desc->dofa_arg], fmt, i + 1);
11950 12085 arg = (uint64_t)(uintptr_t)fmt;
11951 12086 } else {
11952 12087 if (kind == DTRACEACT_PRINTA) {
11953 12088 ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
11954 12089 arg = 0;
11955 12090 } else {
11956 12091 arg = desc->dofa_arg;
11957 12092 }
11958 12093 }
11959 12094
11960 12095 act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
11961 12096 desc->dofa_uarg, arg);
11962 12097
11963 12098 if (last != NULL) {
11964 12099 last->dtad_next = act;
11965 12100 } else {
11966 12101 first = act;
11967 12102 }
11968 12103
11969 12104 last = act;
11970 12105
11971 12106 if (desc->dofa_difo == DOF_SECIDX_NONE)
11972 12107 continue;
11973 12108
11974 12109 if ((difosec = dtrace_dof_sect(dof,
11975 12110 DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
11976 12111 goto err;
11977 12112
11978 12113 act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);
11979 12114
11980 12115 if (act->dtad_difo == NULL)
11981 12116 goto err;
11982 12117 }
11983 12118
11984 12119 ASSERT(first != NULL);
11985 12120 return (first);
11986 12121
11987 12122 err:
11988 12123 for (act = first; act != NULL; act = next) {
11989 12124 next = act->dtad_next;
11990 12125 dtrace_actdesc_release(act, vstate);
11991 12126 }
11992 12127
11993 12128 return (NULL);
11994 12129 }
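
Format strings (and print() type strings) are never trusted: the loop above scans forward from dofa_arg looking for a NUL that falls inside the string table, rejecting both unterminated and empty strings before anything is copied. The check in isolation, as a hypothetical helper:

	#include <stdio.h>

	/*
	 * Return the length of the NUL-terminated string starting at 'arg'
	 * in a table of 'size' bytes, or -1 if it is empty or unterminated.
	 */
	static long
	fmt_check(const char *str, unsigned long size, unsigned long arg)
	{
		unsigned long i;

		for (i = arg; i < size; i++) {
			if (str[i] == '\0')
				break;
		}

		if (i >= size)
			return (-1);	/* bogus: no terminator in the table */

		if (i == arg)
			return (-1);	/* empty format string */

		return ((long)(i - arg));
	}

	int
	main(void)
	{
		const char tab[] = "\0%d bytes\0";

		(void) printf("%ld\n", fmt_check(tab, sizeof (tab), 1)); /* 8 */
		(void) printf("%ld\n", fmt_check(tab, sizeof (tab), 0)); /* -1 */
		return (0);
	}
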
11995 12130
11996 12131 static dtrace_ecbdesc_t *
11997 12132 dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
11998 12133 cred_t *cr)
11999 12134 {
12000 12135 dtrace_ecbdesc_t *ep;
12001 12136 dof_ecbdesc_t *ecb;
12002 12137 dtrace_probedesc_t *desc;
12003 12138 dtrace_predicate_t *pred = NULL;
12004 12139
12005 12140 if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
12006 12141 dtrace_dof_error(dof, "truncated ECB description");
12007 12142 return (NULL);
12008 12143 }
12009 12144
12010 12145 if (sec->dofs_align != sizeof (uint64_t)) {
12011 12146 dtrace_dof_error(dof, "bad alignment in ECB description");
12012 12147 return (NULL);
12013 12148 }
12014 12149
12015 12150 ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
12016 12151 sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);
12017 12152
12018 12153 if (sec == NULL)
12019 12154 return (NULL);
12020 12155
12021 12156 ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
12022 12157 ep->dted_uarg = ecb->dofe_uarg;
12023 12158 desc = &ep->dted_probe;
12024 12159
12025 12160 if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
12026 12161 goto err;
12027 12162
12028 12163 if (ecb->dofe_pred != DOF_SECIDX_NONE) {
12029 12164 if ((sec = dtrace_dof_sect(dof,
12030 12165 DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
12031 12166 goto err;
12032 12167
12033 12168 if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
12034 12169 goto err;
12035 12170
12036 12171 ep->dted_pred.dtpdd_predicate = pred;
12037 12172 }
12038 12173
12039 12174 if (ecb->dofe_actions != DOF_SECIDX_NONE) {
12040 12175 if ((sec = dtrace_dof_sect(dof,
12041 12176 DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
12042 12177 goto err;
12043 12178
12044 12179 ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);
12045 12180
12046 12181 if (ep->dted_action == NULL)
12047 12182 goto err;
12048 12183 }
12049 12184
12050 12185 return (ep);
12051 12186
12052 12187 err:
12053 12188 if (pred != NULL)
12054 12189 dtrace_predicate_release(pred, vstate);
12055 12190 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
12056 12191 return (NULL);
12057 12192 }
12058 12193
12059 12194 /*
12060 12195 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
12061 12196 * specified DOF. At present, this amounts to simply adding 'ubase' to the
12062 12197 * site of any user SETX relocations to account for load object base address.
12063 12198 * In the future, if we need other relocations, this function can be extended.
12064 12199 */
12065 12200 static int
12066 12201 dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase)
12067 12202 {
12068 12203 uintptr_t daddr = (uintptr_t)dof;
12069 12204 dof_relohdr_t *dofr =
12070 12205 (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
12071 12206 dof_sec_t *ss, *rs, *ts;
12072 12207 dof_relodesc_t *r;
12073 12208 uint_t i, n;
12074 12209
12075 12210 if (sec->dofs_size < sizeof (dof_relohdr_t) ||
12076 12211 sec->dofs_align != sizeof (dof_secidx_t)) {
12077 12212 dtrace_dof_error(dof, "invalid relocation header");
12078 12213 return (-1);
12079 12214 }
12080 12215
12081 12216 ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
12082 12217 rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
12083 12218 ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);
12084 12219
12085 12220 if (ss == NULL || rs == NULL || ts == NULL)
12086 12221 return (-1); /* dtrace_dof_error() has been called already */
12087 12222
12088 12223 if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
12089 12224 rs->dofs_align != sizeof (uint64_t)) {
12090 12225 dtrace_dof_error(dof, "invalid relocation section");
12091 12226 return (-1);
12092 12227 }
12093 12228
12094 12229 r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
12095 12230 n = rs->dofs_size / rs->dofs_entsize;
12096 12231
12097 12232 for (i = 0; i < n; i++) {
12098 12233 uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;
12099 12234
12100 12235 switch (r->dofr_type) {
12101 12236 case DOF_RELO_NONE:
12102 12237 break;
12103 12238 case DOF_RELO_SETX:
12104 12239 if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
12105 12240 sizeof (uint64_t) > ts->dofs_size) {
12106 12241 dtrace_dof_error(dof, "bad relocation offset");
12107 12242 return (-1);
12108 12243 }
12109 12244
12110 12245 if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
12111 12246 dtrace_dof_error(dof, "misaligned setx relo");
12112 12247 return (-1);
12113 12248 }
12114 12249
12115 12250 *(uint64_t *)taddr += ubase;
12116 12251 break;
12117 12252 default:
12118 12253 dtrace_dof_error(dof, "invalid relocation type");
12119 12254 return (-1);
12120 12255 }
12121 12256
12122 12257 r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
12123 12258 }
12124 12259
12125 12260 return (0);
12126 12261 }
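
A SETX relocation is just 64-bit addition: the load object's base address is added to a slot in the target section, once the slot is known to lie wholly inside the section on an 8-byte boundary. A toy version of the same arithmetic and checks (the offsets are invented):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define	IS_P2ALIGNED(v, a)	((((uintptr_t)(v)) & ((a) - 1)) == 0)

	/*
	 * Apply one SETX-style relocation: *(uint64_t *)(tbase + off) += ubase.
	 * Returns -1 on a bad offset or a misaligned target.
	 */
	static int
	relo_setx(uint8_t *tbase, uint64_t tsize, uint64_t off, uint64_t ubase)
	{
		uint64_t val;

		if (off >= tsize || off + sizeof (uint64_t) > tsize)
			return (-1);		/* bad relocation offset */

		if (!IS_P2ALIGNED(tbase + off, sizeof (uint64_t)))
			return (-1);		/* misaligned setx relo */

		memcpy(&val, tbase + off, sizeof (val));
		val += ubase;
		memcpy(tbase + off, &val, sizeof (val));
		return (0);
	}

	int
	main(void)
	{
		uint64_t sect[2] = { 0x1000, 0x2040 };	/* link-time offsets */

		(void) relo_setx((uint8_t *)sect, sizeof (sect), 8,
		    0x7f0000000000);
		(void) printf("%llx\n", (unsigned long long)sect[1]);
		return (0);			/* prints 7f0000002040 */
	}
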
12127 12262
12128 12263 /*
12129 12264 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
12130 12265 * header: it should be at the front of a memory region that is at least
12131 12266 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
12132 12267 * size. It need not be validated in any other way.
12133 12268 */
12134 12269 static int
12135 12270 dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
12136 12271 dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
12137 12272 {
12138 12273 uint64_t len = dof->dofh_loadsz, seclen;
12139 12274 uintptr_t daddr = (uintptr_t)dof;
12140 12275 dtrace_ecbdesc_t *ep;
12141 12276 dtrace_enabling_t *enab;
12142 12277 uint_t i;
12143 12278
12144 12279 ASSERT(MUTEX_HELD(&dtrace_lock));
12145 12280 ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));
12146 12281
12147 12282 /*
12148 12283 * Check the DOF header identification bytes. In addition to checking
12149 12284 * valid settings, we also verify that unused bits/bytes are zeroed so
12150 12285 * we can use them later without fear of regressing existing binaries.
12151 12286 */
12152 12287 if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
12153 12288 DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
12154 12289 dtrace_dof_error(dof, "DOF magic string mismatch");
12155 12290 return (-1);
12156 12291 }
12157 12292
12158 12293 if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
12159 12294 dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
12160 12295 dtrace_dof_error(dof, "DOF has invalid data model");
12161 12296 return (-1);
12162 12297 }
12163 12298
12164 12299 if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
12165 12300 dtrace_dof_error(dof, "DOF encoding mismatch");
12166 12301 return (-1);
12167 12302 }
12168 12303
12169 12304 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
12170 12305 dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) {
12171 12306 dtrace_dof_error(dof, "DOF version mismatch");
12172 12307 return (-1);
12173 12308 }
12174 12309
12175 12310 if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
12176 12311 dtrace_dof_error(dof, "DOF uses unsupported instruction set");
12177 12312 return (-1);
12178 12313 }
12179 12314
12180 12315 if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
12181 12316 dtrace_dof_error(dof, "DOF uses too many integer registers");
12182 12317 return (-1);
12183 12318 }
12184 12319
12185 12320 if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
12186 12321 dtrace_dof_error(dof, "DOF uses too many tuple registers");
12187 12322 return (-1);
12188 12323 }
12189 12324
12190 12325 for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
12191 12326 if (dof->dofh_ident[i] != 0) {
12192 12327 dtrace_dof_error(dof, "DOF has invalid ident byte set");
12193 12328 return (-1);
12194 12329 }
12195 12330 }
12196 12331
12197 12332 if (dof->dofh_flags & ~DOF_FL_VALID) {
12198 12333 dtrace_dof_error(dof, "DOF has invalid flag bits set");
12199 12334 return (-1);
12200 12335 }
12201 12336
12202 12337 if (dof->dofh_secsize == 0) {
12203 12338 dtrace_dof_error(dof, "zero section header size");
12204 12339 return (-1);
12205 12340 }
12206 12341
12207 12342 /*
12208 12343 * Check that the section headers don't exceed the amount of DOF
12209 12344 * data. Note that we cast the section size and number of sections
12210 12345 * to uint64_t's to prevent possible overflow in the multiplication.
12211 12346 */
12212 12347 seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;
12213 12348
12214 12349 if (dof->dofh_secoff > len || seclen > len ||
12215 12350 dof->dofh_secoff + seclen > len) {
12216 12351 dtrace_dof_error(dof, "truncated section headers");
12217 12352 return (-1);
12218 12353 }
12219 12354
12220 12355 if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
12221 12356 dtrace_dof_error(dof, "misaligned section headers");
12222 12357 return (-1);
12223 12358 }
12224 12359
12225 12360 if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
12226 12361 dtrace_dof_error(dof, "misaligned section size");
12227 12362 return (-1);
12228 12363 }
12229 12364
12230 12365 /*
12231 12366 * Take an initial pass through the section headers to be sure that
12232 12367 * the headers don't have stray offsets. If the 'noprobes' flag is
12233 12368 * set, do not permit sections relating to providers, probes, or args.
12234 12369 */
12235 12370 for (i = 0; i < dof->dofh_secnum; i++) {
12236 12371 dof_sec_t *sec = (dof_sec_t *)(daddr +
12237 12372 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12238 12373
12239 12374 if (noprobes) {
12240 12375 switch (sec->dofs_type) {
12241 12376 case DOF_SECT_PROVIDER:
12242 12377 case DOF_SECT_PROBES:
12243 12378 case DOF_SECT_PRARGS:
12244 12379 case DOF_SECT_PROFFS:
12245 12380 dtrace_dof_error(dof, "illegal sections "
12246 12381 "for enabling");
12247 12382 return (-1);
12248 12383 }
12249 12384 }
12250 12385
12251 12386 if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
12252 12387 !(sec->dofs_flags & DOF_SECF_LOAD)) {
12253 12388 dtrace_dof_error(dof, "loadable section with load "
12254 12389 "flag unset");
12255 12390 return (-1);
12256 12391 }
12257 12392
12258 12393 if (!(sec->dofs_flags & DOF_SECF_LOAD))
12259 12394 continue; /* just ignore non-loadable sections */
12260 12395
12261 12396 if (sec->dofs_align & (sec->dofs_align - 1)) {
12262 12397 dtrace_dof_error(dof, "bad section alignment");
12263 12398 return (-1);
12264 12399 }
12265 12400
12266 12401 if (sec->dofs_offset & (sec->dofs_align - 1)) {
12267 12402 dtrace_dof_error(dof, "misaligned section");
12268 12403 return (-1);
12269 12404 }
12270 12405
12271 12406 if (sec->dofs_offset > len || sec->dofs_size > len ||
12272 12407 sec->dofs_offset + sec->dofs_size > len) {
12273 12408 dtrace_dof_error(dof, "corrupt section header");
12274 12409 return (-1);
12275 12410 }
12276 12411
12277 12412 if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
12278 12413 sec->dofs_offset + sec->dofs_size - 1) != '\0') {
12279 12414 dtrace_dof_error(dof, "non-terminating string table");
12280 12415 return (-1);
12281 12416 }
12282 12417 }
12283 12418
12284 12419 /*
12285 12420 * Take a second pass through the sections and locate and perform any
12286 12421 * relocations that are present. We do this after the first pass to
12287 12422 * be sure that all sections have had their headers validated.
12288 12423 */
12289 12424 for (i = 0; i < dof->dofh_secnum; i++) {
12290 12425 dof_sec_t *sec = (dof_sec_t *)(daddr +
12291 12426 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12292 12427
12293 12428 if (!(sec->dofs_flags & DOF_SECF_LOAD))
12294 12429 continue; /* skip sections that are not loadable */
12295 12430
12296 12431 switch (sec->dofs_type) {
12297 12432 case DOF_SECT_URELHDR:
12298 12433 if (dtrace_dof_relocate(dof, sec, ubase) != 0)
12299 12434 return (-1);
12300 12435 break;
12301 12436 }
12302 12437 }
12303 12438
12304 12439 if ((enab = *enabp) == NULL)
12305 12440 enab = *enabp = dtrace_enabling_create(vstate);
12306 12441
12307 12442 for (i = 0; i < dof->dofh_secnum; i++) {
12308 12443 dof_sec_t *sec = (dof_sec_t *)(daddr +
12309 12444 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12310 12445
12311 12446 if (sec->dofs_type != DOF_SECT_ECBDESC)
12312 12447 continue;
12313 12448
12314 12449 if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
12315 12450 dtrace_enabling_destroy(enab);
12316 12451 *enabp = NULL;
12317 12452 return (-1);
12318 12453 }
12319 12454
12320 12455 dtrace_enabling_add(enab, ep);
12321 12456 }
12322 12457
12323 12458 return (0);
12324 12459 }
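
The seclen computation above deserves emphasis: dofh_secnum and dofh_secsize are 32-bit fields, so a 32-bit product can wrap and slip past the "seclen > len" test; widening both operands first keeps the product exact. A small demonstration with values chosen to overflow:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint32_t secnum = 0x10000, secsize = 0x10000; /* crafted header */

		uint32_t bad  = secnum * secsize;	/* wraps to 0 */
		uint64_t good = (uint64_t)secnum * (uint64_t)secsize;

		(void) printf("32-bit: %u\n", bad);			/* 0 */
		(void) printf("64-bit: %llu\n", (unsigned long long)good);
		return (0);				/* 4294967296 */
	}
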
12325 12460
12326 12461 /*
12327 12462 * Process DOF for any options. This routine assumes that the DOF has been
12328 12463 * at least processed by dtrace_dof_slurp().
12329 12464 */
12330 12465 static int
12331 12466 dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
12332 12467 {
12333 12468 int i, rval;
12334 12469 uint32_t entsize;
12335 12470 size_t offs;
12336 12471 dof_optdesc_t *desc;
12337 12472
12338 12473 for (i = 0; i < dof->dofh_secnum; i++) {
12339 12474 dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
12340 12475 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12341 12476
12342 12477 if (sec->dofs_type != DOF_SECT_OPTDESC)
12343 12478 continue;
12344 12479
12345 12480 if (sec->dofs_align != sizeof (uint64_t)) {
12346 12481 dtrace_dof_error(dof, "bad alignment in "
12347 12482 "option description");
12348 12483 return (EINVAL);
12349 12484 }
12350 12485
12351 12486 if ((entsize = sec->dofs_entsize) == 0) {
12352 12487 dtrace_dof_error(dof, "zeroed option entry size");
12353 12488 return (EINVAL);
12354 12489 }
12355 12490
12356 12491 if (entsize < sizeof (dof_optdesc_t)) {
12357 12492 dtrace_dof_error(dof, "bad option entry size");
12358 12493 return (EINVAL);
12359 12494 }
12360 12495
12361 12496 for (offs = 0; offs < sec->dofs_size; offs += entsize) {
12362 12497 desc = (dof_optdesc_t *)((uintptr_t)dof +
12363 12498 (uintptr_t)sec->dofs_offset + offs);
12364 12499
12365 12500 if (desc->dofo_strtab != DOF_SECIDX_NONE) {
12366 12501 dtrace_dof_error(dof, "non-zero option string");
12367 12502 return (EINVAL);
12368 12503 }
12369 12504
12370 12505 if (desc->dofo_value == DTRACEOPT_UNSET) {
12371 12506 dtrace_dof_error(dof, "unset option");
12372 12507 return (EINVAL);
12373 12508 }
12374 12509
12375 12510 if ((rval = dtrace_state_option(state,
12376 12511 desc->dofo_option, desc->dofo_value)) != 0) {
12377 12512 dtrace_dof_error(dof, "rejected option");
12378 12513 return (rval);
12379 12514 }
12380 12515 }
12381 12516 }
12382 12517
12383 12518 return (0);
12384 12519 }
12385 12520
12386 12521 /*
12387 12522 * DTrace Consumer State Functions
12388 12523 */
12389 12524 int
12390 12525 dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
12391 12526 {
12392 12527 size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize;
12393 12528 void *base;
12394 12529 uintptr_t limit;
12395 12530 dtrace_dynvar_t *dvar, *next, *start;
12396 12531 int i;
12397 12532
12398 12533 ASSERT(MUTEX_HELD(&dtrace_lock));
12399 12534 ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);
12400 12535
12401 12536 bzero(dstate, sizeof (dtrace_dstate_t));
12402 12537
12403 12538 if ((dstate->dtds_chunksize = chunksize) == 0)
12404 12539 dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;
12405 12540
12406 12541 if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
12407 12542 size = min;
12408 12543
12409 12544 if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL)
12410 12545 return (ENOMEM);
12411 12546
12412 12547 dstate->dtds_size = size;
12413 12548 dstate->dtds_base = base;
12414 12549 dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
12415 12550 bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t));
12416 12551
12417 12552 hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));
12418 12553
12419 12554 if (hashsize != 1 && (hashsize & 1))
12420 12555 hashsize--;
12421 12556
12422 12557 dstate->dtds_hashsize = hashsize;
12423 12558 dstate->dtds_hash = dstate->dtds_base;
12424 12559
12425 12560 /*
12426 12561 * Set all of our hash buckets to point to the single sink, and (if
12427 12562 * it hasn't already been set), set the sink's hash value to be the
12428 12563 * sink sentinel value. The sink is needed for dynamic variable
12429 12564 * lookups to know that they have iterated over an entire, valid hash
12430 12565 * chain.
12431 12566 */
12432 12567 for (i = 0; i < hashsize; i++)
12433 12568 dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;
12434 12569
12435 12570 if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
12436 12571 dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;
12437 12572
12438 12573 /*
12439 12574 	 * Divide the dynamic variable space evenly among the CPUs to
12440 12575 	 * form per-CPU free lists.
12441 12576 */
12442 12577 start = (dtrace_dynvar_t *)
12443 12578 ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
12444 12579 limit = (uintptr_t)base + size;
12445 12580
12446 12581 maxper = (limit - (uintptr_t)start) / NCPU;
12447 12582 maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;
12448 12583
12449 12584 for (i = 0; i < NCPU; i++) {
12450 12585 dstate->dtds_percpu[i].dtdsc_free = dvar = start;
12451 12586
12452 12587 /*
12453 12588 * If we don't even have enough chunks to make it once through
12454 12589 * NCPUs, we're just going to allocate everything to the first
12455 12590 * CPU. And if we're on the last CPU, we're going to allocate
12456 12591 * whatever is left over. In either case, we set the limit to
12457 12592 * be the limit of the dynamic variable space.
12458 12593 */
12459 12594 if (maxper == 0 || i == NCPU - 1) {
12460 12595 limit = (uintptr_t)base + size;
12461 12596 start = NULL;
12462 12597 } else {
12463 12598 limit = (uintptr_t)start + maxper;
12464 12599 start = (dtrace_dynvar_t *)limit;
12465 12600 }
12466 12601
12467 12602 ASSERT(limit <= (uintptr_t)base + size);
12468 12603
12469 12604 for (;;) {
12470 12605 next = (dtrace_dynvar_t *)((uintptr_t)dvar +
12471 12606 dstate->dtds_chunksize);
12472 12607
12473 12608 if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
12474 12609 break;
12475 12610
12476 12611 dvar->dtdv_next = next;
12477 12612 dvar = next;
12478 12613 }
12479 12614
12480 12615 if (maxper == 0)
12481 12616 break;
12482 12617 }
12483 12618
12484 12619 return (0);
12485 12620 }
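
dtrace_dstate_init() carves one allocation into a hash directory followed by per-CPU chains of fixed-size chunks, threading each CPU's free list in place and letting the last CPU absorb the remainder. The carving loop reduced to a standalone sketch (NCPU and CHUNKSIZE are stand-in constants):

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define	NCPU		4
	#define	CHUNKSIZE	64	/* must hold at least a next pointer */

	typedef struct chunk {
		struct chunk *c_next;	/* payload would follow */
	} chunk_t;

	int
	main(void)
	{
		size_t size = 8192, maxper;
		uint8_t *base = calloc(1, size);
		uintptr_t limit;
		chunk_t *freelist[NCPU], *dvar, *next, *start;
		int i, nchunks = 0;

		if (base == NULL)
			return (1);

		start = (chunk_t *)base;
		limit = (uintptr_t)base + size;

		/* Round each CPU's share down to a whole number of chunks. */
		maxper = (limit - (uintptr_t)start) / NCPU;
		maxper = (maxper / CHUNKSIZE) * CHUNKSIZE;

		for (i = 0; i < NCPU; i++) {
			freelist[i] = dvar = start;

			if (i == NCPU - 1) {
				limit = (uintptr_t)base + size;	/* the rest */
				start = NULL;
			} else {
				limit = (uintptr_t)start + maxper;
				start = (chunk_t *)limit;
			}

			/* Thread a singly-linked free list through the share. */
			for (;;) {
				next = (chunk_t *)((uintptr_t)dvar + CHUNKSIZE);

				if ((uintptr_t)next + CHUNKSIZE >= limit)
					break;

				dvar->c_next = next;
				dvar = next;
				nchunks++;
			}
		}

		(void) printf("threaded %d chunks; cpu0 list at %p\n",
		    nchunks, (void *)freelist[0]);
		free(base);
		return (0);
	}
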
12486 12621
12487 12622 void
12488 12623 dtrace_dstate_fini(dtrace_dstate_t *dstate)
12489 12624 {
12490 12625 ASSERT(MUTEX_HELD(&cpu_lock));
12491 12626
12492 12627 if (dstate->dtds_base == NULL)
12493 12628 return;
12494 12629
12495 12630 kmem_free(dstate->dtds_base, dstate->dtds_size);
12496 12631 kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
12497 12632 }
12498 12633
12499 12634 static void
12500 12635 dtrace_vstate_fini(dtrace_vstate_t *vstate)
12501 12636 {
12502 12637 /*
12503 12638 * Logical XOR, where are you?
12504 12639 */
12505 12640 ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));
12506 12641
12507 12642 if (vstate->dtvs_nglobals > 0) {
12508 12643 kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
12509 12644 sizeof (dtrace_statvar_t *));
12510 12645 }
12511 12646
12512 12647 if (vstate->dtvs_ntlocals > 0) {
12513 12648 kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
12514 12649 sizeof (dtrace_difv_t));
12515 12650 }
12516 12651
12517 12652 ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));
12518 12653
12519 12654 if (vstate->dtvs_nlocals > 0) {
12520 12655 kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
12521 12656 sizeof (dtrace_statvar_t *));
12522 12657 }
12523 12658 }
12524 12659
12525 12660 static void
12526 12661 dtrace_state_clean(dtrace_state_t *state)
12527 12662 {
12528 12663 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
12529 12664 return;
12530 12665
12531 12666 dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
12532 12667 dtrace_speculation_clean(state);
12533 12668 }
12534 12669
12535 12670 static void
12536 12671 dtrace_state_deadman(dtrace_state_t *state)
12537 12672 {
12538 12673 hrtime_t now;
12539 12674
12540 12675 dtrace_sync();
12541 12676
12542 12677 now = dtrace_gethrtime();
12543 12678
12544 12679 if (state != dtrace_anon.dta_state &&
12545 12680 now - state->dts_laststatus >= dtrace_deadman_user)
12546 12681 return;
12547 12682
12548 12683 /*
12549 12684 * We must be sure that dts_alive never appears to be less than the
12550 12685 * value upon entry to dtrace_state_deadman(), and because we lack a
12551 12686 * dtrace_cas64(), we cannot store to it atomically. We thus instead
12552 12687 * store INT64_MAX to it, followed by a memory barrier, followed by
12553 12688 * the new value. This assures that dts_alive never appears to be
12554 12689 * less than its true value, regardless of the order in which the
12555 12690 * stores to the underlying storage are issued.
12556 12691 */
12557 12692 state->dts_alive = INT64_MAX;
12558 12693 dtrace_membar_producer();
12559 12694 state->dts_alive = now;
12560 12695 }
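
The dts_alive update is a lock-free idiom worth naming: without a 64-bit compare-and-swap, a 64-bit store may be issued as two halves, so the code parks the variable at INT64_MAX, fences, then stores the real time; readers consequently never observe a value below the previous one. A sketch of the idiom, assuming GCC/Clang builtins in place of dtrace_membar_producer():

	#include <stdint.h>

	int64_t alive;		/* read concurrently by a deadman checker */

	/*
	 * Ensure 'alive' never appears less than its previous value, even
	 * if the two 32-bit halves of the store are issued separately.
	 */
	void
	alive_update(int64_t now)
	{
		alive = INT64_MAX;			/* 1: park at max */
		__atomic_thread_fence(__ATOMIC_RELEASE); /* 2: order stores */
		alive = now;				/* 3: publish */
	}

	int
	main(void)
	{
		alive_update(1234567890);
		return ((int)(alive != 1234567890));
	}
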
12561 12696
12562 12697 dtrace_state_t *
12563 12698 dtrace_state_create(dev_t *devp, cred_t *cr)
12564 12699 {
12565 12700 minor_t minor;
12566 12701 major_t major;
12567 12702 char c[30];
12568 12703 dtrace_state_t *state;
12569 12704 dtrace_optval_t *opt;
12570 12705 int bufsize = NCPU * sizeof (dtrace_buffer_t), i;
12571 12706
12572 12707 ASSERT(MUTEX_HELD(&dtrace_lock));
12573 12708 ASSERT(MUTEX_HELD(&cpu_lock));
12574 12709
12575 12710 minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
12576 12711 VM_BESTFIT | VM_SLEEP);
12577 12712
12578 12713 if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
12579 12714 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
12580 12715 return (NULL);
12581 12716 }
12582 12717
12583 12718 state = ddi_get_soft_state(dtrace_softstate, minor);
12584 12719 state->dts_epid = DTRACE_EPIDNONE + 1;
12585 12720
12586 12721 (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
12587 12722 state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
12588 12723 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
12589 12724
12590 12725 if (devp != NULL) {
12591 12726 major = getemajor(*devp);
12592 12727 } else {
12593 12728 major = ddi_driver_major(dtrace_devi);
12594 12729 }
12595 12730
12596 12731 state->dts_dev = makedevice(major, minor);
12597 12732
12598 12733 if (devp != NULL)
12599 12734 *devp = state->dts_dev;
12600 12735
12601 12736 /*
12602 12737 * We allocate NCPU buffers. On the one hand, this can be quite
12603 12738 * a bit of memory per instance (nearly 36K on a Starcat). On the
12604 12739 * other hand, it saves an additional memory reference in the probe
12605 12740 * path.
12606 12741 */
12607 12742 state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
12608 12743 state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
12609 12744 state->dts_cleaner = CYCLIC_NONE;
12610 12745 state->dts_deadman = CYCLIC_NONE;
12611 12746 state->dts_vstate.dtvs_state = state;
12612 12747
12613 12748 for (i = 0; i < DTRACEOPT_MAX; i++)
12614 12749 state->dts_options[i] = DTRACEOPT_UNSET;
12615 12750
12616 12751 /*
12617 12752 * Set the default options.
12618 12753 */
12619 12754 opt = state->dts_options;
12620 12755 opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
12621 12756 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
12622 12757 opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
12623 12758 opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
12624 12759 opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
12625 12760 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
12626 12761 opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
12627 12762 opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
12628 12763 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
12629 12764 opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
12630 12765 opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
12631 12766 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
12632 12767 opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
12633 12768 opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;
12634 12769
12635 12770 state->dts_activity = DTRACE_ACTIVITY_INACTIVE;
12636 12771
12637 12772 /*
12638 12773 * Depending on the user credentials, we set flag bits which alter probe
12639 12774 * visibility or the amount of destructiveness allowed. In the case of
12640 12775 * actual anonymous tracing, or the possession of all privileges, all of
12641 12776 * the normal checks are bypassed.
12642 12777 */
12643 12778 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
12644 12779 state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
12645 12780 state->dts_cred.dcr_action = DTRACE_CRA_ALL;
12646 12781 } else {
12647 12782 /*
12648 12783 * Set up the credentials for this instantiation. We take a
12649 12784 * hold on the credential to prevent it from disappearing on
12650 12785 * us; this in turn prevents the zone_t referenced by this
12651 12786 * credential from disappearing. This means that we can
12652 12787 * examine the credential and the zone from probe context.
12653 12788 */
12654 12789 crhold(cr);
12655 12790 state->dts_cred.dcr_cred = cr;
12656 12791
12657 12792 /*
12658 12793 * CRA_PROC means "we have *some* privilege for dtrace" and
12659 12794 * unlocks the use of variables like pid, zonename, etc.
12660 12795 */
12661 12796 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
12662 12797 PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
12663 12798 state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
12664 12799 }
12665 12800
12666 12801 /*
12667 12802 * dtrace_user allows use of syscall and profile providers.
12668 12803 * If the user also has proc_owner and/or proc_zone, we
12669 12804 * extend the scope to include additional visibility and
12670 12805 * destructive power.
12671 12806 */
12672 12807 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
12673 12808 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
12674 12809 state->dts_cred.dcr_visible |=
12675 12810 DTRACE_CRV_ALLPROC;
12676 12811
12677 12812 state->dts_cred.dcr_action |=
12678 12813 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
12679 12814 }
12680 12815
12681 12816 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
12682 12817 state->dts_cred.dcr_visible |=
12683 12818 DTRACE_CRV_ALLZONE;
12684 12819
12685 12820 state->dts_cred.dcr_action |=
12686 12821 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
12687 12822 }
12688 12823
12689 12824 /*
12690 12825 * If we have all privs in whatever zone this is,
12691 12826 * we can do destructive things to processes which
12692 12827 * have altered credentials.
12693 12828 */
12694 12829 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
12695 12830 cr->cr_zone->zone_privset)) {
12696 12831 state->dts_cred.dcr_action |=
12697 12832 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
12698 12833 }
12699 12834 }
12700 12835
12701 12836 /*
12702 12837 * Holding the dtrace_kernel privilege also implies that
12703 12838 * the user has the dtrace_user privilege from a visibility
12704 12839 * perspective. But without further privileges, some
12705 12840 * destructive actions are not available.
12706 12841 */
12707 12842 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
12708 12843 /*
12709 12844 * Make all probes in all zones visible. However,
12710 12845 * this doesn't mean that all actions become available
12711 12846 * to all zones.
12712 12847 */
12713 12848 state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
12714 12849 DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;
12715 12850
12716 12851 state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
12717 12852 DTRACE_CRA_PROC;
12718 12853 /*
12719 12854 * Holding proc_owner means that destructive actions
12720 12855 * for *this* zone are allowed.
12721 12856 */
12722 12857 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
12723 12858 state->dts_cred.dcr_action |=
12724 12859 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
12725 12860
12726 12861 /*
12727 12862 * Holding proc_zone means that destructive actions
12728 12863 	 * for this user/group ID in all zones are allowed.
12729 12864 */
12730 12865 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
12731 12866 state->dts_cred.dcr_action |=
12732 12867 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
12733 12868
12734 12869 /*
12735 12870 * If we have all privs in whatever zone this is,
12736 12871 * we can do destructive things to processes which
12737 12872 * have altered credentials.
12738 12873 */
12739 12874 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
12740 12875 cr->cr_zone->zone_privset)) {
12741 12876 state->dts_cred.dcr_action |=
12742 12877 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
12743 12878 }
12744 12879 }
12745 12880
12746 12881 /*
12747 12882 * Holding the dtrace_proc privilege gives control over fasttrap
12748 12883 * and pid providers. We need to grant wider destructive
12749 12884 * privileges in the event that the user has proc_owner and/or
12750 12885 * proc_zone.
12751 12886 */
12752 12887 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
12753 12888 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
12754 12889 state->dts_cred.dcr_action |=
12755 12890 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
12756 12891
12757 12892 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
12758 12893 state->dts_cred.dcr_action |=
12759 12894 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
12760 12895 }
12761 12896 }
12762 12897
12763 12898 return (state);
12764 12899 }
12765 12900
12766 12901 static int
12767 12902 dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
12768 12903 {
12769 12904 dtrace_optval_t *opt = state->dts_options, size;
12770 12905 processorid_t cpu;
12771 12906 int flags = 0, rval, factor, divisor = 1;
12772 12907
12773 12908 ASSERT(MUTEX_HELD(&dtrace_lock));
12774 12909 ASSERT(MUTEX_HELD(&cpu_lock));
12775 12910 ASSERT(which < DTRACEOPT_MAX);
12776 12911 ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
12777 12912 (state == dtrace_anon.dta_state &&
12778 12913 state->dts_activity == DTRACE_ACTIVITY_ACTIVE));
12779 12914
12780 12915 if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
12781 12916 return (0);
12782 12917
12783 12918 if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
12784 12919 cpu = opt[DTRACEOPT_CPU];
12785 12920
12786 12921 if (which == DTRACEOPT_SPECSIZE)
12787 12922 flags |= DTRACEBUF_NOSWITCH;
12788 12923
12789 12924 if (which == DTRACEOPT_BUFSIZE) {
12790 12925 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
12791 12926 flags |= DTRACEBUF_RING;
12792 12927
12793 12928 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
12794 12929 flags |= DTRACEBUF_FILL;
12795 12930
12796 12931 if (state != dtrace_anon.dta_state ||
12797 12932 state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
12798 12933 flags |= DTRACEBUF_INACTIVE;
12799 12934 }
12800 12935
12801 12936 for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) {
12802 12937 /*
12803 12938 * The size must be 8-byte aligned. If the size is not 8-byte
12804 12939 * aligned, drop it down by the difference.
12805 12940 */
12806 12941 if (size & (sizeof (uint64_t) - 1))
12807 12942 size -= size & (sizeof (uint64_t) - 1);
12808 12943
12809 12944 if (size < state->dts_reserve) {
12810 12945 /*
12811 12946 	 * Buffers must always be large enough to accommodate
12812 12947 	 * their prereserved space. We return E2BIG instead
12813 12948 	 * of ENOMEM in this case to allow user-level
12814 12949 * software to differentiate the cases.
12815 12950 */
12816 12951 return (E2BIG);
12817 12952 }
12818 12953
12819 12954 rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor);
12820 12955
12821 12956 if (rval != ENOMEM) {
12822 12957 opt[which] = size;
12823 12958 return (rval);
12824 12959 }
12825 12960
12826 12961 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
12827 12962 return (rval);
12828 12963
12829 12964 for (divisor = 2; divisor < factor; divisor <<= 1)
12830 12965 continue;
12831 12966 }
12832 12967
12833 12968 return (ENOMEM);
12834 12969 }
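
On ENOMEM, dtrace_state_buffer() retries at geometrically smaller sizes, letting the allocator's 'factor' hint choose the divisor, and it stops shrinking below sizeof (uint64_t) or when resizing is set to manual. The retreat policy abstracted into a sketch, with alloc_try() as a stand-in allocator:

	#include <stdint.h>
	#include <stdlib.h>

	/* Stand-in allocator: succeeds only at or below an ambient limit. */
	static int
	alloc_try(size_t size, int *factor)
	{
		if (size <= 64 * 1024)
			return (0);
		*factor = 4;	/* hint: shrink by at least this much */
		return (-1);
	}

	/*
	 * Try progressively smaller fractions of 'size' until the
	 * allocation succeeds or the size falls below a minimum record.
	 */
	static size_t
	alloc_backoff(size_t size)
	{
		int factor = 2, divisor = 1;

		for (; size >= sizeof (uint64_t); size /= divisor) {
			size -= size & (sizeof (uint64_t) - 1); /* 8-byte align */

			if (alloc_try(size, &factor) == 0)
				return (size);

			for (divisor = 2; divisor < factor; divisor <<= 1)
				continue;
		}

		return (0);	/* exhausted */
	}

	int
	main(void)
	{
		return (alloc_backoff(1 << 20) != 0 ? 0 : 1);
	}
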
12835 12970
12836 12971 static int
12837 12972 dtrace_state_buffers(dtrace_state_t *state)
12838 12973 {
12839 12974 dtrace_speculation_t *spec = state->dts_speculations;
12840 12975 int rval, i;
12841 12976
12842 12977 if ((rval = dtrace_state_buffer(state, state->dts_buffer,
12843 12978 DTRACEOPT_BUFSIZE)) != 0)
12844 12979 return (rval);
12845 12980
12846 12981 if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
12847 12982 DTRACEOPT_AGGSIZE)) != 0)
12848 12983 return (rval);
12849 12984
12850 12985 for (i = 0; i < state->dts_nspeculations; i++) {
12851 12986 if ((rval = dtrace_state_buffer(state,
12852 12987 spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
12853 12988 return (rval);
12854 12989 }
12855 12990
12856 12991 return (0);
12857 12992 }
12858 12993
12859 12994 static void
12860 12995 dtrace_state_prereserve(dtrace_state_t *state)
12861 12996 {
12862 12997 dtrace_ecb_t *ecb;
12863 12998 dtrace_probe_t *probe;
12864 12999
12865 13000 state->dts_reserve = 0;
12866 13001
12867 13002 if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
12868 13003 return;
12869 13004
12870 13005 /*
12871 13006 * If our buffer policy is a "fill" buffer policy, we need to set the
12872 13007 * prereserved space to be the space required by the END probes.
12873 13008 */
12874 13009 probe = dtrace_probes[dtrace_probeid_end - 1];
12875 13010 ASSERT(probe != NULL);
12876 13011
12877 13012 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
12878 13013 if (ecb->dte_state != state)
12879 13014 continue;
12880 13015
12881 13016 state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
12882 13017 }
12883 13018 }
12884 13019
12885 13020 static int
12886 13021 dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
12887 13022 {
12888 13023 dtrace_optval_t *opt = state->dts_options, sz, nspec;
12889 13024 dtrace_speculation_t *spec;
12890 13025 dtrace_buffer_t *buf;
12891 13026 cyc_handler_t hdlr;
12892 13027 cyc_time_t when;
12893 13028 int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t);
12894 13029 dtrace_icookie_t cookie;
12895 13030
12896 13031 mutex_enter(&cpu_lock);
12897 13032 mutex_enter(&dtrace_lock);
12898 13033
12899 13034 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
12900 13035 rval = EBUSY;
12901 13036 goto out;
12902 13037 }
12903 13038
12904 13039 /*
12905 13040 * Before we can perform any checks, we must prime all of the
12906 13041 * retained enablings that correspond to this state.
12907 13042 */
12908 13043 dtrace_enabling_prime(state);
12909 13044
12910 13045 if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
12911 13046 rval = EACCES;
12912 13047 goto out;
12913 13048 }
12914 13049
12915 13050 dtrace_state_prereserve(state);
12916 13051
12917 13052 /*
12918 13053 	 * What we want to do now is try to allocate our speculations.
12919 13054 * We do not automatically resize the number of speculations; if
12920 13055 * this fails, we will fail the operation.
12921 13056 */
12922 13057 nspec = opt[DTRACEOPT_NSPEC];
12923 13058 ASSERT(nspec != DTRACEOPT_UNSET);
12924 13059
12925 13060 if (nspec > INT_MAX) {
12926 13061 rval = ENOMEM;
12927 13062 goto out;
12928 13063 }
12929 13064
12930 13065 spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t),
12931 13066 KM_NOSLEEP | KM_NORMALPRI);
12932 13067
12933 13068 if (spec == NULL) {
12934 13069 rval = ENOMEM;
12935 13070 goto out;
12936 13071 }
12937 13072
12938 13073 state->dts_speculations = spec;
12939 13074 state->dts_nspeculations = (int)nspec;
12940 13075
12941 13076 for (i = 0; i < nspec; i++) {
12942 13077 if ((buf = kmem_zalloc(bufsize,
12943 13078 KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
12944 13079 rval = ENOMEM;
12945 13080 goto err;
12946 13081 }
12947 13082
12948 13083 spec[i].dtsp_buffer = buf;
12949 13084 }
12950 13085
12951 13086 if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
12952 13087 if (dtrace_anon.dta_state == NULL) {
12953 13088 rval = ENOENT;
12954 13089 goto out;
12955 13090 }
12956 13091
12957 13092 if (state->dts_necbs != 0) {
12958 13093 rval = EALREADY;
12959 13094 goto out;
12960 13095 }
12961 13096
12962 13097 state->dts_anon = dtrace_anon_grab();
12963 13098 ASSERT(state->dts_anon != NULL);
12964 13099 state = state->dts_anon;
12965 13100
12966 13101 /*
12967 13102 * We want "grabanon" to be set in the grabbed state, so we'll
12968 13103 * copy that option value from the grabbing state into the
12969 13104 * grabbed state.
12970 13105 */
12971 13106 state->dts_options[DTRACEOPT_GRABANON] =
12972 13107 opt[DTRACEOPT_GRABANON];
12973 13108
12974 13109 *cpu = dtrace_anon.dta_beganon;
12975 13110
12976 13111 /*
12977 13112 * If the anonymous state is active (as it almost certainly
12978 13113 * is if the anonymous enabling ultimately matched anything),
12979 13114 * we don't allow any further option processing -- but we
12980 13115 * don't return failure.
12981 13116 */
12982 13117 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
12983 13118 goto out;
12984 13119 }
12985 13120
12986 13121 if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
12987 13122 opt[DTRACEOPT_AGGSIZE] != 0) {
12988 13123 if (state->dts_aggregations == NULL) {
12989 13124 /*
12990 13125 * We're not going to create an aggregation buffer
12991 13126 * because we don't have any ECBs that contain
12992 13127 * aggregations -- set this option to 0.
12993 13128 */
12994 13129 opt[DTRACEOPT_AGGSIZE] = 0;
12995 13130 } else {
12996 13131 /*
12997 13132 * If we have an aggregation buffer, we must also have
12998 13133 * a buffer to use as scratch.
12999 13134 */
13000 13135 if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
13001 13136 opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
13002 13137 opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
13003 13138 }
13004 13139 }
13005 13140 }
13006 13141
13007 13142 if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
13008 13143 opt[DTRACEOPT_SPECSIZE] != 0) {
13009 13144 if (!state->dts_speculates) {
13010 13145 /*
13011 13146 * We're not going to create speculation buffers
13012 13147 * because we don't have any ECBs that actually
13013 13148 * speculate -- set the speculation size to 0.
13014 13149 */
13015 13150 opt[DTRACEOPT_SPECSIZE] = 0;
13016 13151 }
13017 13152 }
13018 13153
13019 13154 /*
13020 13155 * The bare minimum size for any buffer that we're actually going to
13021 13156 * do anything to is sizeof (uint64_t).
13022 13157 */
13023 13158 sz = sizeof (uint64_t);
13024 13159
13025 13160 if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
13026 13161 (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
13027 13162 (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
13028 13163 /*
13029 13164 * A buffer size has been explicitly set to 0 (or to a size
13030 13165 * that will be adjusted to 0) and we need the space -- we
13031 13166 * need to return failure. We return ENOSPC to differentiate
13032 13167 * it from failing to allocate a buffer due to failure to meet
13033 13168 * the reserve (for which we return E2BIG).
13034 13169 */
13035 13170 rval = ENOSPC;
13036 13171 goto out;
13037 13172 }
13038 13173
13039 13174 if ((rval = dtrace_state_buffers(state)) != 0)
13040 13175 goto err;
13041 13176
13042 13177 if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
13043 13178 sz = dtrace_dstate_defsize;
13044 13179
13045 13180 do {
13046 13181 rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);
13047 13182
13048 13183 if (rval == 0)
13049 13184 break;
13050 13185
13051 13186 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
13052 13187 goto err;
13053 13188 } while (sz >>= 1);
13054 13189
13055 13190 opt[DTRACEOPT_DYNVARSIZE] = sz;
13056 13191
13057 13192 if (rval != 0)
13058 13193 goto err;
13059 13194
13060 13195 if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
13061 13196 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;
13062 13197
13063 13198 if (opt[DTRACEOPT_CLEANRATE] == 0)
13064 13199 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
13065 13200
13066 13201 if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
13067 13202 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;
13068 13203
13069 13204 if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
13070 13205 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
13071 13206
13072 13207 hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
13073 13208 hdlr.cyh_arg = state;
13074 13209 hdlr.cyh_level = CY_LOW_LEVEL;
13075 13210
13076 13211 when.cyt_when = 0;
13077 13212 when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
13078 13213
13079 13214 state->dts_cleaner = cyclic_add(&hdlr, &when);
13080 13215
13081 13216 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
13082 13217 hdlr.cyh_arg = state;
13083 13218 hdlr.cyh_level = CY_LOW_LEVEL;
13084 13219
13085 13220 when.cyt_when = 0;
13086 13221 when.cyt_interval = dtrace_deadman_interval;
13087 13222
13088 13223 state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
13089 13224 state->dts_deadman = cyclic_add(&hdlr, &when);
13090 13225
13091 13226 state->dts_activity = DTRACE_ACTIVITY_WARMUP;
13092 13227
13228 + if (state->dts_getf != 0 &&
13229 + !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
13230 + /*
13231 + * We don't have kernel privs but we have at least one call
13232 + * to getf(); we need to bump our zone's count, and (if
13233 + * this is the first enabling to have an unprivileged call
13234 + * to getf()) we need to hook into closef().
13235 + */
13236 + state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
13237 +
13238 + if (dtrace_getf++ == 0) {
13239 + ASSERT(dtrace_closef == NULL);
13240 + dtrace_closef = dtrace_getf_barrier;
13241 + }
13242 + }
13243 +
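The hunk above, together with its mirror image in dtrace_state_stop() below, implements install-on-first/remove-on-last hook management: the first enabling with an unprivileged getf() installs dtrace_getf_barrier as the closef() hook, and the last one to go away clears it. A minimal user-space model of the pattern (all names here are illustrative, not the kernel's):

    #include <assert.h>
    #include <stddef.h>

    static int unpriv_getf;                 /* counts enablings, like dtrace_getf */
    static void (*closef_hook)(void);       /* like dtrace_closef */

    static void
    getf_barrier(void)
    {
        /* the real hook would dtrace_sync() before the file is closed */
    }

    static void
    unpriv_getf_enable(void)
    {
        if (unpriv_getf++ == 0) {           /* first such enabling */
            assert(closef_hook == NULL);
            closef_hook = getf_barrier;
        }
    }

    static void
    unpriv_getf_disable(void)
    {
        assert(unpriv_getf > 0);
        if (--unpriv_getf == 0)             /* last one out clears the hook */
            closef_hook = NULL;
    }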
13093 13244 /*
13094 13245 * Now it's time to actually fire the BEGIN probe. We need to disable
13095 13246 * interrupts here both to record the CPU on which we fired the BEGIN
13096 13247 * probe (the data from this CPU will be processed first at user
13097 13248 * level) and to manually activate the buffer for this CPU.
13098 13249 */
13099 13250 cookie = dtrace_interrupt_disable();
13100 13251 *cpu = CPU->cpu_id;
13101 13252 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
13102 13253 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
13103 13254
13104 13255 dtrace_probe(dtrace_probeid_begin,
13105 13256 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13106 13257 dtrace_interrupt_enable(cookie);
13107 13258 /*
13108 13259 * We may have had an exit action from a BEGIN probe; only change our
13109 13260 * state to ACTIVE if we're still in WARMUP.
13110 13261 */
13111 13262 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
13112 13263 state->dts_activity == DTRACE_ACTIVITY_DRAINING);
13113 13264
13114 13265 if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
13115 13266 state->dts_activity = DTRACE_ACTIVITY_ACTIVE;
13116 13267
13117 13268 /*
13118 13269 * Regardless of whether or not now we're in ACTIVE or DRAINING, we
13119 13270 * want each CPU to transition its principal buffer out of the
13120 13271 * INACTIVE state. Doing this assures that no CPU will suddenly begin
13121 13272 * processing an ECB halfway down a probe's ECB chain; all CPUs will
13122 13273 * atomically transition from processing none of a state's ECBs to
13123 13274 * processing all of them.
13124 13275 */
13125 13276 dtrace_xcall(DTRACE_CPUALL,
13126 13277 (dtrace_xcall_t)dtrace_buffer_activate, state);
13127 13278 goto out;
13128 13279
13129 13280 err:
13130 13281 dtrace_buffer_free(state->dts_buffer);
13131 13282 dtrace_buffer_free(state->dts_aggbuffer);
13132 13283
13133 13284 if ((nspec = state->dts_nspeculations) == 0) {
13134 13285 ASSERT(state->dts_speculations == NULL);
13135 13286 goto out;
13136 13287 }
13137 13288
13138 13289 spec = state->dts_speculations;
13139 13290 ASSERT(spec != NULL);
13140 13291
13141 13292 for (i = 0; i < state->dts_nspeculations; i++) {
13142 13293 if ((buf = spec[i].dtsp_buffer) == NULL)
13143 13294 break;
13144 13295
13145 13296 dtrace_buffer_free(buf);
13146 13297 kmem_free(buf, bufsize);
13147 13298 }
13148 13299
13149 13300 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
13150 13301 state->dts_nspeculations = 0;
13151 13302 state->dts_speculations = NULL;
13152 13303
13153 13304 out:
13154 13305 mutex_exit(&dtrace_lock);
13155 13306 mutex_exit(&cpu_lock);
13156 13307
13157 13308 return (rval);
13158 13309 }
13159 13310
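dtrace_state_go() deliberately distinguishes two buffer failures: ENOSPC when a needed buffer has been explicitly sized (or clamped) to zero, and E2BIG when an allocation would violate the memory reserve. A consumer could plausibly map them along these lines (a sketch, not libdtrace code):

    #include <errno.h>

    static const char *
    explain_go_failure(int err)
    {
        switch (err) {
        case ENOSPC:        /* a required buffer was configured as zero */
            return ("required buffer explicitly sized to zero");
        case E2BIG:         /* allocation would have broken the reserve */
            return ("buffer too large to honor the reserve");
        default:
            return ("other enabling failure");
        }
    }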
13160 13311 static int
13161 13312 dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
13162 13313 {
13163 13314 dtrace_icookie_t cookie;
13164 13315
13165 13316 ASSERT(MUTEX_HELD(&dtrace_lock));
13166 13317
13167 13318 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
13168 13319 state->dts_activity != DTRACE_ACTIVITY_DRAINING)
13169 13320 return (EINVAL);
13170 13321
13171 13322 /*
13172 13323 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
13173 13324 * to be sure that every CPU has seen it. See below for the details
13174 13325 * on why this is done.
13175 13326 */
13176 13327 state->dts_activity = DTRACE_ACTIVITY_DRAINING;
13177 13328 dtrace_sync();
13178 13329
13179 13330 /*
13180 13331 * By this point, it is impossible for any CPU to be still processing
13181 13332 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to
13182 13333 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
13183 13334 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe()
13184 13335 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
13185 13336 * iff we're in the END probe.
13186 13337 */
13187 13338 state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
13188 13339 dtrace_sync();
13189 13340 ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
13190 13341
13191 13342 /*
13192 13343 * Finally, we can release the reserve and call the END probe. We
13193 13344 * disable interrupts across calling the END probe to allow us to
13194 13345 * return the CPU on which we actually called the END probe. This
13195 13346 * allows user-land to be sure that this CPU's principal buffer is
13196 13347 * processed last.
13197 13348 */
13198 13349 state->dts_reserve = 0;
(96 lines elided)
13199 13350
13200 13351 cookie = dtrace_interrupt_disable();
13201 13352 *cpu = CPU->cpu_id;
13202 13353 dtrace_probe(dtrace_probeid_end,
13203 13354 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13204 13355 dtrace_interrupt_enable(cookie);
13205 13356
13206 13357 state->dts_activity = DTRACE_ACTIVITY_STOPPED;
13207 13358 dtrace_sync();
13208 13359
13360 + if (state->dts_getf != 0 &&
13361 + !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
13362 + /*
13363 + * We don't have kernel privs but we have at least one call
13364 + * to getf(); we need to lower our zone's count, and (if
13365 + * this is the last enabling to have an unprivileged call
13366 + * to getf()) we need to clear the closef() hook.
13367 + */
13368 + ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
13369 + ASSERT(dtrace_closef == dtrace_getf_barrier);
13370 + ASSERT(dtrace_getf > 0);
13371 +
13372 + state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
13373 +
13374 + if (--dtrace_getf == 0)
13375 + dtrace_closef = NULL;
13376 + }
13377 +
13209 13378 return (0);
13210 13379 }
13211 13380
13212 13381 static int
13213 13382 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
13214 13383 dtrace_optval_t val)
13215 13384 {
13216 13385 ASSERT(MUTEX_HELD(&dtrace_lock));
13217 13386
13218 13387 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13219 13388 return (EBUSY);
13220 13389
13221 13390 if (option >= DTRACEOPT_MAX)
13222 13391 return (EINVAL);
13223 13392
13224 13393 if (option != DTRACEOPT_CPU && val < 0)
13225 13394 return (EINVAL);
13226 13395
13227 13396 switch (option) {
13228 13397 case DTRACEOPT_DESTRUCTIVE:
13229 13398 if (dtrace_destructive_disallow)
13230 13399 return (EACCES);
13231 13400
13232 13401 state->dts_cred.dcr_destructive = 1;
13233 13402 break;
13234 13403
13235 13404 case DTRACEOPT_BUFSIZE:
13236 13405 case DTRACEOPT_DYNVARSIZE:
13237 13406 case DTRACEOPT_AGGSIZE:
13238 13407 case DTRACEOPT_SPECSIZE:
13239 13408 case DTRACEOPT_STRSIZE:
13240 13409 if (val < 0)
13241 13410 return (EINVAL);
13242 13411
13243 13412 if (val >= LONG_MAX) {
13244 13413 /*
13245 13414 * If this is an otherwise negative value, set it to
13246 13415 * the highest multiple of 128m less than LONG_MAX.
13247 13416 * Technically, we're adjusting the size without
13248 13417 * regard to the buffer resizing policy, but in fact,
13249 13418 * this has no effect -- if we set the buffer size to
13250 13419 * ~LONG_MAX and the buffer policy is ultimately set to
13251 13420 * be "manual", the buffer allocation is guaranteed to
13252 13421 * fail, if only because the allocation requires two
13253 13422 * buffers. (We set the the size to the highest
13254 13423 * multiple of 128m because it ensures that the size
13255 13424 * will remain a multiple of a megabyte when
13256 13425 * repeatedly halved -- all the way down to 15m.)
13257 13426 */
13258 13427 val = LONG_MAX - (1 << 27) + 1;
13259 13428 }
13260 13429 }
13261 13430
13262 13431 state->dts_options[option] = val;
13263 13432
13264 13433 return (0);
13265 13434 }
13266 13435
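The parenthetical arithmetic above is easiest to see on an ILP32 kernel, where LONG_MAX - (1 << 27) + 1 = 2^31 - 2^27 = 15 * 128m = 1920m, and halving stays megabyte-aligned exactly down to 15m. A throwaway user-space check (32-bit LONG_MAX hard-coded):

    #include <stdio.h>

    int
    main(void)
    {
        long val = 2147483647L - (1L << 27) + 1;    /* 2^31 - 2^27 */

        while (val % (1L << 20) == 0) {             /* still a multiple of 1m? */
            printf("%ldm\n", val >> 20);            /* prints 1920m ... 15m */
            val /= 2;
        }
        return (0);
    }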
13267 13436 static void
13268 13437 dtrace_state_destroy(dtrace_state_t *state)
13269 13438 {
13270 13439 dtrace_ecb_t *ecb;
13271 13440 dtrace_vstate_t *vstate = &state->dts_vstate;
13272 13441 minor_t minor = getminor(state->dts_dev);
13273 13442 int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
13274 13443 dtrace_speculation_t *spec = state->dts_speculations;
13275 13444 int nspec = state->dts_nspeculations;
13276 13445 uint32_t match;
13277 13446
13278 13447 ASSERT(MUTEX_HELD(&dtrace_lock));
13279 13448 ASSERT(MUTEX_HELD(&cpu_lock));
13280 13449
13281 13450 /*
13282 13451 * First, retract any retained enablings for this state.
13283 13452 */
13284 13453 dtrace_enabling_retract(state);
13285 13454 ASSERT(state->dts_nretained == 0);
13286 13455
13287 13456 if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
13288 13457 state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
13289 13458 /*
13290 13459 * We have managed to come into dtrace_state_destroy() on a
13291 13460 * hot enabling -- almost certainly because of a disorderly
13292 13461 * shutdown of a consumer. (That is, a consumer that is
13293 13462 * exiting without having called dtrace_stop().) In this case,
13294 13463 * we're going to set our activity to be KILLED, and then
13295 13464 * issue a sync to be sure that everyone is out of probe
13296 13465 * context before we start blowing away ECBs.
13297 13466 */
13298 13467 state->dts_activity = DTRACE_ACTIVITY_KILLED;
13299 13468 dtrace_sync();
13300 13469 }
13301 13470
13302 13471 /*
13303 13472 * Release the credential hold we took in dtrace_state_create().
13304 13473 */
13305 13474 if (state->dts_cred.dcr_cred != NULL)
13306 13475 crfree(state->dts_cred.dcr_cred);
13307 13476
13308 13477 /*
13309 13478 * Now we can safely disable and destroy any enabled probes. Because
13310 13479 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
13311 13480 * (especially if they're all enabled), we take two passes through the
13312 13481 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
13313 13482 * in the second we disable whatever is left over.
13314 13483 */
13315 13484 for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
13316 13485 for (i = 0; i < state->dts_necbs; i++) {
13317 13486 if ((ecb = state->dts_ecbs[i]) == NULL)
13318 13487 continue;
13319 13488
13320 13489 if (match && ecb->dte_probe != NULL) {
13321 13490 dtrace_probe_t *probe = ecb->dte_probe;
13322 13491 dtrace_provider_t *prov = probe->dtpr_provider;
13323 13492
13324 13493 if (!(prov->dtpv_priv.dtpp_flags & match))
13325 13494 continue;
13326 13495 }
13327 13496
13328 13497 dtrace_ecb_disable(ecb);
13329 13498 dtrace_ecb_destroy(ecb);
13330 13499 }
13331 13500
13332 13501 if (!match)
13333 13502 break;
13334 13503 }
13335 13504
13336 13505 /*
13337 13506 * Before we free the buffers, perform one more sync to assure that
13338 13507 * every CPU is out of probe context.
13339 13508 */
13340 13509 dtrace_sync();
13341 13510
13342 13511 dtrace_buffer_free(state->dts_buffer);
13343 13512 dtrace_buffer_free(state->dts_aggbuffer);
13344 13513
13345 13514 for (i = 0; i < nspec; i++)
13346 13515 dtrace_buffer_free(spec[i].dtsp_buffer);
13347 13516
13348 13517 if (state->dts_cleaner != CYCLIC_NONE)
13349 13518 cyclic_remove(state->dts_cleaner);
13350 13519
13351 13520 if (state->dts_deadman != CYCLIC_NONE)
13352 13521 cyclic_remove(state->dts_deadman);
13353 13522
13354 13523 dtrace_dstate_fini(&vstate->dtvs_dynvars);
13355 13524 dtrace_vstate_fini(vstate);
13356 13525 kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));
13357 13526
13358 13527 if (state->dts_aggregations != NULL) {
13359 13528 #ifdef DEBUG
13360 13529 for (i = 0; i < state->dts_naggregations; i++)
13361 13530 ASSERT(state->dts_aggregations[i] == NULL);
13362 13531 #endif
13363 13532 ASSERT(state->dts_naggregations > 0);
13364 13533 kmem_free(state->dts_aggregations,
13365 13534 state->dts_naggregations * sizeof (dtrace_aggregation_t *));
13366 13535 }
13367 13536
13368 13537 kmem_free(state->dts_buffer, bufsize);
13369 13538 kmem_free(state->dts_aggbuffer, bufsize);
13370 13539
13371 13540 for (i = 0; i < nspec; i++)
13372 13541 kmem_free(spec[i].dtsp_buffer, bufsize);
13373 13542
13374 13543 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
13375 13544
13376 13545 dtrace_format_destroy(state);
13377 13546
13378 13547 vmem_destroy(state->dts_aggid_arena);
13379 13548 ddi_soft_state_free(dtrace_softstate, minor);
13380 13549 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
13381 13550 }
13382 13551
13383 13552 /*
13384 13553 * DTrace Anonymous Enabling Functions
13385 13554 */
13386 13555 static dtrace_state_t *
13387 13556 dtrace_anon_grab(void)
13388 13557 {
13389 13558 dtrace_state_t *state;
13390 13559
13391 13560 ASSERT(MUTEX_HELD(&dtrace_lock));
13392 13561
13393 13562 if ((state = dtrace_anon.dta_state) == NULL) {
13394 13563 ASSERT(dtrace_anon.dta_enabling == NULL);
13395 13564 return (NULL);
13396 13565 }
13397 13566
13398 13567 ASSERT(dtrace_anon.dta_enabling != NULL);
13399 13568 ASSERT(dtrace_retained != NULL);
13400 13569
13401 13570 dtrace_enabling_destroy(dtrace_anon.dta_enabling);
13402 13571 dtrace_anon.dta_enabling = NULL;
13403 13572 dtrace_anon.dta_state = NULL;
13404 13573
13405 13574 return (state);
13406 13575 }
13407 13576
13408 13577 static void
13409 13578 dtrace_anon_property(void)
13410 13579 {
13411 13580 int i, rv;
13412 13581 dtrace_state_t *state;
13413 13582 dof_hdr_t *dof;
13414 13583 char c[32]; /* enough for "dof-data-" + digits */
13415 13584
13416 13585 ASSERT(MUTEX_HELD(&dtrace_lock));
13417 13586 ASSERT(MUTEX_HELD(&cpu_lock));
13418 13587
13419 13588 for (i = 0; ; i++) {
13420 13589 (void) snprintf(c, sizeof (c), "dof-data-%d", i);
13421 13590
13422 13591 dtrace_err_verbose = 1;
13423 13592
13424 13593 if ((dof = dtrace_dof_property(c)) == NULL) {
13425 13594 dtrace_err_verbose = 0;
13426 13595 break;
13427 13596 }
13428 13597
13429 13598 /*
13430 13599 * We want to create anonymous state, so we need to transition
13431 13600 * the kernel debugger to indicate that DTrace is active. If
13432 13601 * this fails (e.g. because the debugger has modified text in
13433 13602 * some way), we won't continue with the processing.
13434 13603 */
13435 13604 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
13436 13605 cmn_err(CE_NOTE, "kernel debugger active; anonymous "
13437 13606 "enabling ignored.");
13438 13607 dtrace_dof_destroy(dof);
13439 13608 break;
13440 13609 }
13441 13610
13442 13611 /*
13443 13612 * If we haven't allocated an anonymous state, we'll do so now.
13444 13613 */
13445 13614 if ((state = dtrace_anon.dta_state) == NULL) {
13446 13615 state = dtrace_state_create(NULL, NULL);
13447 13616 dtrace_anon.dta_state = state;
13448 13617
13449 13618 if (state == NULL) {
13450 13619 /*
13451 13620 * This basically shouldn't happen: the only
13452 13621 * failure mode from dtrace_state_create() is a
13453 13622 * failure of ddi_soft_state_zalloc() that
13454 13623 * itself should never happen. Still, the
13455 13624 * interface allows for a failure mode, and
13456 13625 * we want to fail as gracefully as possible:
13457 13626 * we'll emit an error message and cease
13458 13627 * processing anonymous state in this case.
13459 13628 */
13460 13629 cmn_err(CE_WARN, "failed to create "
13461 13630 "anonymous state");
13462 13631 dtrace_dof_destroy(dof);
13463 13632 break;
13464 13633 }
13465 13634 }
13466 13635
13467 13636 rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
13468 13637 &dtrace_anon.dta_enabling, 0, B_TRUE);
13469 13638
13470 13639 if (rv == 0)
13471 13640 rv = dtrace_dof_options(dof, state);
13472 13641
13473 13642 dtrace_err_verbose = 0;
13474 13643 dtrace_dof_destroy(dof);
13475 13644
13476 13645 if (rv != 0) {
13477 13646 /*
13478 13647 * This is malformed DOF; chuck any anonymous state
13479 13648 * that we created.
13480 13649 */
13481 13650 ASSERT(dtrace_anon.dta_enabling == NULL);
13482 13651 dtrace_state_destroy(state);
13483 13652 dtrace_anon.dta_state = NULL;
13484 13653 break;
13485 13654 }
13486 13655
13487 13656 ASSERT(dtrace_anon.dta_enabling != NULL);
13488 13657 }
13489 13658
13490 13659 if (dtrace_anon.dta_enabling != NULL) {
13491 13660 int rval;
13492 13661
13493 13662 /*
13494 13663 * dtrace_enabling_retain() can only fail because we are
13495 13664 * trying to retain more enablings than are allowed -- but
13496 13665 * we only have one anonymous enabling, and we are guaranteed
13497 13666 * to be allowed at least one retained enabling; we assert
13498 13667 * that dtrace_enabling_retain() returns success.
13499 13668 */
13500 13669 rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
13501 13670 ASSERT(rval == 0);
13502 13671
13503 13672 dtrace_enabling_dump(dtrace_anon.dta_enabling);
13504 13673 }
13505 13674 }
13506 13675
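The dof-data-%d properties consumed by this loop are how anonymous enablings reach the kernel: dtrace -A serializes the DOF for each enabling into a property on the dtrace driver. Schematically, with the byte values elided (the exact .conf encoding is not reproduced here):

    # /kernel/drv/dtrace.conf -- schematic only
    dof-data-0=0x7f,0x44,0x4f,0x46,...;    # starts with the "\177DOF" magic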
13507 13676 /*
13508 13677 * DTrace Helper Functions
13509 13678 */
13510 13679 static void
13511 13680 dtrace_helper_trace(dtrace_helper_action_t *helper,
13512 13681 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
13513 13682 {
13514 13683 uint32_t size, next, nnext, i;
13515 13684 dtrace_helptrace_t *ent;
13516 13685 uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
13517 13686
13518 13687 if (!dtrace_helptrace_enabled)
13519 13688 return;
13520 13689
13521 13690 ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);
13522 13691
13523 13692 /*
13524 13693 * What would a tracing framework be without its own tracing
13525 13694 * framework? (Well, a hell of a lot simpler, for starters...)
13526 13695 */
13527 13696 size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
13528 13697 sizeof (uint64_t) - sizeof (uint64_t);
13529 13698
13530 13699 /*
13531 13700 * Iterate until we can allocate a slot in the trace buffer.
13532 13701 */
13533 13702 do {
13534 13703 next = dtrace_helptrace_next;
13535 13704
13536 13705 if (next + size < dtrace_helptrace_bufsize) {
13537 13706 nnext = next + size;
13538 13707 } else {
13539 13708 nnext = size;
13540 13709 }
13541 13710 } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);
13542 13711
13543 13712 /*
13544 13713 * We have our slot; fill it in.
13545 13714 */
13546 13715 if (nnext == size)
13547 13716 next = 0;
13548 13717
13549 13718 ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
13550 13719 ent->dtht_helper = helper;
13551 13720 ent->dtht_where = where;
13552 13721 ent->dtht_nlocals = vstate->dtvs_nlocals;
13553 13722
13554 13723 ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
13555 13724 mstate->dtms_fltoffs : -1;
13556 13725 ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
13557 13726 ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
13558 13727
13559 13728 for (i = 0; i < vstate->dtvs_nlocals; i++) {
13560 13729 dtrace_statvar_t *svar;
13561 13730
13562 13731 if ((svar = vstate->dtvs_locals[i]) == NULL)
13563 13732 continue;
13564 13733
13565 13734 ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
13566 13735 ent->dtht_locals[i] =
13567 13736 ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
13568 13737 }
13569 13738 }
13570 13739
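The allocation loop above is a small lock-free ring-buffer reservation: compute a tentative end offset, start over from the front when the record will not fit, and publish with compare-and-swap. A user-space analogue using C11 atomics (names and buffer size illustrative):

    #include <stdatomic.h>
    #include <stdint.h>

    #define TRACE_BUFSIZE   4096

    static _Atomic uint32_t trace_next;

    static uint32_t
    trace_reserve(uint32_t size)
    {
        uint32_t next, nnext;

        do {
            next = atomic_load(&trace_next);
            nnext = (next + size < TRACE_BUFSIZE) ?
                next + size : size;         /* wrap: slot moves to the front */
        } while (!atomic_compare_exchange_weak(&trace_next, &next, nnext));

        return (nnext == size ? 0 : next);  /* a wrapped slot starts at 0 */
    }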
13571 13740 static uint64_t
13572 13741 dtrace_helper(int which, dtrace_mstate_t *mstate,
13573 13742 dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
13574 13743 {
13575 13744 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
13576 13745 uint64_t sarg0 = mstate->dtms_arg[0];
13577 13746 uint64_t sarg1 = mstate->dtms_arg[1];
13578 13747 uint64_t rval;
13579 13748 dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
13580 13749 dtrace_helper_action_t *helper;
13581 13750 dtrace_vstate_t *vstate;
13582 13751 dtrace_difo_t *pred;
13583 13752 int i, trace = dtrace_helptrace_enabled;
13584 13753
13585 13754 ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);
13586 13755
13587 13756 if (helpers == NULL)
13588 13757 return (0);
13589 13758
13590 13759 if ((helper = helpers->dthps_actions[which]) == NULL)
13591 13760 return (0);
13592 13761
13593 13762 vstate = &helpers->dthps_vstate;
13594 13763 mstate->dtms_arg[0] = arg0;
13595 13764 mstate->dtms_arg[1] = arg1;
13596 13765
13597 13766 /*
13598 13767 * Now iterate over each helper. If its predicate evaluates to 'true',
13599 13768 * we'll call the corresponding actions. Note that the below calls
13600 13769 * to dtrace_dif_emulate() may set faults in machine state. This is
13601 13770 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow
13602 13771 * the stored DIF offset with its own (which is the desired behavior).
13603 13772 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
13604 13773 * from machine state; this is okay, too.
13605 13774 */
13606 13775 for (; helper != NULL; helper = helper->dtha_next) {
13607 13776 if ((pred = helper->dtha_predicate) != NULL) {
13608 13777 if (trace)
13609 13778 dtrace_helper_trace(helper, mstate, vstate, 0);
13610 13779
13611 13780 if (!dtrace_dif_emulate(pred, mstate, vstate, state))
13612 13781 goto next;
13613 13782
13614 13783 if (*flags & CPU_DTRACE_FAULT)
13615 13784 goto err;
13616 13785 }
13617 13786
13618 13787 for (i = 0; i < helper->dtha_nactions; i++) {
13619 13788 if (trace)
13620 13789 dtrace_helper_trace(helper,
13621 13790 mstate, vstate, i + 1);
13622 13791
13623 13792 rval = dtrace_dif_emulate(helper->dtha_actions[i],
13624 13793 mstate, vstate, state);
13625 13794
13626 13795 if (*flags & CPU_DTRACE_FAULT)
13627 13796 goto err;
13628 13797 }
13629 13798
13630 13799 next:
13631 13800 if (trace)
13632 13801 dtrace_helper_trace(helper, mstate, vstate,
13633 13802 DTRACE_HELPTRACE_NEXT);
13634 13803 }
13635 13804
13636 13805 if (trace)
13637 13806 dtrace_helper_trace(helper, mstate, vstate,
13638 13807 DTRACE_HELPTRACE_DONE);
13639 13808
13640 13809 /*
13641 13810 * Restore the arg0 that we saved upon entry.
13642 13811 */
13643 13812 mstate->dtms_arg[0] = sarg0;
13644 13813 mstate->dtms_arg[1] = sarg1;
13645 13814
13646 13815 return (rval);
13647 13816
13648 13817 err:
13649 13818 if (trace)
13650 13819 dtrace_helper_trace(helper, mstate, vstate,
13651 13820 DTRACE_HELPTRACE_ERR);
13652 13821
13653 13822 /*
13654 13823 * Restore the arg0 that we saved upon entry.
13655 13824 */
13656 13825 mstate->dtms_arg[0] = sarg0;
13657 13826 mstate->dtms_arg[1] = sarg1;
13658 13827
13659 13828 return (0);
13660 13829 }
13661 13830
13662 13831 static void
13663 13832 dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
13664 13833 dtrace_vstate_t *vstate)
13665 13834 {
13666 13835 int i;
13667 13836
13668 13837 if (helper->dtha_predicate != NULL)
13669 13838 dtrace_difo_release(helper->dtha_predicate, vstate);
13670 13839
13671 13840 for (i = 0; i < helper->dtha_nactions; i++) {
13672 13841 ASSERT(helper->dtha_actions[i] != NULL);
13673 13842 dtrace_difo_release(helper->dtha_actions[i], vstate);
13674 13843 }
13675 13844
13676 13845 kmem_free(helper->dtha_actions,
13677 13846 helper->dtha_nactions * sizeof (dtrace_difo_t *));
13678 13847 kmem_free(helper, sizeof (dtrace_helper_action_t));
13679 13848 }
13680 13849
13681 13850 static int
13682 13851 dtrace_helper_destroygen(int gen)
13683 13852 {
13684 13853 proc_t *p = curproc;
13685 13854 dtrace_helpers_t *help = p->p_dtrace_helpers;
13686 13855 dtrace_vstate_t *vstate;
13687 13856 int i;
13688 13857
13689 13858 ASSERT(MUTEX_HELD(&dtrace_lock));
13690 13859
13691 13860 if (help == NULL || gen > help->dthps_generation)
13692 13861 return (EINVAL);
13693 13862
13694 13863 vstate = &help->dthps_vstate;
13695 13864
13696 13865 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
13697 13866 dtrace_helper_action_t *last = NULL, *h, *next;
13698 13867
13699 13868 for (h = help->dthps_actions[i]; h != NULL; h = next) {
13700 13869 next = h->dtha_next;
13701 13870
13702 13871 if (h->dtha_generation == gen) {
13703 13872 if (last != NULL) {
13704 13873 last->dtha_next = next;
13705 13874 } else {
13706 13875 help->dthps_actions[i] = next;
13707 13876 }
13708 13877
13709 13878 dtrace_helper_action_destroy(h, vstate);
13710 13879 } else {
13711 13880 last = h;
13712 13881 }
13713 13882 }
13714 13883 }
13715 13884
13716 13885 /*
13717 13886 * Iterate until we've cleared out all helper providers with the
13718 13887 * given generation number.
13719 13888 */
13720 13889 for (;;) {
13721 13890 dtrace_helper_provider_t *prov;
13722 13891
13723 13892 /*
13724 13893 * Look for a helper provider with the right generation. We
13725 13894 * have to start back at the beginning of the list each time
13726 13895 * because we drop dtrace_lock. It's unlikely that we'll make
13727 13896 * more than two passes.
13728 13897 */
13729 13898 for (i = 0; i < help->dthps_nprovs; i++) {
13730 13899 prov = help->dthps_provs[i];
13731 13900
13732 13901 if (prov->dthp_generation == gen)
13733 13902 break;
13734 13903 }
13735 13904
13736 13905 /*
13737 13906 * If there were no matches, we're done.
13738 13907 */
13739 13908 if (i == help->dthps_nprovs)
13740 13909 break;
13741 13910
13742 13911 /*
13743 13912 * Move the last helper provider into this slot.
13744 13913 */
13745 13914 help->dthps_nprovs--;
13746 13915 help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
13747 13916 help->dthps_provs[help->dthps_nprovs] = NULL;
13748 13917
13749 13918 mutex_exit(&dtrace_lock);
13750 13919
13751 13920 /*
13752 13921 * If we have a meta provider, remove this helper provider.
13753 13922 */
13754 13923 mutex_enter(&dtrace_meta_lock);
13755 13924 if (dtrace_meta_pid != NULL) {
13756 13925 ASSERT(dtrace_deferred_pid == NULL);
13757 13926 dtrace_helper_provider_remove(&prov->dthp_prov,
13758 13927 p->p_pid);
13759 13928 }
13760 13929 mutex_exit(&dtrace_meta_lock);
13761 13930
13762 13931 dtrace_helper_provider_destroy(prov);
13763 13932
13764 13933 mutex_enter(&dtrace_lock);
13765 13934 }
13766 13935
13767 13936 return (0);
13768 13937 }
13769 13938
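The provider removal in dtrace_helper_destroygen() uses the classic unordered deletion: move the last entry into the vacated slot rather than shifting the whole array. Distilled (illustrative):

    #include <stddef.h>

    /* Constant-time array removal; does not preserve order. */
    static void
    swap_remove(void **arr, unsigned *n, unsigned i)
    {
        arr[i] = arr[--*n];     /* last element fills the hole */
        arr[*n] = NULL;         /* clear the vacated tail slot */
    }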
13770 13939 static int
13771 13940 dtrace_helper_validate(dtrace_helper_action_t *helper)
13772 13941 {
13773 13942 int err = 0, i;
13774 13943 dtrace_difo_t *dp;
13775 13944
13776 13945 if ((dp = helper->dtha_predicate) != NULL)
13777 13946 err += dtrace_difo_validate_helper(dp);
13778 13947
13779 13948 for (i = 0; i < helper->dtha_nactions; i++)
13780 13949 err += dtrace_difo_validate_helper(helper->dtha_actions[i]);
13781 13950
13782 13951 return (err == 0);
13783 13952 }
13784 13953
13785 13954 static int
13786 13955 dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
13787 13956 {
13788 13957 dtrace_helpers_t *help;
13789 13958 dtrace_helper_action_t *helper, *last;
13790 13959 dtrace_actdesc_t *act;
13791 13960 dtrace_vstate_t *vstate;
13792 13961 dtrace_predicate_t *pred;
13793 13962 int count = 0, nactions = 0, i;
13794 13963
13795 13964 if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
13796 13965 return (EINVAL);
13797 13966
13798 13967 help = curproc->p_dtrace_helpers;
13799 13968 last = help->dthps_actions[which];
13800 13969 vstate = &help->dthps_vstate;
13801 13970
13802 13971 for (count = 0; last != NULL; last = last->dtha_next) {
13803 13972 count++;
13804 13973 if (last->dtha_next == NULL)
13805 13974 break;
13806 13975 }
13807 13976
13808 13977 /*
13809 13978 * If we already have dtrace_helper_actions_max helper actions for this
13810 13979 * helper action type, we'll refuse to add a new one.
13811 13980 */
13812 13981 if (count >= dtrace_helper_actions_max)
13813 13982 return (ENOSPC);
13814 13983
13815 13984 helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
13816 13985 helper->dtha_generation = help->dthps_generation;
13817 13986
13818 13987 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
13819 13988 ASSERT(pred->dtp_difo != NULL);
13820 13989 dtrace_difo_hold(pred->dtp_difo);
13821 13990 helper->dtha_predicate = pred->dtp_difo;
13822 13991 }
13823 13992
13824 13993 for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
13825 13994 if (act->dtad_kind != DTRACEACT_DIFEXPR)
13826 13995 goto err;
13827 13996
13828 13997 if (act->dtad_difo == NULL)
13829 13998 goto err;
13830 13999
13831 14000 nactions++;
13832 14001 }
13833 14002
13834 14003 helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
13835 14004 (helper->dtha_nactions = nactions), KM_SLEEP);
13836 14005
13837 14006 for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
13838 14007 dtrace_difo_hold(act->dtad_difo);
13839 14008 helper->dtha_actions[i++] = act->dtad_difo;
13840 14009 }
13841 14010
13842 14011 if (!dtrace_helper_validate(helper))
13843 14012 goto err;
13844 14013
13845 14014 if (last == NULL) {
13846 14015 help->dthps_actions[which] = helper;
13847 14016 } else {
13848 14017 last->dtha_next = helper;
13849 14018 }
13850 14019
13851 14020 if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
13852 14021 dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
13853 14022 dtrace_helptrace_next = 0;
13854 14023 }
13855 14024
13856 14025 return (0);
13857 14026 err:
13858 14027 dtrace_helper_action_destroy(helper, vstate);
13859 14028 return (EINVAL);
13860 14029 }
13861 14030
13862 14031 static void
13863 14032 dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
13864 14033 dof_helper_t *dofhp)
13865 14034 {
13866 14035 ASSERT(MUTEX_NOT_HELD(&dtrace_lock));
13867 14036
13868 14037 mutex_enter(&dtrace_meta_lock);
13869 14038 mutex_enter(&dtrace_lock);
13870 14039
13871 14040 if (!dtrace_attached() || dtrace_meta_pid == NULL) {
13872 14041 /*
13873 14042 * If the dtrace module is loaded but not attached, or if
13874 14043 * there isn't a meta provider registered to deal with
13875 14044 * these provider descriptions, we need to postpone creating
13876 14045 * the actual providers until later.
13877 14046 */
13878 14047
13879 14048 if (help->dthps_next == NULL && help->dthps_prev == NULL &&
13880 14049 dtrace_deferred_pid != help) {
13881 14050 help->dthps_deferred = 1;
13882 14051 help->dthps_pid = p->p_pid;
13883 14052 help->dthps_next = dtrace_deferred_pid;
13884 14053 help->dthps_prev = NULL;
13885 14054 if (dtrace_deferred_pid != NULL)
13886 14055 dtrace_deferred_pid->dthps_prev = help;
13887 14056 dtrace_deferred_pid = help;
13888 14057 }
13889 14058
13890 14059 mutex_exit(&dtrace_lock);
13891 14060
13892 14061 } else if (dofhp != NULL) {
13893 14062 /*
13894 14063 * If the dtrace module is loaded and we have a particular
13895 14064 * helper provider description, pass that off to the
13896 14065 * meta provider.
13897 14066 */
13898 14067
13899 14068 mutex_exit(&dtrace_lock);
13900 14069
13901 14070 dtrace_helper_provide(dofhp, p->p_pid);
13902 14071
13903 14072 } else {
13904 14073 /*
13905 14074 * Otherwise, just pass all the helper provider descriptions
13906 14075 * off to the meta provider.
13907 14076 */
13908 14077
13909 14078 int i;
13910 14079 mutex_exit(&dtrace_lock);
13911 14080
13912 14081 for (i = 0; i < help->dthps_nprovs; i++) {
13913 14082 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
13914 14083 p->p_pid);
13915 14084 }
13916 14085 }
13917 14086
13918 14087 mutex_exit(&dtrace_meta_lock);
13919 14088 }
13920 14089
13921 14090 static int
13922 14091 dtrace_helper_provider_add(dof_helper_t *dofhp, int gen)
13923 14092 {
13924 14093 dtrace_helpers_t *help;
13925 14094 dtrace_helper_provider_t *hprov, **tmp_provs;
13926 14095 uint_t tmp_maxprovs, i;
13927 14096
13928 14097 ASSERT(MUTEX_HELD(&dtrace_lock));
13929 14098
13930 14099 help = curproc->p_dtrace_helpers;
13931 14100 ASSERT(help != NULL);
13932 14101
13933 14102 /*
13934 14103 * If we already have dtrace_helper_providers_max helper providers,
13935 14104 * we refuse to add a new one.
13936 14105 */
13937 14106 if (help->dthps_nprovs >= dtrace_helper_providers_max)
13938 14107 return (ENOSPC);
13939 14108
13940 14109 /*
13941 14110 * Check to make sure this isn't a duplicate.
13942 14111 */
13943 14112 for (i = 0; i < help->dthps_nprovs; i++) {
13944 14113 if (dofhp->dofhp_dof ==
13945 14114 help->dthps_provs[i]->dthp_prov.dofhp_dof)
13946 14115 return (EALREADY);
13947 14116 }
13948 14117
13949 14118 hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
13950 14119 hprov->dthp_prov = *dofhp;
13951 14120 hprov->dthp_ref = 1;
13952 14121 hprov->dthp_generation = gen;
13953 14122
13954 14123 /*
13955 14124 * Allocate a bigger table for helper providers if it's already full.
13956 14125 */
13957 14126 if (help->dthps_maxprovs == help->dthps_nprovs) {
13958 14127 tmp_maxprovs = help->dthps_maxprovs;
13959 14128 tmp_provs = help->dthps_provs;
13960 14129
13961 14130 if (help->dthps_maxprovs == 0)
13962 14131 help->dthps_maxprovs = 2;
13963 14132 else
13964 14133 help->dthps_maxprovs *= 2;
13965 14134 if (help->dthps_maxprovs > dtrace_helper_providers_max)
13966 14135 help->dthps_maxprovs = dtrace_helper_providers_max;
13967 14136
13968 14137 ASSERT(tmp_maxprovs < help->dthps_maxprovs);
13969 14138
13970 14139 help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
13971 14140 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
13972 14141
13973 14142 if (tmp_provs != NULL) {
13974 14143 bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
13975 14144 sizeof (dtrace_helper_provider_t *));
13976 14145 kmem_free(tmp_provs, tmp_maxprovs *
13977 14146 sizeof (dtrace_helper_provider_t *));
13978 14147 }
13979 14148 }
13980 14149
13981 14150 help->dthps_provs[help->dthps_nprovs] = hprov;
13982 14151 help->dthps_nprovs++;
13983 14152
13984 14153 return (0);
13985 14154 }
13986 14155
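The growth step above is amortized doubling (0 -> 2 -> 4 -> 8 ...) with a hard cap at dtrace_helper_providers_max; the kernel's kmem_zalloc(..., KM_SLEEP) cannot fail, so there is no error path there. A user-space rendering of the same policy (which, unlike the kernel, must handle allocation failure):

    #include <stdlib.h>
    #include <string.h>

    static int
    table_grow(void ***tab, unsigned *cap, unsigned max)
    {
        unsigned ncap = (*cap == 0) ? 2 : *cap * 2;
        void **ntab;

        if (ncap > max)
            ncap = max;

        if ((ntab = calloc(ncap, sizeof (void *))) == NULL)
            return (-1);

        if (*tab != NULL) {
            memcpy(ntab, *tab, *cap * sizeof (void *));
            free(*tab);
        }

        *tab = ntab;
        *cap = ncap;
        return (0);
    }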
13987 14156 static void
13988 14157 dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
13989 14158 {
13990 14159 mutex_enter(&dtrace_lock);
13991 14160
13992 14161 if (--hprov->dthp_ref == 0) {
13993 14162 dof_hdr_t *dof;
13994 14163 mutex_exit(&dtrace_lock);
13995 14164 dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
13996 14165 dtrace_dof_destroy(dof);
13997 14166 kmem_free(hprov, sizeof (dtrace_helper_provider_t));
13998 14167 } else {
13999 14168 mutex_exit(&dtrace_lock);
14000 14169 }
14001 14170 }
14002 14171
14003 14172 static int
14004 14173 dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
14005 14174 {
14006 14175 uintptr_t daddr = (uintptr_t)dof;
14007 14176 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
14008 14177 dof_provider_t *provider;
14009 14178 dof_probe_t *probe;
14010 14179 uint8_t *arg;
14011 14180 char *strtab, *typestr;
14012 14181 dof_stridx_t typeidx;
14013 14182 size_t typesz;
14014 14183 uint_t nprobes, j, k;
14015 14184
14016 14185 ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);
14017 14186
14018 14187 if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
14019 14188 dtrace_dof_error(dof, "misaligned section offset");
14020 14189 return (-1);
14021 14190 }
14022 14191
14023 14192 /*
14024 14193 * The section needs to be large enough to contain the DOF provider
14025 14194 * structure appropriate for the given version.
14026 14195 */
14027 14196 if (sec->dofs_size <
14028 14197 ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
14029 14198 offsetof(dof_provider_t, dofpv_prenoffs) :
14030 14199 sizeof (dof_provider_t))) {
14031 14200 dtrace_dof_error(dof, "provider section too small");
14032 14201 return (-1);
14033 14202 }
14034 14203
14035 14204 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
14036 14205 str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
14037 14206 prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
14038 14207 arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
14039 14208 off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);
14040 14209
14041 14210 if (str_sec == NULL || prb_sec == NULL ||
14042 14211 arg_sec == NULL || off_sec == NULL)
14043 14212 return (-1);
14044 14213
14045 14214 enoff_sec = NULL;
14046 14215
14047 14216 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
14048 14217 provider->dofpv_prenoffs != DOF_SECT_NONE &&
14049 14218 (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
14050 14219 provider->dofpv_prenoffs)) == NULL)
14051 14220 return (-1);
14052 14221
14053 14222 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
14054 14223
14055 14224 if (provider->dofpv_name >= str_sec->dofs_size ||
14056 14225 strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
14057 14226 dtrace_dof_error(dof, "invalid provider name");
14058 14227 return (-1);
14059 14228 }
14060 14229
14061 14230 if (prb_sec->dofs_entsize == 0 ||
14062 14231 prb_sec->dofs_entsize > prb_sec->dofs_size) {
14063 14232 dtrace_dof_error(dof, "invalid entry size");
14064 14233 return (-1);
14065 14234 }
14066 14235
14067 14236 if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
14068 14237 dtrace_dof_error(dof, "misaligned entry size");
14069 14238 return (-1);
14070 14239 }
14071 14240
14072 14241 if (off_sec->dofs_entsize != sizeof (uint32_t)) {
14073 14242 dtrace_dof_error(dof, "invalid entry size");
14074 14243 return (-1);
14075 14244 }
14076 14245
14077 14246 if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
14078 14247 dtrace_dof_error(dof, "misaligned section offset");
14079 14248 return (-1);
14080 14249 }
14081 14250
14082 14251 if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
14083 14252 dtrace_dof_error(dof, "invalid entry size");
14084 14253 return (-1);
14085 14254 }
14086 14255
14087 14256 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
14088 14257
14089 14258 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
14090 14259
14091 14260 /*
14092 14261 * Take a pass through the probes to check for errors.
14093 14262 */
14094 14263 for (j = 0; j < nprobes; j++) {
14095 14264 probe = (dof_probe_t *)(uintptr_t)(daddr +
14096 14265 prb_sec->dofs_offset + j * prb_sec->dofs_entsize);
14097 14266
14098 14267 if (probe->dofpr_func >= str_sec->dofs_size) {
14099 14268 dtrace_dof_error(dof, "invalid function name");
14100 14269 return (-1);
14101 14270 }
14102 14271
14103 14272 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
14104 14273 dtrace_dof_error(dof, "function name too long");
14105 14274 return (-1);
14106 14275 }
14107 14276
14108 14277 if (probe->dofpr_name >= str_sec->dofs_size ||
14109 14278 strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
14110 14279 dtrace_dof_error(dof, "invalid probe name");
14111 14280 return (-1);
14112 14281 }
14113 14282
14114 14283 /*
14115 14284 * The offset count must not wrap the index, and the offsets
14116 14285 * must also not overflow the section's data.
14117 14286 */
14118 14287 if (probe->dofpr_offidx + probe->dofpr_noffs <
14119 14288 probe->dofpr_offidx ||
14120 14289 (probe->dofpr_offidx + probe->dofpr_noffs) *
14121 14290 off_sec->dofs_entsize > off_sec->dofs_size) {
14122 14291 dtrace_dof_error(dof, "invalid probe offset");
14123 14292 return (-1);
14124 14293 }
14125 14294
14126 14295 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
14127 14296 /*
14128 14297 * If there's no is-enabled offset section, make sure
14129 14298 * there aren't any is-enabled offsets. Otherwise
14130 14299 * perform the same checks as for probe offsets
14131 14300 * (immediately above).
14132 14301 */
14133 14302 if (enoff_sec == NULL) {
14134 14303 if (probe->dofpr_enoffidx != 0 ||
14135 14304 probe->dofpr_nenoffs != 0) {
14136 14305 dtrace_dof_error(dof, "is-enabled "
14137 14306 "offsets with null section");
14138 14307 return (-1);
14139 14308 }
14140 14309 } else if (probe->dofpr_enoffidx +
14141 14310 probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
14142 14311 (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
14143 14312 enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
14144 14313 dtrace_dof_error(dof, "invalid is-enabled "
14145 14314 "offset");
14146 14315 return (-1);
14147 14316 }
14148 14317
14149 14318 if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
14150 14319 dtrace_dof_error(dof, "zero probe and "
14151 14320 "is-enabled offsets");
14152 14321 return (-1);
14153 14322 }
14154 14323 } else if (probe->dofpr_noffs == 0) {
14155 14324 dtrace_dof_error(dof, "zero probe offsets");
14156 14325 return (-1);
14157 14326 }
14158 14327
14159 14328 if (probe->dofpr_argidx + probe->dofpr_xargc <
14160 14329 probe->dofpr_argidx ||
14161 14330 (probe->dofpr_argidx + probe->dofpr_xargc) *
14162 14331 arg_sec->dofs_entsize > arg_sec->dofs_size) {
14163 14332 dtrace_dof_error(dof, "invalid args");
14164 14333 return (-1);
14165 14334 }
14166 14335
14167 14336 typeidx = probe->dofpr_nargv;
14168 14337 typestr = strtab + probe->dofpr_nargv;
14169 14338 for (k = 0; k < probe->dofpr_nargc; k++) {
14170 14339 if (typeidx >= str_sec->dofs_size) {
14171 14340 dtrace_dof_error(dof, "bad "
14172 14341 "native argument type");
14173 14342 return (-1);
14174 14343 }
14175 14344
14176 14345 typesz = strlen(typestr) + 1;
14177 14346 if (typesz > DTRACE_ARGTYPELEN) {
14178 14347 dtrace_dof_error(dof, "native "
14179 14348 "argument type too long");
14180 14349 return (-1);
14181 14350 }
14182 14351 typeidx += typesz;
14183 14352 typestr += typesz;
14184 14353 }
14185 14354
14186 14355 typeidx = probe->dofpr_xargv;
14187 14356 typestr = strtab + probe->dofpr_xargv;
14188 14357 for (k = 0; k < probe->dofpr_xargc; k++) {
14189 14358 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
14190 14359 dtrace_dof_error(dof, "bad "
14191 14360 "native argument index");
14192 14361 return (-1);
14193 14362 }
14194 14363
14195 14364 if (typeidx >= str_sec->dofs_size) {
14196 14365 dtrace_dof_error(dof, "bad "
14197 14366 "translated argument type");
14198 14367 return (-1);
14199 14368 }
14200 14369
14201 14370 typesz = strlen(typestr) + 1;
14202 14371 if (typesz > DTRACE_ARGTYPELEN) {
14203 14372 dtrace_dof_error(dof, "translated argument "
14204 14373 "type too long");
14205 14374 return (-1);
14206 14375 }
14207 14376
14208 14377 typeidx += typesz;
14209 14378 typestr += typesz;
14210 14379 }
14211 14380 }
14212 14381
14213 14382 return (0);
14214 14383 }
14215 14384
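A recurring check in the validation above guards against 32-bit wraparound: idx + n can overflow before the bounds comparison runs, so the wrap itself is tested first (if (a + b < a) ...). The same shape, distilled; this sketch additionally widens the multiply to 64 bits so it cannot itself overflow:

    #include <stdint.h>

    static int
    range_ok(uint32_t idx, uint32_t n, uint32_t entsize, uint32_t secsize)
    {
        if (idx + n < idx)          /* index wrapped: reject */
            return (0);

        if ((uint64_t)(idx + n) * entsize > secsize)
            return (0);             /* entries run past the section */

        return (1);
    }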
14216 14385 static int
14217 14386 dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp)
14218 14387 {
14219 14388 dtrace_helpers_t *help;
14220 14389 dtrace_vstate_t *vstate;
14221 14390 dtrace_enabling_t *enab = NULL;
14222 14391 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
14223 14392 uintptr_t daddr = (uintptr_t)dof;
14224 14393
14225 14394 ASSERT(MUTEX_HELD(&dtrace_lock));
14226 14395
14227 14396 if ((help = curproc->p_dtrace_helpers) == NULL)
14228 14397 help = dtrace_helpers_create(curproc);
14229 14398
14230 14399 vstate = &help->dthps_vstate;
14231 14400
14232 14401 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
14233 14402 dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
14234 14403 dtrace_dof_destroy(dof);
14235 14404 return (rv);
14236 14405 }
14237 14406
14238 14407 /*
14239 14408 * Look for helper providers and validate their descriptions.
14240 14409 */
14241 14410 if (dhp != NULL) {
14242 14411 for (i = 0; i < dof->dofh_secnum; i++) {
14243 14412 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
14244 14413 dof->dofh_secoff + i * dof->dofh_secsize);
14245 14414
14246 14415 if (sec->dofs_type != DOF_SECT_PROVIDER)
14247 14416 continue;
14248 14417
14249 14418 if (dtrace_helper_provider_validate(dof, sec) != 0) {
14250 14419 dtrace_enabling_destroy(enab);
14251 14420 dtrace_dof_destroy(dof);
14252 14421 return (-1);
14253 14422 }
14254 14423
14255 14424 nprovs++;
14256 14425 }
14257 14426 }
14258 14427
14259 14428 /*
14260 14429 * Now we need to walk through the ECB descriptions in the enabling.
14261 14430 */
14262 14431 for (i = 0; i < enab->dten_ndesc; i++) {
14263 14432 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
14264 14433 dtrace_probedesc_t *desc = &ep->dted_probe;
14265 14434
14266 14435 if (strcmp(desc->dtpd_provider, "dtrace") != 0)
14267 14436 continue;
14268 14437
14269 14438 if (strcmp(desc->dtpd_mod, "helper") != 0)
14270 14439 continue;
14271 14440
14272 14441 if (strcmp(desc->dtpd_func, "ustack") != 0)
14273 14442 continue;
14274 14443
14275 14444 if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK,
14276 14445 ep)) != 0) {
14277 14446 /*
14278 14447 * Adding this helper action failed -- we are now going
14279 14448 * to rip out the entire generation and return failure.
14280 14449 */
14281 14450 (void) dtrace_helper_destroygen(help->dthps_generation);
14282 14451 dtrace_enabling_destroy(enab);
14283 14452 dtrace_dof_destroy(dof);
14284 14453 return (-1);
14285 14454 }
14286 14455
14287 14456 nhelpers++;
14288 14457 }
14289 14458
14290 14459 if (nhelpers < enab->dten_ndesc)
14291 14460 dtrace_dof_error(dof, "unmatched helpers");
14292 14461
14293 14462 gen = help->dthps_generation++;
14294 14463 dtrace_enabling_destroy(enab);
14295 14464
14296 14465 if (dhp != NULL && nprovs > 0) {
14297 14466 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
14298 14467 if (dtrace_helper_provider_add(dhp, gen) == 0) {
14299 14468 mutex_exit(&dtrace_lock);
14300 14469 dtrace_helper_provider_register(curproc, help, dhp);
14301 14470 mutex_enter(&dtrace_lock);
14302 14471
14303 14472 destroy = 0;
14304 14473 }
14305 14474 }
14306 14475
14307 14476 if (destroy)
14308 14477 dtrace_dof_destroy(dof);
14309 14478
14310 14479 return (gen);
14311 14480 }
14312 14481
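The three strcmp() calls in the loop above mean that only ECBs written against the D probe description dtrace:helper:ustack: are accepted as helper actions; anything else in the DOF is counted as an unmatched helper. Condensed into a predicate (illustrative only):

    #include <string.h>

    static int
    is_ustack_helper(const char *prov, const char *mod, const char *func)
    {
        return (strcmp(prov, "dtrace") == 0 &&
            strcmp(mod, "helper") == 0 &&
            strcmp(func, "ustack") == 0);
    }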
14313 14482 static dtrace_helpers_t *
14314 14483 dtrace_helpers_create(proc_t *p)
14315 14484 {
14316 14485 dtrace_helpers_t *help;
14317 14486
14318 14487 ASSERT(MUTEX_HELD(&dtrace_lock));
14319 14488 ASSERT(p->p_dtrace_helpers == NULL);
14320 14489
14321 14490 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
14322 14491 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
14323 14492 DTRACE_NHELPER_ACTIONS, KM_SLEEP);
14324 14493
14325 14494 p->p_dtrace_helpers = help;
14326 14495 dtrace_helpers++;
14327 14496
14328 14497 return (help);
14329 14498 }
14330 14499
14331 14500 static void
14332 14501 dtrace_helpers_destroy(void)
14333 14502 {
14334 14503 dtrace_helpers_t *help;
14335 14504 dtrace_vstate_t *vstate;
14336 14505 proc_t *p = curproc;
14337 14506 int i;
14338 14507
14339 14508 mutex_enter(&dtrace_lock);
14340 14509
14341 14510 ASSERT(p->p_dtrace_helpers != NULL);
14342 14511 ASSERT(dtrace_helpers > 0);
14343 14512
14344 14513 help = p->p_dtrace_helpers;
14345 14514 vstate = &help->dthps_vstate;
14346 14515
14347 14516 /*
14348 14517 * We're now going to lose the help from this process.
14349 14518 */
14350 14519 p->p_dtrace_helpers = NULL;
14351 14520 dtrace_sync();
14352 14521
14353 14522 /*
14354 14523 * Destroy the helper actions.
14355 14524 */
14356 14525 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
14357 14526 dtrace_helper_action_t *h, *next;
14358 14527
14359 14528 for (h = help->dthps_actions[i]; h != NULL; h = next) {
14360 14529 next = h->dtha_next;
14361 14530 dtrace_helper_action_destroy(h, vstate);
14362 14531 h = next;
14363 14532 }
14364 14533 }
14365 14534
14366 14535 mutex_exit(&dtrace_lock);
14367 14536
14368 14537 /*
14369 14538 * Destroy the helper providers.
14370 14539 */
14371 14540 if (help->dthps_maxprovs > 0) {
14372 14541 mutex_enter(&dtrace_meta_lock);
14373 14542 if (dtrace_meta_pid != NULL) {
14374 14543 ASSERT(dtrace_deferred_pid == NULL);
14375 14544
14376 14545 for (i = 0; i < help->dthps_nprovs; i++) {
14377 14546 dtrace_helper_provider_remove(
14378 14547 &help->dthps_provs[i]->dthp_prov, p->p_pid);
14379 14548 }
14380 14549 } else {
14381 14550 mutex_enter(&dtrace_lock);
14382 14551 ASSERT(help->dthps_deferred == 0 ||
14383 14552 help->dthps_next != NULL ||
14384 14553 help->dthps_prev != NULL ||
14385 14554 help == dtrace_deferred_pid);
14386 14555
14387 14556 /*
14388 14557 * Remove the helper from the deferred list.
14389 14558 */
14390 14559 if (help->dthps_next != NULL)
14391 14560 help->dthps_next->dthps_prev = help->dthps_prev;
14392 14561 if (help->dthps_prev != NULL)
14393 14562 help->dthps_prev->dthps_next = help->dthps_next;
14394 14563 if (dtrace_deferred_pid == help) {
14395 14564 dtrace_deferred_pid = help->dthps_next;
14396 14565 ASSERT(help->dthps_prev == NULL);
14397 14566 }
14398 14567
14399 14568 mutex_exit(&dtrace_lock);
14400 14569 }
14401 14570
14402 14571 mutex_exit(&dtrace_meta_lock);
14403 14572
14404 14573 for (i = 0; i < help->dthps_nprovs; i++) {
14405 14574 dtrace_helper_provider_destroy(help->dthps_provs[i]);
14406 14575 }
14407 14576
14408 14577 kmem_free(help->dthps_provs, help->dthps_maxprovs *
14409 14578 sizeof (dtrace_helper_provider_t *));
14410 14579 }
14411 14580
14412 14581 mutex_enter(&dtrace_lock);
14413 14582
14414 14583 dtrace_vstate_fini(&help->dthps_vstate);
14415 14584 kmem_free(help->dthps_actions,
14416 14585 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
14417 14586 kmem_free(help, sizeof (dtrace_helpers_t));
14418 14587
14419 14588 --dtrace_helpers;
14420 14589 mutex_exit(&dtrace_lock);
14421 14590 }
14422 14591
14423 14592 static void
14424 14593 dtrace_helpers_duplicate(proc_t *from, proc_t *to)
14425 14594 {
14426 14595 dtrace_helpers_t *help, *newhelp;
14427 14596 dtrace_helper_action_t *helper, *new, *last;
14428 14597 dtrace_difo_t *dp;
14429 14598 dtrace_vstate_t *vstate;
14430 14599 int i, j, sz, hasprovs = 0;
14431 14600
14432 14601 mutex_enter(&dtrace_lock);
14433 14602 ASSERT(from->p_dtrace_helpers != NULL);
14434 14603 ASSERT(dtrace_helpers > 0);
14435 14604
14436 14605 help = from->p_dtrace_helpers;
14437 14606 newhelp = dtrace_helpers_create(to);
14438 14607 ASSERT(to->p_dtrace_helpers != NULL);
14439 14608
14440 14609 newhelp->dthps_generation = help->dthps_generation;
14441 14610 vstate = &newhelp->dthps_vstate;
14442 14611
14443 14612 /*
14444 14613 * Duplicate the helper actions.
14445 14614 */
14446 14615 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
14447 14616 if ((helper = help->dthps_actions[i]) == NULL)
14448 14617 continue;
14449 14618
14450 14619 for (last = NULL; helper != NULL; helper = helper->dtha_next) {
14451 14620 new = kmem_zalloc(sizeof (dtrace_helper_action_t),
14452 14621 KM_SLEEP);
14453 14622 new->dtha_generation = helper->dtha_generation;
14454 14623
14455 14624 if ((dp = helper->dtha_predicate) != NULL) {
14456 14625 dp = dtrace_difo_duplicate(dp, vstate);
14457 14626 new->dtha_predicate = dp;
14458 14627 }
14459 14628
14460 14629 new->dtha_nactions = helper->dtha_nactions;
14461 14630 sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
14462 14631 new->dtha_actions = kmem_alloc(sz, KM_SLEEP);
14463 14632
14464 14633 for (j = 0; j < new->dtha_nactions; j++) {
14465 14634 dtrace_difo_t *dp = helper->dtha_actions[j];
14466 14635
14467 14636 ASSERT(dp != NULL);
14468 14637 dp = dtrace_difo_duplicate(dp, vstate);
14469 14638 new->dtha_actions[j] = dp;
14470 14639 }
14471 14640
14472 14641 if (last != NULL) {
14473 14642 last->dtha_next = new;
14474 14643 } else {
14475 14644 newhelp->dthps_actions[i] = new;
14476 14645 }
14477 14646
14478 14647 last = new;
14479 14648 }
14480 14649 }
14481 14650
14482 14651 /*
14483 14652 * Duplicate the helper providers and register them with the
14484 14653 * DTrace framework.
14485 14654 */
14486 14655 if (help->dthps_nprovs > 0) {
14487 14656 newhelp->dthps_nprovs = help->dthps_nprovs;
14488 14657 newhelp->dthps_maxprovs = help->dthps_nprovs;
14489 14658 newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
14490 14659 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
14491 14660 for (i = 0; i < newhelp->dthps_nprovs; i++) {
14492 14661 newhelp->dthps_provs[i] = help->dthps_provs[i];
14493 14662 newhelp->dthps_provs[i]->dthp_ref++;
14494 14663 }
14495 14664
14496 14665 hasprovs = 1;
14497 14666 }
14498 14667
14499 14668 mutex_exit(&dtrace_lock);
14500 14669
14501 14670 if (hasprovs)
14502 14671 dtrace_helper_provider_register(to, newhelp, NULL);
14503 14672 }
14504 14673
14505 14674 /*
14506 14675 * DTrace Hook Functions
14507 14676 */
14508 14677 static void
14509 14678 dtrace_module_loaded(struct modctl *ctl)
14510 14679 {
14511 14680 dtrace_provider_t *prv;
14512 14681
14513 14682 mutex_enter(&dtrace_provider_lock);
14514 14683 mutex_enter(&mod_lock);
14515 14684
14516 14685 ASSERT(ctl->mod_busy);
14517 14686
14518 14687 /*
14519 14688 * We're going to call each provider's per-module provide operation
14520 14689 * specifying only this module.
14521 14690 */
14522 14691 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
14523 14692 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
14524 14693
14525 14694 mutex_exit(&mod_lock);
14526 14695 mutex_exit(&dtrace_provider_lock);
14527 14696
14528 14697 /*
14529 14698 * If we have any retained enablings, we need to match against them.
14530 14699 * Enabling probes requires that cpu_lock be held, and we cannot hold
14531 14700 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
14532 14701 * module. (In particular, this happens when loading scheduling
14533 14702 * classes.) So if we have any retained enablings, we need to dispatch
14534 14703 * our task queue to do the match for us.
14535 14704 */
14536 14705 mutex_enter(&dtrace_lock);
14537 14706
14538 14707 if (dtrace_retained == NULL) {
14539 14708 mutex_exit(&dtrace_lock);
14540 14709 return;
14541 14710 }
14542 14711
14543 14712 (void) taskq_dispatch(dtrace_taskq,
14544 14713 (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);
14545 14714
14546 14715 mutex_exit(&dtrace_lock);
14547 14716
14548 14717 /*
14549 14718 * And now, for a little heuristic sleaze: in general, we want to
14550 14719 * match modules as soon as they load. However, we cannot guarantee
14551 14720 * this, because it would lead us to the lock ordering violation
14552 14721 * outlined above. The common case, of course, is that cpu_lock is
14553 14722 * _not_ held -- so we delay here for a clock tick, hoping that that's
14554 14723 * long enough for the task queue to do its work. If it's not, it's
14555 14724 * not a serious problem -- it just means that the module that we
14556 14725 * just loaded may not be immediately instrumentable.
14557 14726 */
14558 14727 delay(1);
14559 14728 }
14560 14729
14561 14730 static void
14562 14731 dtrace_module_unloaded(struct modctl *ctl)
14563 14732 {
14564 14733 dtrace_probe_t template, *probe, *first, *next;
14565 14734 dtrace_provider_t *prov;
14566 14735
14567 14736 template.dtpr_mod = ctl->mod_modname;
14568 14737
14569 14738 mutex_enter(&dtrace_provider_lock);
14570 14739 mutex_enter(&mod_lock);
14571 14740 mutex_enter(&dtrace_lock);
14572 14741
14573 14742 if (dtrace_bymod == NULL) {
14574 14743 /*
14575 14744 * The DTrace module is loaded (obviously) but not attached;
14576 14745 * we don't have any work to do.
14577 14746 */
14578 14747 mutex_exit(&dtrace_provider_lock);
14579 14748 mutex_exit(&mod_lock);
14580 14749 mutex_exit(&dtrace_lock);
14581 14750 return;
14582 14751 }
14583 14752
14584 14753 for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
14585 14754 probe != NULL; probe = probe->dtpr_nextmod) {
14586 14755 if (probe->dtpr_ecb != NULL) {
14587 14756 mutex_exit(&dtrace_provider_lock);
14588 14757 mutex_exit(&mod_lock);
14589 14758 mutex_exit(&dtrace_lock);
14590 14759
14591 14760 /*
14592 14761 * This shouldn't _actually_ be possible -- we're
14593 14762 * unloading a module that has an enabled probe in it.
14594 14763 * (It's normally up to the provider to make sure that
14595 14764 * this can't happen.) However, because dtps_enable()
14596 14765 * doesn't have a failure mode, there can be an
14597 14766 * enable/unload race. Upshot: we don't want to
14598 14767 * assert, but we're not going to disable the
14599 14768 * probe, either.
14600 14769 */
14601 14770 if (dtrace_err_verbose) {
14602 14771 cmn_err(CE_WARN, "unloaded module '%s' had "
14603 14772 "enabled probes", ctl->mod_modname);
14604 14773 }
14605 14774
14606 14775 return;
14607 14776 }
14608 14777 }
14609 14778
14610 14779 probe = first;
14611 14780
14612 14781 for (first = NULL; probe != NULL; probe = next) {
14613 14782 ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);
14614 14783
14615 14784 dtrace_probes[probe->dtpr_id - 1] = NULL;
14616 14785
14617 14786 next = probe->dtpr_nextmod;
14618 14787 dtrace_hash_remove(dtrace_bymod, probe);
14619 14788 dtrace_hash_remove(dtrace_byfunc, probe);
14620 14789 dtrace_hash_remove(dtrace_byname, probe);
14621 14790
14622 14791 if (first == NULL) {
14623 14792 first = probe;
14624 14793 probe->dtpr_nextmod = NULL;
14625 14794 } else {
14626 14795 probe->dtpr_nextmod = first;
14627 14796 first = probe;
14628 14797 }
14629 14798 }
14630 14799
14631 14800 /*
14632 14801 * We've removed all of the module's probes from the hash chains and
14633 14802 * from the probe array. Now issue a dtrace_sync() to be sure that
14634 14803 * everyone has cleared out from any probe array processing.
14635 14804 */
14636 14805 dtrace_sync();
14637 14806
14638 14807 for (probe = first; probe != NULL; probe = first) {
14639 14808 first = probe->dtpr_nextmod;
14640 14809 prov = probe->dtpr_provider;
14641 14810 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
14642 14811 probe->dtpr_arg);
14643 14812 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
14644 14813 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
14645 14814 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
14646 14815 vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
14647 14816 kmem_free(probe, sizeof (dtrace_probe_t));
14648 14817 }
14649 14818
14650 14819 mutex_exit(&dtrace_lock);
14651 14820 mutex_exit(&mod_lock);
14652 14821 mutex_exit(&dtrace_provider_lock);
14653 14822 }
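The unload path above is worth generalizing: a probe is first unlinked
from every lookup structure, then dtrace_sync() guarantees that no CPU
is still referencing it from probe context, and only then is it
destroyed and freed. A minimal sketch of that ordering, with the
helper name invented for illustration:

	/*
	 * Hypothetical helper -- the ordering, not the name, is the
	 * point: unlink, sync, then free.
	 */
	static void
	example_retire_probe(dtrace_hash_t *hash, dtrace_probe_t *probe)
	{
		dtrace_hash_remove(hash, probe);  /* unfindable */
		dtrace_sync();	  /* no CPU still in probe context */
		kmem_free(probe, sizeof (dtrace_probe_t));
	}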
14654 14823
14655 14824 void
14656 14825 dtrace_suspend(void)
14657 14826 {
14658 14827 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
14659 14828 }
14660 14829
14661 14830 void
14662 14831 dtrace_resume(void)
14663 14832 {
14664 14833 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
14665 14834 }
14666 14835
14667 14836 static int
14668 14837 dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
14669 14838 {
14670 14839 ASSERT(MUTEX_HELD(&cpu_lock));
14671 14840 mutex_enter(&dtrace_lock);
14672 14841
14673 14842 switch (what) {
14674 14843 case CPU_CONFIG: {
14675 14844 dtrace_state_t *state;
14676 14845 dtrace_optval_t *opt, rs, c;
14677 14846
14678 14847 /*
14679 14848 * For now, we only allocate a new buffer for anonymous state.
14680 14849 */
14681 14850 if ((state = dtrace_anon.dta_state) == NULL)
14682 14851 break;
14683 14852
14684 14853 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
14685 14854 break;
14686 14855
14687 14856 opt = state->dts_options;
14688 14857 c = opt[DTRACEOPT_CPU];
14689 14858
14690 14859 if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
14691 14860 break;
14692 14861
14693 14862 /*
14694 14863 * Regardless of what the actual policy is, we're going to
14695 14864 * temporarily set our resize policy to be manual. We're
14696 14865 * also going to temporarily set our CPU option to denote
14697 14866 * the newly configured CPU.
14698 14867 */
14699 14868 rs = opt[DTRACEOPT_BUFRESIZE];
14700 14869 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
14701 14870 opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;
14702 14871
14703 14872 (void) dtrace_state_buffers(state);
14704 14873
14705 14874 opt[DTRACEOPT_BUFRESIZE] = rs;
14706 14875 opt[DTRACEOPT_CPU] = c;
14707 14876
14708 14877 break;
14709 14878 }
14710 14879
14711 14880 case CPU_UNCONFIG:
14712 14881 /*
14713 14882 * We don't free the buffer in the CPU_UNCONFIG case. (The
14714 14883 * buffer will be freed when the consumer exits.)
14715 14884 */
14716 14885 break;
14717 14886
14718 14887 default:
14719 14888 break;
14720 14889 }
14721 14890
14722 14891 mutex_exit(&dtrace_lock);
14723 14892 return (0);
14724 14893 }
14725 14894
14726 14895 static void
14727 14896 dtrace_cpu_setup_initial(processorid_t cpu)
14728 14897 {
14729 14898 (void) dtrace_cpu_setup(CPU_CONFIG, cpu);
14730 14899 }
14731 14900
14732 14901 static void
14733 14902 dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
14734 14903 {
14735 14904 if (dtrace_toxranges >= dtrace_toxranges_max) {
14736 14905 int osize, nsize;
14737 14906 dtrace_toxrange_t *range;
14738 14907
14739 14908 osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
14740 14909
14741 14910 if (osize == 0) {
14742 14911 ASSERT(dtrace_toxrange == NULL);
14743 14912 ASSERT(dtrace_toxranges_max == 0);
14744 14913 dtrace_toxranges_max = 1;
14745 14914 } else {
14746 14915 dtrace_toxranges_max <<= 1;
14747 14916 }
14748 14917
14749 14918 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
14750 14919 range = kmem_zalloc(nsize, KM_SLEEP);
14751 14920
14752 14921 if (dtrace_toxrange != NULL) {
14753 14922 ASSERT(osize != 0);
14754 14923 bcopy(dtrace_toxrange, range, osize);
14755 14924 kmem_free(dtrace_toxrange, osize);
14756 14925 }
14757 14926
14758 14927 dtrace_toxrange = range;
14759 14928 }
14760 14929
14761 14930 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
14762 14931 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);
14763 14932
14764 14933 dtrace_toxrange[dtrace_toxranges].dtt_base = base;
14765 14934 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
14766 14935 dtrace_toxranges++;
14767 14936 }
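dtrace_toxrange_add() records a base/limit window that probe context
must never load from, doubling the table as needed. Platform code
supplies these windows through the dtrace_toxic_ranges() callback that
dtrace_attach() invokes below. A hypothetical registration (addresses
invented for illustration):

	static void
	example_toxic_ranges(void (*func)(uintptr_t, uintptr_t))
	{
		/* e.g., a register window unsafe for probe context */
		func((uintptr_t)0xffff800000000000ULL,
		    (uintptr_t)0xffff800000100000ULL);
	}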
14768 14937
14938 +static void
14939 +dtrace_getf_barrier()
14940 +{
14941 + /*
14942 + * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
14943 + * that contain calls to getf(), this routine will be called on every
14944 + * closef() before either the underlying vnode is released or the
14945 + * file_t itself is freed. By the time we are here, it is essential
14946 + * that the file_t can no longer be accessed from a call to getf()
14947 + * in probe context -- that ensures that a dtrace_sync() can be used
14948 + * to clear out any enablings referring to the old structures.
14949 + */
14950 + if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
14951 + kcred->cr_zone->zone_dtrace_getf != 0)
14952 + dtrace_sync();
14953 +}
14954 +
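The caller of this barrier lives in the VFS close path rather than in
this file; a rough sketch of the contract, with example_closef()
standing in for the real closef():

	static void
	example_closef(file_t *fp)
	{
		/*
		 * By here fp must be unreachable from getf(), so the
		 * dtrace_sync() inside the barrier suffices to wait
		 * out any probe-context consumer of the old file_t.
		 */
		dtrace_getf_barrier();

		/* ... now release the vnode and free fp ... */
	}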
14769 14955 /*
14770 14956 * DTrace Driver Cookbook Functions
14771 14957 */
14772 14958 /*ARGSUSED*/
14773 14959 static int
14774 14960 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
14775 14961 {
14776 14962 dtrace_provider_id_t id;
14777 14963 dtrace_state_t *state = NULL;
14778 14964 dtrace_enabling_t *enab;
14779 14965
14780 14966 mutex_enter(&cpu_lock);
14781 14967 mutex_enter(&dtrace_provider_lock);
14782 14968 mutex_enter(&dtrace_lock);
14783 14969
14784 14970 if (ddi_soft_state_init(&dtrace_softstate,
14785 14971 sizeof (dtrace_state_t), 0) != 0) {
14786 14972 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
14787 14973 mutex_exit(&cpu_lock);
14788 14974 mutex_exit(&dtrace_provider_lock);
14789 14975 mutex_exit(&dtrace_lock);
14790 14976 return (DDI_FAILURE);
14791 14977 }
14792 14978
14793 14979 if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
14794 14980 DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
14795 14981 ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
14796 14982 DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
14797 14983 cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
14798 14984 ddi_remove_minor_node(devi, NULL);
14799 14985 ddi_soft_state_fini(&dtrace_softstate);
14800 14986 mutex_exit(&cpu_lock);
14801 14987 mutex_exit(&dtrace_provider_lock);
14802 14988 mutex_exit(&dtrace_lock);
14803 14989 return (DDI_FAILURE);
14804 14990 }
14805 14991
14806 14992 ddi_report_dev(devi);
14807 14993 dtrace_devi = devi;
14808 14994
14809 14995 dtrace_modload = dtrace_module_loaded;
14810 14996 dtrace_modunload = dtrace_module_unloaded;
14811 14997 dtrace_cpu_init = dtrace_cpu_setup_initial;
14812 14998 dtrace_helpers_cleanup = dtrace_helpers_destroy;
14813 14999 dtrace_helpers_fork = dtrace_helpers_duplicate;
14814 15000 dtrace_cpustart_init = dtrace_suspend;
14815 15001 dtrace_cpustart_fini = dtrace_resume;
14816 15002 dtrace_debugger_init = dtrace_suspend;
14817 15003 dtrace_debugger_fini = dtrace_resume;
14818 15004
14819 15005 register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
14820 15006
14821 15007 ASSERT(MUTEX_HELD(&cpu_lock));
14822 15008
14823 15009 dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
14824 15010 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
14825 15011 dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
14826 15012 UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
14827 15013 VM_SLEEP | VMC_IDENTIFIER);
14828 15014 dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
14829 15015 1, INT_MAX, 0);
14830 15016
14831 15017 dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
14832 15018 sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
14833 15019 NULL, NULL, NULL, NULL, NULL, 0);
14834 15020
14835 15021 ASSERT(MUTEX_HELD(&cpu_lock));
14836 15022 dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
14837 15023 offsetof(dtrace_probe_t, dtpr_nextmod),
14838 15024 offsetof(dtrace_probe_t, dtpr_prevmod));
14839 15025
14840 15026 dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
14841 15027 offsetof(dtrace_probe_t, dtpr_nextfunc),
14842 15028 offsetof(dtrace_probe_t, dtpr_prevfunc));
14843 15029
14844 15030 dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
14845 15031 offsetof(dtrace_probe_t, dtpr_nextname),
14846 15032 offsetof(dtrace_probe_t, dtpr_prevname));
14847 15033
14848 15034 if (dtrace_retain_max < 1) {
14849 15035 cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
14850 15036 "setting to 1", dtrace_retain_max);
14851 15037 dtrace_retain_max = 1;
14852 15038 }
14853 15039
14854 15040 /*
14855 15041 * Now discover our toxic ranges.
14856 15042 */
14857 15043 dtrace_toxic_ranges(dtrace_toxrange_add);
14858 15044
14859 15045 /*
14860 15046 * Before we register ourselves as a provider to our own framework,
14861 15047 * we would like to assert that dtrace_provider is NULL -- but that's
14862 15048 * not true if we were loaded as a dependency of a DTrace provider.
14863 15049 * Once we've registered, we can assert that dtrace_provider is our
14864 15050 * pseudo provider.
14865 15051 */
14866 15052 (void) dtrace_register("dtrace", &dtrace_provider_attr,
14867 15053 DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);
14868 15054
14869 15055 ASSERT(dtrace_provider != NULL);
14870 15056 ASSERT((dtrace_provider_id_t)dtrace_provider == id);
14871 15057
14872 15058 dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
14873 15059 dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
14874 15060 dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
14875 15061 dtrace_provider, NULL, NULL, "END", 0, NULL);
14876 15062 dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
14877 15063 dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
14878 15064
14879 15065 dtrace_anon_property();
14880 15066 mutex_exit(&cpu_lock);
14881 15067
14882 15068 /*
14883 15069 * If DTrace helper tracing is enabled, we need to allocate the
14884 15070 * trace buffer and initialize the values.
14885 15071 */
14886 15072 if (dtrace_helptrace_enabled) {
14887 15073 ASSERT(dtrace_helptrace_buffer == NULL);
14888 15074 dtrace_helptrace_buffer =
14889 15075 kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
14890 15076 dtrace_helptrace_next = 0;
14891 15077 }
14892 15078
14893 15079 /*
14894 15080 * If there are already providers, we must ask them to provide their
14895 15081 * probes, and then match any anonymous enabling against them. Note
14896 15082 * that there should be no other retained enablings at this time:
14897 15083  * the only retained enabling should be the anonymous
14898 15084 * enabling.
14899 15085 */
14900 15086 if (dtrace_anon.dta_enabling != NULL) {
14901 15087 ASSERT(dtrace_retained == dtrace_anon.dta_enabling);
14902 15088
14903 15089 dtrace_enabling_provide(NULL);
14904 15090 state = dtrace_anon.dta_state;
14905 15091
14906 15092 /*
14907 15093 * We couldn't hold cpu_lock across the above call to
14908 15094 * dtrace_enabling_provide(), but we must hold it to actually
14909 15095 * enable the probes. We have to drop all of our locks, pick
14910 15096 * up cpu_lock, and regain our locks before matching the
14911 15097 * retained anonymous enabling.
14912 15098 */
14913 15099 mutex_exit(&dtrace_lock);
14914 15100 mutex_exit(&dtrace_provider_lock);
14915 15101
14916 15102 mutex_enter(&cpu_lock);
14917 15103 mutex_enter(&dtrace_provider_lock);
14918 15104 mutex_enter(&dtrace_lock);
14919 15105
14920 15106 if ((enab = dtrace_anon.dta_enabling) != NULL)
14921 15107 (void) dtrace_enabling_match(enab, NULL);
14922 15108
14923 15109 mutex_exit(&cpu_lock);
14924 15110 }
14925 15111
14926 15112 mutex_exit(&dtrace_lock);
14927 15113 mutex_exit(&dtrace_provider_lock);
14928 15114
14929 15115 if (state != NULL) {
14930 15116 /*
14931 15117 * If we created any anonymous state, set it going now.
14932 15118 */
14933 15119 (void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
14934 15120 }
14935 15121
14936 15122 return (DDI_SUCCESS);
14937 15123 }
14938 15124
14939 15125 /*ARGSUSED*/
14940 15126 static int
14941 15127 dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
14942 15128 {
14943 15129 dtrace_state_t *state;
14944 15130 uint32_t priv;
14945 15131 uid_t uid;
14946 15132 zoneid_t zoneid;
14947 15133
14948 15134 if (getminor(*devp) == DTRACEMNRN_HELPER)
14949 15135 return (0);
14950 15136
14951 15137 /*
14952 15138 * If this wasn't an open with the "helper" minor, then it must be
14953 15139 * the "dtrace" minor.
14954 15140 */
14955 15141 if (getminor(*devp) != DTRACEMNRN_DTRACE)
14956 15142 return (ENXIO);
14957 15143
14958 15144 /*
14959 15145 * If no DTRACE_PRIV_* bits are set in the credential, then the
14960 15146 * caller lacks sufficient permission to do anything with DTrace.
14961 15147 */
14962 15148 dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
14963 15149 if (priv == DTRACE_PRIV_NONE)
14964 15150 return (EACCES);
14965 15151
14966 15152 /*
14967 15153 * Ask all providers to provide all their probes.
14968 15154 */
14969 15155 mutex_enter(&dtrace_provider_lock);
14970 15156 dtrace_probe_provide(NULL, NULL);
14971 15157 mutex_exit(&dtrace_provider_lock);
14972 15158
14973 15159 mutex_enter(&cpu_lock);
14974 15160 mutex_enter(&dtrace_lock);
14975 15161 dtrace_opens++;
14976 15162 dtrace_membar_producer();
14977 15163
14978 15164 /*
14979 15165 * If the kernel debugger is active (that is, if the kernel debugger
14980 15166 * modified text in some way), we won't allow the open.
14981 15167 */
14982 15168 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
14983 15169 dtrace_opens--;
14984 15170 mutex_exit(&cpu_lock);
14985 15171 mutex_exit(&dtrace_lock);
14986 15172 return (EBUSY);
14987 15173 }
14988 15174
14989 15175 state = dtrace_state_create(devp, cred_p);
14990 15176 mutex_exit(&cpu_lock);
14991 15177
14992 15178 if (state == NULL) {
14993 15179 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
14994 15180 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
14995 15181 mutex_exit(&dtrace_lock);
14996 15182 return (EAGAIN);
14997 15183 }
14998 15184
14999 15185 mutex_exit(&dtrace_lock);
15000 15186
15001 15187 return (0);
15002 15188 }
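From user level, the two minors correspond to the usual device nodes;
an illustration only (paths as installed on a stock system, error
handling elided):

	int dtfd = open("/dev/dtrace/dtrace", O_RDWR); /* priv-checked */
	int hfd = open("/dev/dtrace/helper", O_RDWR);  /* no priv check */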
15003 15189
15004 15190 /*ARGSUSED*/
15005 15191 static int
15006 15192 dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
15007 15193 {
15008 15194 minor_t minor = getminor(dev);
15009 15195 dtrace_state_t *state;
15010 15196
15011 15197 if (minor == DTRACEMNRN_HELPER)
15012 15198 return (0);
15013 15199
15014 15200 state = ddi_get_soft_state(dtrace_softstate, minor);
15015 15201
15016 15202 mutex_enter(&cpu_lock);
15017 15203 mutex_enter(&dtrace_lock);
15018 15204
15019 15205 if (state->dts_anon) {
15020 15206 /*
15021 15207 * There is anonymous state. Destroy that first.
15022 15208 */
15023 15209 ASSERT(dtrace_anon.dta_state == NULL);
15024 15210 dtrace_state_destroy(state->dts_anon);
15025 15211 }
15026 15212
15027 15213 dtrace_state_destroy(state);
15028 15214 ASSERT(dtrace_opens > 0);
15029 15215
15030 15216 /*
15031 15217 * Only relinquish control of the kernel debugger interface when there
15032 15218 * are no consumers and no anonymous enablings.
15033 15219 */
15034 15220 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
15035 15221 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
15036 15222
15037 15223 mutex_exit(&dtrace_lock);
15038 15224 mutex_exit(&cpu_lock);
15039 15225
15040 15226 return (0);
15041 15227 }
15042 15228
15043 15229 /*ARGSUSED*/
15044 15230 static int
15045 15231 dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv)
15046 15232 {
15047 15233 int rval;
15048 15234 dof_helper_t help, *dhp = NULL;
15049 15235
15050 15236 switch (cmd) {
15051 15237 case DTRACEHIOC_ADDDOF:
15052 15238 if (copyin((void *)arg, &help, sizeof (help)) != 0) {
15053 15239 dtrace_dof_error(NULL, "failed to copyin DOF helper");
15054 15240 return (EFAULT);
15055 15241 }
15056 15242
15057 15243 dhp = &help;
15058 15244 arg = (intptr_t)help.dofhp_dof;
15059 15245 /*FALLTHROUGH*/
15060 15246
15061 15247 case DTRACEHIOC_ADD: {
15062 15248 dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval);
15063 15249
15064 15250 if (dof == NULL)
15065 15251 return (rval);
15066 15252
15067 15253 mutex_enter(&dtrace_lock);
15068 15254
15069 15255 /*
15070 15256 * dtrace_helper_slurp() takes responsibility for the dof --
15071 15257 * it may free it now or it may save it and free it later.
15072 15258 */
15073 15259 if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) {
15074 15260 *rv = rval;
15075 15261 rval = 0;
15076 15262 } else {
15077 15263 rval = EINVAL;
15078 15264 }
15079 15265
15080 15266 mutex_exit(&dtrace_lock);
15081 15267 return (rval);
15082 15268 }
15083 15269
15084 15270 case DTRACEHIOC_REMOVE: {
15085 15271 mutex_enter(&dtrace_lock);
15086 15272 rval = dtrace_helper_destroygen(arg);
15087 15273 mutex_exit(&dtrace_lock);
15088 15274
15089 15275 return (rval);
15090 15276 }
15091 15277
15092 15278 default:
15093 15279 break;
15094 15280 }
15095 15281
15096 15282 return (ENOTTY);
15097 15283 }
15098 15284
15099 15285 /*ARGSUSED*/
15100 15286 static int
15101 15287 dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
15102 15288 {
15103 15289 minor_t minor = getminor(dev);
15104 15290 dtrace_state_t *state;
15105 15291 int rval;
15106 15292
15107 15293 if (minor == DTRACEMNRN_HELPER)
15108 15294 return (dtrace_ioctl_helper(cmd, arg, rv));
15109 15295
15110 15296 state = ddi_get_soft_state(dtrace_softstate, minor);
15111 15297
15112 15298 if (state->dts_anon) {
15113 15299 ASSERT(dtrace_anon.dta_state == NULL);
15114 15300 state = state->dts_anon;
15115 15301 }
15116 15302
15117 15303 switch (cmd) {
15118 15304 case DTRACEIOC_PROVIDER: {
15119 15305 dtrace_providerdesc_t pvd;
15120 15306 dtrace_provider_t *pvp;
15121 15307
15122 15308 if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0)
15123 15309 return (EFAULT);
15124 15310
15125 15311 pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
15126 15312 mutex_enter(&dtrace_provider_lock);
15127 15313
15128 15314 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
15129 15315 if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0)
15130 15316 break;
15131 15317 }
15132 15318
15133 15319 mutex_exit(&dtrace_provider_lock);
15134 15320
15135 15321 if (pvp == NULL)
15136 15322 return (ESRCH);
15137 15323
15138 15324 bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
15139 15325 bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
15140 15326 if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0)
15141 15327 return (EFAULT);
15142 15328
15143 15329 return (0);
15144 15330 }
15145 15331
15146 15332 case DTRACEIOC_EPROBE: {
15147 15333 dtrace_eprobedesc_t epdesc;
15148 15334 dtrace_ecb_t *ecb;
15149 15335 dtrace_action_t *act;
15150 15336 void *buf;
15151 15337 size_t size;
15152 15338 uintptr_t dest;
15153 15339 int nrecs;
15154 15340
15155 15341 if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0)
15156 15342 return (EFAULT);
15157 15343
15158 15344 mutex_enter(&dtrace_lock);
15159 15345
15160 15346 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
15161 15347 mutex_exit(&dtrace_lock);
15162 15348 return (EINVAL);
15163 15349 }
15164 15350
15165 15351 if (ecb->dte_probe == NULL) {
15166 15352 mutex_exit(&dtrace_lock);
15167 15353 return (EINVAL);
15168 15354 }
15169 15355
15170 15356 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
15171 15357 epdesc.dtepd_uarg = ecb->dte_uarg;
15172 15358 epdesc.dtepd_size = ecb->dte_size;
15173 15359
15174 15360 nrecs = epdesc.dtepd_nrecs;
15175 15361 epdesc.dtepd_nrecs = 0;
15176 15362 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
15177 15363 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
15178 15364 continue;
15179 15365
15180 15366 epdesc.dtepd_nrecs++;
15181 15367 }
15182 15368
15183 15369 /*
15184 15370 * Now that we have the size, we need to allocate a temporary
15185 15371 * buffer in which to store the complete description. We need
15186 15372 * the temporary buffer to be able to drop dtrace_lock()
15187 15373 * across the copyout(), below.
15188 15374 */
15189 15375 size = sizeof (dtrace_eprobedesc_t) +
15190 15376 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));
15191 15377
15192 15378 buf = kmem_alloc(size, KM_SLEEP);
15193 15379 dest = (uintptr_t)buf;
15194 15380
15195 15381 bcopy(&epdesc, (void *)dest, sizeof (epdesc));
15196 15382 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);
15197 15383
15198 15384 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
15199 15385 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
15200 15386 continue;
15201 15387
15202 15388 if (nrecs-- == 0)
15203 15389 break;
15204 15390
15205 15391 bcopy(&act->dta_rec, (void *)dest,
15206 15392 sizeof (dtrace_recdesc_t));
15207 15393 dest += sizeof (dtrace_recdesc_t);
15208 15394 }
15209 15395
15210 15396 mutex_exit(&dtrace_lock);
15211 15397
15212 15398 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
15213 15399 kmem_free(buf, size);
15214 15400 return (EFAULT);
15215 15401 }
15216 15402
15217 15403 kmem_free(buf, size);
15218 15404 return (0);
15219 15405 }
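The discipline above -- stage the description into a private buffer
while holding dtrace_lock, drop the lock, and only then copyout(),
since copyout() can fault and sleep -- is reused by DTRACEIOC_AGGDESC
below. A minimal sketch with assumed names:

	static int
	example_snapshot_out(void *src, size_t size, intptr_t arg)
	{
		void *buf = kmem_alloc(size, KM_SLEEP);
		int err = 0;

		mutex_enter(&dtrace_lock);
		bcopy(src, buf, size);		/* consistent snapshot */
		mutex_exit(&dtrace_lock);

		if (copyout(buf, (void *)arg, size) != 0)
			err = EFAULT;

		kmem_free(buf, size);
		return (err);
	}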
15220 15406
15221 15407 case DTRACEIOC_AGGDESC: {
15222 15408 dtrace_aggdesc_t aggdesc;
15223 15409 dtrace_action_t *act;
15224 15410 dtrace_aggregation_t *agg;
15225 15411 int nrecs;
15226 15412 uint32_t offs;
15227 15413 dtrace_recdesc_t *lrec;
15228 15414 void *buf;
15229 15415 size_t size;
15230 15416 uintptr_t dest;
15231 15417
15232 15418 if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0)
15233 15419 return (EFAULT);
15234 15420
15235 15421 mutex_enter(&dtrace_lock);
15236 15422
15237 15423 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
15238 15424 mutex_exit(&dtrace_lock);
15239 15425 return (EINVAL);
15240 15426 }
15241 15427
15242 15428 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;
15243 15429
15244 15430 nrecs = aggdesc.dtagd_nrecs;
15245 15431 aggdesc.dtagd_nrecs = 0;
15246 15432
15247 15433 offs = agg->dtag_base;
15248 15434 lrec = &agg->dtag_action.dta_rec;
15249 15435 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;
15250 15436
15251 15437 for (act = agg->dtag_first; ; act = act->dta_next) {
15252 15438 ASSERT(act->dta_intuple ||
15253 15439 DTRACEACT_ISAGG(act->dta_kind));
15254 15440
15255 15441 /*
15256 15442 * If this action has a record size of zero, it
15257 15443 * denotes an argument to the aggregating action.
15258 15444 * Because the presence of this record doesn't (or
15259 15445 * shouldn't) affect the way the data is interpreted,
15260 15446  * we don't copy it out, sparing user level the
15261 15447 * confusion of dealing with a zero-length record.
15262 15448 */
15263 15449 if (act->dta_rec.dtrd_size == 0) {
15264 15450 ASSERT(agg->dtag_hasarg);
15265 15451 continue;
15266 15452 }
15267 15453
15268 15454 aggdesc.dtagd_nrecs++;
15269 15455
15270 15456 if (act == &agg->dtag_action)
15271 15457 break;
15272 15458 }
15273 15459
15274 15460 /*
15275 15461 * Now that we have the size, we need to allocate a temporary
15276 15462 * buffer in which to store the complete description. We need
15277 15463 * the temporary buffer to be able to drop dtrace_lock()
15278 15464 * across the copyout(), below.
15279 15465 */
15280 15466 size = sizeof (dtrace_aggdesc_t) +
15281 15467 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));
15282 15468
15283 15469 buf = kmem_alloc(size, KM_SLEEP);
15284 15470 dest = (uintptr_t)buf;
15285 15471
15286 15472 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
15287 15473 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);
15288 15474
15289 15475 for (act = agg->dtag_first; ; act = act->dta_next) {
15290 15476 dtrace_recdesc_t rec = act->dta_rec;
15291 15477
15292 15478 /*
15293 15479 * See the comment in the above loop for why we pass
15294 15480 * over zero-length records.
15295 15481 */
15296 15482 if (rec.dtrd_size == 0) {
15297 15483 ASSERT(agg->dtag_hasarg);
15298 15484 continue;
15299 15485 }
15300 15486
15301 15487 if (nrecs-- == 0)
15302 15488 break;
15303 15489
15304 15490 rec.dtrd_offset -= offs;
15305 15491 bcopy(&rec, (void *)dest, sizeof (rec));
15306 15492 dest += sizeof (dtrace_recdesc_t);
15307 15493
15308 15494 if (act == &agg->dtag_action)
15309 15495 break;
15310 15496 }
15311 15497
15312 15498 mutex_exit(&dtrace_lock);
15313 15499
15314 15500 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
15315 15501 kmem_free(buf, size);
15316 15502 return (EFAULT);
15317 15503 }
15318 15504
15319 15505 kmem_free(buf, size);
15320 15506 return (0);
15321 15507 }
15322 15508
15323 15509 case DTRACEIOC_ENABLE: {
15324 15510 dof_hdr_t *dof;
15325 15511 dtrace_enabling_t *enab = NULL;
15326 15512 dtrace_vstate_t *vstate;
15327 15513 int err = 0;
15328 15514
15329 15515 *rv = 0;
15330 15516
15331 15517 /*
15332 15518 * If a NULL argument has been passed, we take this as our
15333 15519 * cue to reevaluate our enablings.
15334 15520 */
15335 15521 if (arg == NULL) {
15336 15522 dtrace_enabling_matchall();
15337 15523
15338 15524 return (0);
15339 15525 }
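		/*
		 * (From user level this re-evaluation path is reached
		 * with, e.g., ioctl(dtfd, DTRACEIOC_ENABLE, NULL) on
		 * an open "dtrace" minor -- an illustration, not
		 * lifted from libdtrace.)
		 */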
15340 15526
15341 15527 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
15342 15528 return (rval);
15343 15529
15344 15530 mutex_enter(&cpu_lock);
15345 15531 mutex_enter(&dtrace_lock);
15346 15532 vstate = &state->dts_vstate;
15347 15533
15348 15534 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
15349 15535 mutex_exit(&dtrace_lock);
15350 15536 mutex_exit(&cpu_lock);
15351 15537 dtrace_dof_destroy(dof);
15352 15538 return (EBUSY);
15353 15539 }
15354 15540
15355 15541 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
15356 15542 mutex_exit(&dtrace_lock);
15357 15543 mutex_exit(&cpu_lock);
15358 15544 dtrace_dof_destroy(dof);
15359 15545 return (EINVAL);
15360 15546 }
15361 15547
15362 15548 if ((rval = dtrace_dof_options(dof, state)) != 0) {
15363 15549 dtrace_enabling_destroy(enab);
15364 15550 mutex_exit(&dtrace_lock);
15365 15551 mutex_exit(&cpu_lock);
15366 15552 dtrace_dof_destroy(dof);
15367 15553 return (rval);
15368 15554 }
15369 15555
15370 15556 if ((err = dtrace_enabling_match(enab, rv)) == 0) {
15371 15557 err = dtrace_enabling_retain(enab);
15372 15558 } else {
15373 15559 dtrace_enabling_destroy(enab);
15374 15560 }
15375 15561
15376 15562 mutex_exit(&cpu_lock);
15377 15563 mutex_exit(&dtrace_lock);
15378 15564 dtrace_dof_destroy(dof);
15379 15565
15380 15566 return (err);
15381 15567 }
15382 15568
15383 15569 case DTRACEIOC_REPLICATE: {
15384 15570 dtrace_repldesc_t desc;
15385 15571 dtrace_probedesc_t *match = &desc.dtrpd_match;
15386 15572 dtrace_probedesc_t *create = &desc.dtrpd_create;
15387 15573 int err;
15388 15574
15389 15575 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15390 15576 return (EFAULT);
15391 15577
15392 15578 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
15393 15579 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
15394 15580 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
15395 15581 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
15396 15582
15397 15583 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
15398 15584 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
15399 15585 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
15400 15586 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
15401 15587
15402 15588 mutex_enter(&dtrace_lock);
15403 15589 err = dtrace_enabling_replicate(state, match, create);
15404 15590 mutex_exit(&dtrace_lock);
15405 15591
15406 15592 return (err);
15407 15593 }
15408 15594
15409 15595 case DTRACEIOC_PROBEMATCH:
15410 15596 case DTRACEIOC_PROBES: {
15411 15597 dtrace_probe_t *probe = NULL;
15412 15598 dtrace_probedesc_t desc;
15413 15599 dtrace_probekey_t pkey;
15414 15600 dtrace_id_t i;
15415 15601 int m = 0;
15416 15602 uint32_t priv;
15417 15603 uid_t uid;
15418 15604 zoneid_t zoneid;
15419 15605
15420 15606 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15421 15607 return (EFAULT);
15422 15608
15423 15609 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
15424 15610 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
15425 15611 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
15426 15612 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';
15427 15613
15428 15614 /*
15429 15615 * Before we attempt to match this probe, we want to give
15430 15616 * all providers the opportunity to provide it.
15431 15617 */
15432 15618 if (desc.dtpd_id == DTRACE_IDNONE) {
15433 15619 mutex_enter(&dtrace_provider_lock);
15434 15620 dtrace_probe_provide(&desc, NULL);
15435 15621 mutex_exit(&dtrace_provider_lock);
15436 15622 desc.dtpd_id++;
15437 15623 }
15438 15624
15439 15625 if (cmd == DTRACEIOC_PROBEMATCH) {
15440 15626 dtrace_probekey(&desc, &pkey);
15441 15627 pkey.dtpk_id = DTRACE_IDNONE;
15442 15628 }
15443 15629
15444 15630 dtrace_cred2priv(cr, &priv, &uid, &zoneid);
15445 15631
15446 15632 mutex_enter(&dtrace_lock);
15447 15633
15448 15634 if (cmd == DTRACEIOC_PROBEMATCH) {
15449 15635 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
15450 15636 if ((probe = dtrace_probes[i - 1]) != NULL &&
15451 15637 (m = dtrace_match_probe(probe, &pkey,
15452 15638 priv, uid, zoneid)) != 0)
15453 15639 break;
15454 15640 }
15455 15641
15456 15642 if (m < 0) {
15457 15643 mutex_exit(&dtrace_lock);
15458 15644 return (EINVAL);
15459 15645 }
15460 15646
15461 15647 } else {
15462 15648 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
15463 15649 if ((probe = dtrace_probes[i - 1]) != NULL &&
15464 15650 dtrace_match_priv(probe, priv, uid, zoneid))
15465 15651 break;
15466 15652 }
15467 15653 }
15468 15654
15469 15655 if (probe == NULL) {
15470 15656 mutex_exit(&dtrace_lock);
15471 15657 return (ESRCH);
15472 15658 }
15473 15659
15474 15660 dtrace_probe_description(probe, &desc);
15475 15661 mutex_exit(&dtrace_lock);
15476 15662
15477 15663 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15478 15664 return (EFAULT);
15479 15665
15480 15666 return (0);
15481 15667 }
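The resume-from-dtpd_id convention gives user level a simple walk over
the probe table; a consumer-side sketch (dtfd assumed open on the
"dtrace" minor, error handling elided):

	static void
	example_walk_probes(int dtfd)
	{
		dtrace_probedesc_t pd;

		bzero(&pd, sizeof (pd));    /* start at DTRACE_IDNONE */

		while (ioctl(dtfd, DTRACEIOC_PROBES, &pd) != -1) {
			(void) printf("%5u %s:%s:%s:%s\n", pd.dtpd_id,
			    pd.dtpd_provider, pd.dtpd_mod, pd.dtpd_func,
			    pd.dtpd_name);
			pd.dtpd_id++;	/* resume past this match */
		}
	}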
15482 15668
15483 15669 case DTRACEIOC_PROBEARG: {
15484 15670 dtrace_argdesc_t desc;
15485 15671 dtrace_probe_t *probe;
15486 15672 dtrace_provider_t *prov;
15487 15673
15488 15674 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15489 15675 return (EFAULT);
15490 15676
15491 15677 if (desc.dtargd_id == DTRACE_IDNONE)
15492 15678 return (EINVAL);
15493 15679
15494 15680 if (desc.dtargd_ndx == DTRACE_ARGNONE)
15495 15681 return (EINVAL);
15496 15682
15497 15683 mutex_enter(&dtrace_provider_lock);
15498 15684 mutex_enter(&mod_lock);
15499 15685 mutex_enter(&dtrace_lock);
15500 15686
15501 15687 if (desc.dtargd_id > dtrace_nprobes) {
15502 15688 mutex_exit(&dtrace_lock);
15503 15689 mutex_exit(&mod_lock);
15504 15690 mutex_exit(&dtrace_provider_lock);
15505 15691 return (EINVAL);
15506 15692 }
15507 15693
15508 15694 if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
15509 15695 mutex_exit(&dtrace_lock);
15510 15696 mutex_exit(&mod_lock);
15511 15697 mutex_exit(&dtrace_provider_lock);
15512 15698 return (EINVAL);
15513 15699 }
15514 15700
15515 15701 mutex_exit(&dtrace_lock);
15516 15702
15517 15703 prov = probe->dtpr_provider;
15518 15704
15519 15705 if (prov->dtpv_pops.dtps_getargdesc == NULL) {
15520 15706 /*
15521 15707 * There isn't any typed information for this probe.
15522 15708 * Set the argument number to DTRACE_ARGNONE.
15523 15709 */
15524 15710 desc.dtargd_ndx = DTRACE_ARGNONE;
15525 15711 } else {
15526 15712 desc.dtargd_native[0] = '\0';
15527 15713 desc.dtargd_xlate[0] = '\0';
15528 15714 desc.dtargd_mapping = desc.dtargd_ndx;
15529 15715
15530 15716 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
15531 15717 probe->dtpr_id, probe->dtpr_arg, &desc);
15532 15718 }
15533 15719
15534 15720 mutex_exit(&mod_lock);
15535 15721 mutex_exit(&dtrace_provider_lock);
15536 15722
15537 15723 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15538 15724 return (EFAULT);
15539 15725
15540 15726 return (0);
15541 15727 }
15542 15728
15543 15729 case DTRACEIOC_GO: {
15544 15730 processorid_t cpuid;
15545 15731 rval = dtrace_state_go(state, &cpuid);
15546 15732
15547 15733 if (rval != 0)
15548 15734 return (rval);
15549 15735
15550 15736 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
15551 15737 return (EFAULT);
15552 15738
15553 15739 return (0);
15554 15740 }
15555 15741
15556 15742 case DTRACEIOC_STOP: {
15557 15743 processorid_t cpuid;
15558 15744
15559 15745 mutex_enter(&dtrace_lock);
15560 15746 rval = dtrace_state_stop(state, &cpuid);
15561 15747 mutex_exit(&dtrace_lock);
15562 15748
15563 15749 if (rval != 0)
15564 15750 return (rval);
15565 15751
15566 15752 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
15567 15753 return (EFAULT);
15568 15754
15569 15755 return (0);
15570 15756 }
15571 15757
15572 15758 case DTRACEIOC_DOFGET: {
15573 15759 dof_hdr_t hdr, *dof;
15574 15760 uint64_t len;
15575 15761
15576 15762 if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0)
15577 15763 return (EFAULT);
15578 15764
15579 15765 mutex_enter(&dtrace_lock);
15580 15766 dof = dtrace_dof_create(state);
15581 15767 mutex_exit(&dtrace_lock);
15582 15768
15583 15769 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
15584 15770 rval = copyout(dof, (void *)arg, len);
15585 15771 dtrace_dof_destroy(dof);
15586 15772
15587 15773 return (rval == 0 ? 0 : EFAULT);
15588 15774 }
15589 15775
15590 15776 case DTRACEIOC_AGGSNAP:
15591 15777 case DTRACEIOC_BUFSNAP: {
15592 15778 dtrace_bufdesc_t desc;
15593 15779 caddr_t cached;
15594 15780 dtrace_buffer_t *buf;
15595 15781
15596 15782 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
15597 15783 return (EFAULT);
15598 15784
15599 15785 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
15600 15786 return (EINVAL);
15601 15787
15602 15788 mutex_enter(&dtrace_lock);
15603 15789
15604 15790 if (cmd == DTRACEIOC_BUFSNAP) {
15605 15791 buf = &state->dts_buffer[desc.dtbd_cpu];
15606 15792 } else {
15607 15793 buf = &state->dts_aggbuffer[desc.dtbd_cpu];
15608 15794 }
15609 15795
15610 15796 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
15611 15797 size_t sz = buf->dtb_offset;
15612 15798
15613 15799 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
15614 15800 mutex_exit(&dtrace_lock);
15615 15801 return (EBUSY);
15616 15802 }
15617 15803
15618 15804 /*
15619 15805 * If this buffer has already been consumed, we're
15620 15806 * going to indicate that there's nothing left here
15621 15807 * to consume.
15622 15808 */
15623 15809 if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
15624 15810 mutex_exit(&dtrace_lock);
15625 15811
15626 15812 desc.dtbd_size = 0;
15627 15813 desc.dtbd_drops = 0;
15628 15814 desc.dtbd_errors = 0;
15629 15815 desc.dtbd_oldest = 0;
15630 15816 sz = sizeof (desc);
15631 15817
15632 15818 if (copyout(&desc, (void *)arg, sz) != 0)
15633 15819 return (EFAULT);
15634 15820
15635 15821 return (0);
15636 15822 }
15637 15823
15638 15824 /*
15639 15825 * If this is a ring buffer that has wrapped, we want
15640 15826 * to copy the whole thing out.
15641 15827 */
15642 15828 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
15643 15829 dtrace_buffer_polish(buf);
15644 15830 sz = buf->dtb_size;
15645 15831 }
15646 15832
15647 15833 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
15648 15834 mutex_exit(&dtrace_lock);
15649 15835 return (EFAULT);
15650 15836 }
15651 15837
15652 15838 desc.dtbd_size = sz;
15653 15839 desc.dtbd_drops = buf->dtb_drops;
15654 15840 desc.dtbd_errors = buf->dtb_errors;
15655 15841 desc.dtbd_oldest = buf->dtb_xamot_offset;
15656 15842 desc.dtbd_timestamp = dtrace_gethrtime();
15657 15843
15658 15844 mutex_exit(&dtrace_lock);
15659 15845
15660 15846 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15661 15847 return (EFAULT);
15662 15848
15663 15849 buf->dtb_flags |= DTRACEBUF_CONSUMED;
15664 15850
15665 15851 return (0);
15666 15852 }
15667 15853
15668 15854 if (buf->dtb_tomax == NULL) {
15669 15855 ASSERT(buf->dtb_xamot == NULL);
15670 15856 mutex_exit(&dtrace_lock);
15671 15857 return (ENOENT);
15672 15858 }
15673 15859
15674 15860 cached = buf->dtb_tomax;
15675 15861 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
15676 15862
15677 15863 dtrace_xcall(desc.dtbd_cpu,
15678 15864 (dtrace_xcall_t)dtrace_buffer_switch, buf);
15679 15865
15680 15866 state->dts_errors += buf->dtb_xamot_errors;
15681 15867
15682 15868 /*
15683 15869 * If the buffers did not actually switch, then the cross call
15684 15870 * did not take place -- presumably because the given CPU is
15685 15871 * not in the ready set. If this is the case, we'll return
15686 15872 * ENOENT.
15687 15873 */
15688 15874 if (buf->dtb_tomax == cached) {
15689 15875 ASSERT(buf->dtb_xamot != cached);
15690 15876 mutex_exit(&dtrace_lock);
15691 15877 return (ENOENT);
15692 15878 }
15693 15879
15694 15880 ASSERT(cached == buf->dtb_xamot);
15695 15881
15696 15882 /*
15697 15883 * We have our snapshot; now copy it out.
15698 15884 */
15699 15885 if (copyout(buf->dtb_xamot, desc.dtbd_data,
15700 15886 buf->dtb_xamot_offset) != 0) {
15701 15887 mutex_exit(&dtrace_lock);
15702 15888 return (EFAULT);
15703 15889 }
15704 15890
15705 15891 desc.dtbd_size = buf->dtb_xamot_offset;
15706 15892 desc.dtbd_drops = buf->dtb_xamot_drops;
15707 15893 desc.dtbd_errors = buf->dtb_xamot_errors;
15708 15894 desc.dtbd_oldest = 0;
15709 15895 desc.dtbd_timestamp = buf->dtb_switched;
15710 15896
15711 15897 mutex_exit(&dtrace_lock);
15712 15898
15713 15899 /*
15714 15900 * Finally, copy out the buffer description.
15715 15901 */
15716 15902 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
15717 15903 return (EFAULT);
15718 15904
15719 15905 return (0);
15720 15906 }
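On the consumer side, a snapshot supplies dtbd_cpu and a destination
buffer at least as large as the per-CPU trace buffer; ENOENT means the
CPU had nothing to switch. A sketch with the names assumed:

	static int
	example_bufsnap(int dtfd, processorid_t cpu, caddr_t data)
	{
		dtrace_bufdesc_t desc;

		bzero(&desc, sizeof (desc));
		desc.dtbd_cpu = cpu;
		desc.dtbd_data = data;	/* >= the buffer size, assumed */

		if (ioctl(dtfd, DTRACEIOC_BUFSNAP, &desc) == -1)
			return (errno);	/* e.g., ENOENT, EBUSY, EFAULT */

		/* desc.dtbd_size bytes of trace data are now valid */
		return (0);
	}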
15721 15907
15722 15908 case DTRACEIOC_CONF: {
15723 15909 dtrace_conf_t conf;
15724 15910
15725 15911 bzero(&conf, sizeof (conf));
15726 15912 conf.dtc_difversion = DIF_VERSION;
15727 15913 conf.dtc_difintregs = DIF_DIR_NREGS;
15728 15914 conf.dtc_diftupregs = DIF_DTR_NREGS;
15729 15915 conf.dtc_ctfmodel = CTF_MODEL_NATIVE;
15730 15916
15731 15917 if (copyout(&conf, (void *)arg, sizeof (conf)) != 0)
15732 15918 return (EFAULT);
15733 15919
15734 15920 return (0);
15735 15921 }
15736 15922
15737 15923 case DTRACEIOC_STATUS: {
15738 15924 dtrace_status_t stat;
15739 15925 dtrace_dstate_t *dstate;
15740 15926 int i, j;
15741 15927 uint64_t nerrs;
15742 15928
15743 15929 /*
15744 15930 * See the comment in dtrace_state_deadman() for the reason
15745 15931 * for setting dts_laststatus to INT64_MAX before setting
15746 15932 * it to the correct value.
15747 15933 */
15748 15934 state->dts_laststatus = INT64_MAX;
15749 15935 dtrace_membar_producer();
15750 15936 state->dts_laststatus = dtrace_gethrtime();
15751 15937
15752 15938 bzero(&stat, sizeof (stat));
15753 15939
15754 15940 mutex_enter(&dtrace_lock);
15755 15941
15756 15942 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
15757 15943 mutex_exit(&dtrace_lock);
15758 15944 return (ENOENT);
15759 15945 }
15760 15946
15761 15947 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
15762 15948 stat.dtst_exiting = 1;
15763 15949
15764 15950 nerrs = state->dts_errors;
15765 15951 dstate = &state->dts_vstate.dtvs_dynvars;
15766 15952
15767 15953 for (i = 0; i < NCPU; i++) {
15768 15954 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];
15769 15955
15770 15956 stat.dtst_dyndrops += dcpu->dtdsc_drops;
15771 15957 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
15772 15958 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;
15773 15959
15774 15960 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
15775 15961 stat.dtst_filled++;
15776 15962
15777 15963 nerrs += state->dts_buffer[i].dtb_errors;
15778 15964
15779 15965 for (j = 0; j < state->dts_nspeculations; j++) {
15780 15966 dtrace_speculation_t *spec;
15781 15967 dtrace_buffer_t *buf;
15782 15968
15783 15969 spec = &state->dts_speculations[j];
15784 15970 buf = &spec->dtsp_buffer[i];
15785 15971 stat.dtst_specdrops += buf->dtb_xamot_drops;
15786 15972 }
15787 15973 }
15788 15974
15789 15975 stat.dtst_specdrops_busy = state->dts_speculations_busy;
15790 15976 stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
15791 15977 stat.dtst_stkstroverflows = state->dts_stkstroverflows;
15792 15978 stat.dtst_dblerrors = state->dts_dblerrors;
15793 15979 stat.dtst_killed =
15794 15980 (state->dts_activity == DTRACE_ACTIVITY_KILLED);
15795 15981 stat.dtst_errors = nerrs;
15796 15982
15797 15983 mutex_exit(&dtrace_lock);
15798 15984
15799 15985 if (copyout(&stat, (void *)arg, sizeof (stat)) != 0)
15800 15986 return (EFAULT);
15801 15987
15802 15988 return (0);
15803 15989 }
15804 15990
15805 15991 case DTRACEIOC_FORMAT: {
15806 15992 dtrace_fmtdesc_t fmt;
15807 15993 char *str;
15808 15994 int len;
15809 15995
15810 15996 if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0)
15811 15997 return (EFAULT);
15812 15998
15813 15999 mutex_enter(&dtrace_lock);
15814 16000
15815 16001 if (fmt.dtfd_format == 0 ||
15816 16002 fmt.dtfd_format > state->dts_nformats) {
15817 16003 mutex_exit(&dtrace_lock);
15818 16004 return (EINVAL);
15819 16005 }
15820 16006
15821 16007 /*
15822 16008 * Format strings are allocated contiguously and they are
15823 16009 * never freed; if a format index is less than the number
15824 16010 * of formats, we can assert that the format map is non-NULL
15825 16011 * and that the format for the specified index is non-NULL.
15826 16012 */
15827 16013 ASSERT(state->dts_formats != NULL);
15828 16014 str = state->dts_formats[fmt.dtfd_format - 1];
15829 16015 ASSERT(str != NULL);
15830 16016
15831 16017 len = strlen(str) + 1;
15832 16018
15833 16019 if (len > fmt.dtfd_length) {
15834 16020 fmt.dtfd_length = len;
15835 16021
15836 16022 if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) {
15837 16023 mutex_exit(&dtrace_lock);
15838 16024 return (EINVAL);
15839 16025 }
15840 16026 } else {
15841 16027 if (copyout(str, fmt.dtfd_string, len) != 0) {
15842 16028 mutex_exit(&dtrace_lock);
15843 16029 return (EINVAL);
15844 16030 }
15845 16031 }
15846 16032
15847 16033 mutex_exit(&dtrace_lock);
15848 16034 return (0);
15849 16035 }
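The length-then-fetch handshake above implies a two-call consumer; a
sketch with dtfd assumed:

	static char *
	example_getformat(int dtfd, uint16_t format)
	{
		dtrace_fmtdesc_t fmt;

		bzero(&fmt, sizeof (fmt));
		fmt.dtfd_format = format;

		/* call 1: length of zero; the kernel writes it back */
		if (ioctl(dtfd, DTRACEIOC_FORMAT, &fmt) == -1)
			return (NULL);

		if ((fmt.dtfd_string = malloc(fmt.dtfd_length)) == NULL)
			return (NULL);

		/* call 2: the buffer is now large enough to fetch */
		if (ioctl(dtfd, DTRACEIOC_FORMAT, &fmt) == -1) {
			free(fmt.dtfd_string);
			return (NULL);
		}

		return (fmt.dtfd_string);
	}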
15850 16036
15851 16037 default:
15852 16038 break;
15853 16039 }
15854 16040
15855 16041 return (ENOTTY);
15856 16042 }
15857 16043
15858 16044 /*ARGSUSED*/
15859 16045 static int
15860 16046 dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
15861 16047 {
15862 16048 dtrace_state_t *state;
15863 16049
15864 16050 switch (cmd) {
15865 16051 case DDI_DETACH:
15866 16052 break;
15867 16053
15868 16054 case DDI_SUSPEND:
15869 16055 return (DDI_SUCCESS);
15870 16056
15871 16057 default:
15872 16058 return (DDI_FAILURE);
15873 16059 }
15874 16060
15875 16061 mutex_enter(&cpu_lock);
15876 16062 mutex_enter(&dtrace_provider_lock);
15877 16063 mutex_enter(&dtrace_lock);
15878 16064
15879 16065 ASSERT(dtrace_opens == 0);
15880 16066
15881 16067 if (dtrace_helpers > 0) {
15882 16068 mutex_exit(&dtrace_provider_lock);
15883 16069 mutex_exit(&dtrace_lock);
15884 16070 mutex_exit(&cpu_lock);
15885 16071 return (DDI_FAILURE);
15886 16072 }
15887 16073
15888 16074 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
15889 16075 mutex_exit(&dtrace_provider_lock);
15890 16076 mutex_exit(&dtrace_lock);
15891 16077 mutex_exit(&cpu_lock);
15892 16078 return (DDI_FAILURE);
15893 16079 }
15894 16080
15895 16081 dtrace_provider = NULL;
15896 16082
15897 16083 if ((state = dtrace_anon_grab()) != NULL) {
15898 16084 /*
15899 16085 * If there were ECBs on this state, the provider should
15900 16086 * not have been allowed to detach; assert that there are
15901 16087 * none.
15902 16088 */
15903 16089 ASSERT(state->dts_necbs == 0);
15904 16090 dtrace_state_destroy(state);
15905 16091
15906 16092 /*
15907 16093 * If we're being detached with anonymous state, we need to
15908 16094 * indicate to the kernel debugger that DTrace is now inactive.
15909 16095 */
15910 16096 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
15911 16097 }
15912 16098
15913 16099 bzero(&dtrace_anon, sizeof (dtrace_anon_t));
15914 16100 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
15915 16101 dtrace_cpu_init = NULL;
15916 16102 dtrace_helpers_cleanup = NULL;
15917 16103 dtrace_helpers_fork = NULL;
15918 16104 dtrace_cpustart_init = NULL;
15919 16105 dtrace_cpustart_fini = NULL;
15920 16106 dtrace_debugger_init = NULL;
15921 16107 dtrace_debugger_fini = NULL;
15922 16108 dtrace_modload = NULL;
15923 16109 dtrace_modunload = NULL;
15924 16110
16111 + ASSERT(dtrace_getf == 0);
16112 + ASSERT(dtrace_closef == NULL);
16113 +
15925 16114 mutex_exit(&cpu_lock);
15926 16115
15927 16116 if (dtrace_helptrace_enabled) {
15928 16117 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
15929 16118 dtrace_helptrace_buffer = NULL;
15930 16119 }
15931 16120
15932 16121 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
15933 16122 dtrace_probes = NULL;
15934 16123 dtrace_nprobes = 0;
15935 16124
15936 16125 dtrace_hash_destroy(dtrace_bymod);
15937 16126 dtrace_hash_destroy(dtrace_byfunc);
15938 16127 dtrace_hash_destroy(dtrace_byname);
15939 16128 dtrace_bymod = NULL;
15940 16129 dtrace_byfunc = NULL;
15941 16130 dtrace_byname = NULL;
15942 16131
15943 16132 kmem_cache_destroy(dtrace_state_cache);
15944 16133 vmem_destroy(dtrace_minor);
15945 16134 vmem_destroy(dtrace_arena);
15946 16135
15947 16136 if (dtrace_toxrange != NULL) {
15948 16137 kmem_free(dtrace_toxrange,
15949 16138 dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
15950 16139 dtrace_toxrange = NULL;
15951 16140 dtrace_toxranges = 0;
15952 16141 dtrace_toxranges_max = 0;
15953 16142 }
15954 16143
15955 16144 ddi_remove_minor_node(dtrace_devi, NULL);
15956 16145 dtrace_devi = NULL;
15957 16146
15958 16147 ddi_soft_state_fini(&dtrace_softstate);
15959 16148
15960 16149 ASSERT(dtrace_vtime_references == 0);
15961 16150 ASSERT(dtrace_opens == 0);
15962 16151 ASSERT(dtrace_retained == NULL);
15963 16152
15964 16153 mutex_exit(&dtrace_lock);
15965 16154 mutex_exit(&dtrace_provider_lock);
15966 16155
15967 16156 /*
15968 16157 * We don't destroy the task queue until after we have dropped our
15969 16158 * locks (taskq_destroy() may block on running tasks). To prevent
15970 16159 * attempting to do work after we have effectively detached but before
15971 16160 * the task queue has been destroyed, all tasks dispatched via the
15972 16161 * task queue must check that DTrace is still attached before
15973 16162 * performing any operation.
15974 16163 */
15975 16164 taskq_destroy(dtrace_taskq);
15976 16165 dtrace_taskq = NULL;
15977 16166
15978 16167 return (DDI_SUCCESS);
15979 16168 }
15980 16169
15981 16170 /*ARGSUSED*/
15982 16171 static int
15983 16172 dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
15984 16173 {
15985 16174 int error;
15986 16175
15987 16176 switch (infocmd) {
15988 16177 case DDI_INFO_DEVT2DEVINFO:
15989 16178 *result = (void *)dtrace_devi;
15990 16179 error = DDI_SUCCESS;
15991 16180 break;
15992 16181 case DDI_INFO_DEVT2INSTANCE:
15993 16182 *result = (void *)0;
15994 16183 error = DDI_SUCCESS;
15995 16184 break;
15996 16185 default:
15997 16186 error = DDI_FAILURE;
15998 16187 }
15999 16188 return (error);
16000 16189 }
16001 16190
16002 16191 static struct cb_ops dtrace_cb_ops = {
16003 16192 dtrace_open, /* open */
16004 16193 dtrace_close, /* close */
16005 16194 nulldev, /* strategy */
16006 16195 nulldev, /* print */
16007 16196 nodev, /* dump */
16008 16197 nodev, /* read */
16009 16198 nodev, /* write */
16010 16199 dtrace_ioctl, /* ioctl */
16011 16200 nodev, /* devmap */
16012 16201 nodev, /* mmap */
16013 16202 nodev, /* segmap */
16014 16203 nochpoll, /* poll */
16015 16204 ddi_prop_op, /* cb_prop_op */
16016 16205 0, /* streamtab */
16017 16206 D_NEW | D_MP /* Driver compatibility flag */
16018 16207 };
16019 16208
16020 16209 static struct dev_ops dtrace_ops = {
16021 16210 DEVO_REV, /* devo_rev */
16022 16211 0, /* refcnt */
16023 16212 dtrace_info, /* get_dev_info */
16024 16213 nulldev, /* identify */
16025 16214 nulldev, /* probe */
16026 16215 dtrace_attach, /* attach */
16027 16216 dtrace_detach, /* detach */
16028 16217 nodev, /* reset */
16029 16218 &dtrace_cb_ops, /* driver operations */
16030 16219 NULL, /* bus operations */
16031 16220 nodev, /* dev power */
16032 16221 ddi_quiesce_not_needed, /* quiesce */
16033 16222 };
16034 16223
16035 16224 static struct modldrv modldrv = {
16036 16225 &mod_driverops, /* module type (this is a pseudo driver) */
16037 16226 "Dynamic Tracing", /* name of module */
16038 16227 &dtrace_ops, /* driver ops */
16039 16228 };
16040 16229
16041 16230 static struct modlinkage modlinkage = {
16042 16231 MODREV_1,
16043 16232 (void *)&modldrv,
16044 16233 NULL
16045 16234 };
16046 16235
16047 16236 int
16048 16237 _init(void)
16049 16238 {
16050 16239 return (mod_install(&modlinkage));
16051 16240 }
16052 16241
16053 16242 int
16054 16243 _info(struct modinfo *modinfop)
16055 16244 {
16056 16245 return (mod_info(&modlinkage, modinfop));
16057 16246 }
16058 16247
16059 16248 int
16060 16249 _fini(void)
16061 16250 {
16062 16251 return (mod_remove(&modlinkage));
16063 16252 }