8158 Want named threads API
9857 proc manpages should have LIBRARY section
--- old/usr/src/uts/common/dtrace/dtrace.c
+++ new/usr/src/uts/common/dtrace/dtrace.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 - * Copyright (c) 2017, Joyent, Inc.
24 + * Copyright (c) 2018, Joyent, Inc.
25 25 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * DTrace - Dynamic Tracing for Solaris
30 30 *
31 31 * This is the implementation of the Solaris Dynamic Tracing framework
32 32 * (DTrace). The user-visible interface to DTrace is described at length in
33 33 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace
34 34 * library, the in-kernel DTrace framework, and the DTrace providers are
35 35 * described in the block comments in the <sys/dtrace.h> header file. The
36 36 * internal architecture of DTrace is described in the block comments in the
37 37 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace
38 38 * implementation very much assume mastery of all of these sources; if one has
39 39 * an unanswered question about the implementation, one should consult them
40 40 * first.
41 41 *
42 42 * The functions here are ordered roughly as follows:
43 43 *
44 44 * - Probe context functions
45 45 * - Probe hashing functions
46 46 * - Non-probe context utility functions
47 47 * - Matching functions
48 48 * - Provider-to-Framework API functions
49 49 * - Probe management functions
50 50 * - DIF object functions
51 51 * - Format functions
52 52 * - Predicate functions
53 53 * - ECB functions
54 54 * - Buffer functions
55 55 * - Enabling functions
56 56 * - DOF functions
57 57 * - Anonymous enabling functions
58 58 * - Consumer state functions
59 59 * - Helper functions
60 60 * - Hook functions
61 61 * - Driver cookbook functions
62 62 *
63 63 * Each group of functions begins with a block comment labelled the "DTrace
64 64 * [Group] Functions", allowing one to find each block by searching forward
65 65 * on capital-f functions.
66 66 */
67 67 #include <sys/errno.h>
68 68 #include <sys/stat.h>
69 69 #include <sys/modctl.h>
70 70 #include <sys/conf.h>
71 71 #include <sys/systm.h>
72 72 #include <sys/ddi.h>
73 73 #include <sys/sunddi.h>
74 74 #include <sys/cpuvar.h>
75 75 #include <sys/kmem.h>
76 76 #include <sys/strsubr.h>
77 77 #include <sys/sysmacros.h>
78 78 #include <sys/dtrace_impl.h>
79 79 #include <sys/atomic.h>
80 80 #include <sys/cmn_err.h>
81 81 #include <sys/mutex_impl.h>
82 82 #include <sys/rwlock_impl.h>
83 83 #include <sys/ctf_api.h>
84 84 #include <sys/panic.h>
85 85 #include <sys/priv_impl.h>
86 86 #include <sys/policy.h>
87 87 #include <sys/cred_impl.h>
88 88 #include <sys/procfs_isa.h>
89 89 #include <sys/taskq.h>
90 90 #include <sys/mkdev.h>
91 91 #include <sys/kdi.h>
92 92 #include <sys/zone.h>
93 93 #include <sys/socket.h>
94 94 #include <netinet/in.h>
95 95 #include "strtolctype.h"
96 96
97 97 /*
98 98 * DTrace Tunable Variables
99 99 *
100 100 * The following variables may be tuned by adding a line to /etc/system that
101 101 * includes both the name of the DTrace module ("dtrace") and the name of the
102 102 * variable. For example:
103 103 *
104 104 * set dtrace:dtrace_destructive_disallow = 1
105 105 *
106 106 * In general, the only variables that one should be tuning this way are those
107 107 * that affect system-wide DTrace behavior, and for which the default behavior
108 108 * is undesirable. Most of these variables are tunable on a per-consumer
109 109 * basis using DTrace options, and need not be tuned on a system-wide basis.
110 110 * When tuning these variables, avoid pathological values; while some attempt
111 111 * is made to verify the integrity of these variables, they are not considered
112 112 * part of the supported interface to DTrace, and they are therefore not
113 113 * checked comprehensively. Further, these variables should not be tuned
114 114 * dynamically via "mdb -kw" or other means; they should only be tuned via
115 115 * /etc/system.
116 116 */
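As a further illustration of the format above (the value below is hypothetical, chosen only to show a numeric tunable, and is not a recommendation), doubling the default DOF size cap to 16 MB would look like:

	set dtrace:dtrace_dof_maxsize = 0x1000000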
117 117 int dtrace_destructive_disallow = 0;
118 118 dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
119 119 size_t dtrace_difo_maxsize = (256 * 1024);
120 120 dtrace_optval_t dtrace_dof_maxsize = (8 * 1024 * 1024);
121 121 size_t dtrace_statvar_maxsize = (16 * 1024);
122 122 size_t dtrace_actions_max = (16 * 1024);
123 123 size_t dtrace_retain_max = 1024;
124 124 dtrace_optval_t dtrace_helper_actions_max = 1024;
125 125 dtrace_optval_t dtrace_helper_providers_max = 32;
126 126 dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
127 127 size_t dtrace_strsize_default = 256;
128 128 dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */
129 129 dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */
130 130 dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */
131 131 dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */
132 132 dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */
133 133 dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */
134 134 dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */
135 135 dtrace_optval_t dtrace_nspec_default = 1;
136 136 dtrace_optval_t dtrace_specsize_default = 32 * 1024;
137 137 dtrace_optval_t dtrace_stackframes_default = 20;
138 138 dtrace_optval_t dtrace_ustackframes_default = 20;
139 139 dtrace_optval_t dtrace_jstackframes_default = 50;
140 140 dtrace_optval_t dtrace_jstackstrsize_default = 512;
141 141 int dtrace_msgdsize_max = 128;
142 142 hrtime_t dtrace_chill_max = MSEC2NSEC(500); /* 500 ms */
143 143 hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */
144 144 int dtrace_devdepth_max = 32;
145 145 int dtrace_err_verbose;
146 146 hrtime_t dtrace_deadman_interval = NANOSEC;
147 147 hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
148 148 hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
149 149 hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;
150 150
151 151 /*
152 152 * DTrace External Variables
153 153 *
154 154 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
155 155 * available to DTrace consumers via the backtick (`) syntax. One of these,
156 156 * dtrace_zero, is made deliberately so: it is provided as a source of
157 157 * well-known, zero-filled memory. While this variable is not documented,
158 158 * it is used by some translators as an implementation detail.
159 159 */
160 160 const char dtrace_zero[256] = { 0 }; /* zero-filled memory */
161 161
162 162 /*
163 163 * DTrace Internal Variables
164 164 */
165 165 static dev_info_t *dtrace_devi; /* device info */
166 166 static vmem_t *dtrace_arena; /* probe ID arena */
167 167 static vmem_t *dtrace_minor; /* minor number arena */
168 168 static taskq_t *dtrace_taskq; /* task queue */
169 169 static dtrace_probe_t **dtrace_probes; /* array of all probes */
170 170 static int dtrace_nprobes; /* number of probes */
171 171 static dtrace_provider_t *dtrace_provider; /* provider list */
172 172 static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
173 173 static int dtrace_opens; /* number of opens */
174 174 static int dtrace_helpers; /* number of helpers */
175 175 static int dtrace_getf; /* number of unpriv getf()s */
176 176 static void *dtrace_softstate; /* softstate pointer */
177 177 static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
178 178 static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
179 179 static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
180 180 static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */
181 181 static int dtrace_toxranges; /* number of toxic ranges */
182 182 static int dtrace_toxranges_max; /* size of toxic range array */
183 183 static dtrace_anon_t dtrace_anon; /* anonymous enabling */
184 184 static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */
185 185 static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */
186 186 static kthread_t *dtrace_panicked; /* panicking thread */
187 187 static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
188 188 static dtrace_genid_t dtrace_probegen; /* current probe generation */
189 189 static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
190 190 static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
191 191 static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
192 192 static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
193 193 static int dtrace_dynvar_failclean; /* dynvars failed to clean */
194 194
195 195 /*
196 196 * DTrace Locking
197 197 * DTrace is protected by three (relatively coarse-grained) locks:
198 198 *
199 199 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
200 200 * including enabling state, probes, ECBs, consumer state, helper state,
201 201 * etc. Importantly, dtrace_lock is _not_ required when in probe context;
202 202 * probe context is lock-free -- synchronization is handled via the
203 203 * dtrace_sync() cross call mechanism.
204 204 *
205 205 * (2) dtrace_provider_lock is required when manipulating provider state, or
206 206 * when provider state must be held constant.
207 207 *
208 208 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
209 209 * when meta provider state must be held constant.
210 210 *
211 211 * The lock ordering between these three locks is dtrace_meta_lock before
212 212 * dtrace_provider_lock before dtrace_lock. (In particular, there are
213 213 * several places where dtrace_provider_lock is held by the framework as it
214 214 * calls into the providers -- which then call back into the framework,
215 215 * grabbing dtrace_lock.)
216 216 *
217 217 * There are two other locks in the mix: mod_lock and cpu_lock. With respect
218 218 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
219 219 * role as a coarse-grained lock; it is acquired before both of these locks.
220 220 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
221 221 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
222 222 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
223 223 * acquired _between_ dtrace_provider_lock and dtrace_lock.
224 224 */
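To make the ordering concrete, a hypothetical caller (a sketch, not code from this file) that needed all three DTrace locks would nest the acquisitions as follows:

	mutex_enter(&dtrace_meta_lock);
	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);
	/* ... manipulate meta provider, provider and probe state ... */
	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);
	mutex_exit(&dtrace_meta_lock);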
225 225 static kmutex_t dtrace_lock; /* probe state lock */
226 226 static kmutex_t dtrace_provider_lock; /* provider state lock */
227 227 static kmutex_t dtrace_meta_lock; /* meta-provider state lock */
228 228
229 229 /*
230 230 * DTrace Provider Variables
231 231 *
232 232 * These are the variables relating to DTrace as a provider (that is, the
233 233 * provider of the BEGIN, END, and ERROR probes).
234 234 */
235 235 static dtrace_pattr_t dtrace_provider_attr = {
236 236 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
237 237 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
238 238 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
239 239 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
240 240 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
241 241 };
242 242
243 243 static void
244 244 dtrace_nullop(void)
245 245 {}
246 246
247 247 static int
248 248 dtrace_enable_nullop(void)
249 249 {
250 250 return (0);
251 251 }
252 252
253 253 static dtrace_pops_t dtrace_provider_ops = {
254 254 (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
255 255 (void (*)(void *, struct modctl *))dtrace_nullop,
256 256 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
257 257 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
258 258 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
259 259 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
260 260 NULL,
261 261 NULL,
262 262 NULL,
263 263 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop
264 264 };
265 265
266 266 static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */
267 267 static dtrace_id_t dtrace_probeid_end; /* special END probe */
268 268 dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
269 269
270 270 /*
271 271 * DTrace Helper Tracing Variables
272 272 *
273 273 * These variables should be set dynamically to enable helper tracing. The
274 274 * only variables that should be set are dtrace_helptrace_enable (which should
275 275 * be set to a non-zero value to allocate helper tracing buffers on the next
276 276 * open of /dev/dtrace) and dtrace_helptrace_disable (which should be set to a
277 277 * non-zero value to deallocate helper tracing buffers on the next close of
278 278 * /dev/dtrace). When (and only when) helper tracing is disabled, the
279 279 * buffer size may also be set via dtrace_helptrace_bufsize.
280 280 */
281 281 int dtrace_helptrace_enable = 0;
282 282 int dtrace_helptrace_disable = 0;
283 283 int dtrace_helptrace_bufsize = 16 * 1024 * 1024;
284 284 uint32_t dtrace_helptrace_nlocals;
285 285 static dtrace_helptrace_t *dtrace_helptrace_buffer;
286 286 static uint32_t dtrace_helptrace_next = 0;
287 287 static int dtrace_helptrace_wrapped = 0;
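As a sketch of the intended workflow (the mdb invocation is an assumption, not prescribed by this file), helper tracing would typically be enabled by writing the variable in the running kernel and then reopening /dev/dtrace:

	# echo "dtrace_helptrace_enable/W 1" | mdb -kw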
288 288
289 289 /*
290 290 * DTrace Error Hashing
291 291 *
292 292 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
293 293 * table. This is very useful for checking coverage of tests that are
294 294 * expected to induce DIF or DOF processing errors, and may be useful for
295 295 * debugging problems in the DIF code generator or in DOF generation. The
296 296 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
297 297 */
298 298 #ifdef DEBUG
299 299 static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ];
300 300 static const char *dtrace_errlast;
301 301 static kthread_t *dtrace_errthread;
302 302 static kmutex_t dtrace_errlock;
303 303 #endif
304 304
305 305 /*
306 306 * DTrace Macros and Constants
307 307 *
308 308 * These are various macros that are useful in various spots in the
309 309 * implementation, along with a few random constants that have no meaning
310 310 * outside of the implementation. There is no real structure to this cpp
311 311 * mishmash -- but is there ever?
312 312 */
313 313 #define DTRACE_HASHSTR(hash, probe) \
314 314 dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))
315 315
316 316 #define DTRACE_HASHNEXT(hash, probe) \
317 317 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)
318 318
319 319 #define DTRACE_HASHPREV(hash, probe) \
320 320 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)
321 321
322 322 #define DTRACE_HASHEQ(hash, lhs, rhs) \
323 323 (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
324 324 *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)
325 325
326 326 #define DTRACE_AGGHASHSIZE_SLEW 17
327 327
328 328 #define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3)
329 329
330 330 /*
331 331 * The key for a thread-local variable consists of the lower 61 bits of the
332 332 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
333 333 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
334 334 * equal to a variable identifier. This is necessary (but not sufficient) to
335 335 * assure that global associative arrays never collide with thread-local
336 336 * variables. To guarantee that they cannot collide, we must also define the
337 337 * order for keying dynamic variables. That order is:
338 338 *
339 339 * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
340 340 *
341 341 * Because the variable-key and the tls-key are in orthogonal spaces, there is
342 342 * no way for a global variable key signature to match a thread-local key
343 343 * signature.
344 344 */
345 345 #define DTRACE_TLS_THRKEY(where) { \
346 346 uint_t intr = 0; \
347 347 uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
348 348 for (; actv; actv >>= 1) \
349 349 intr++; \
350 350 ASSERT(intr < (1 << 3)); \
351 351 (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
352 352 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
353 353 }
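For example (hypothetical values): a thread with a t_did of 100 running at base level, with no interrupts active above LOCK_LEVEL, gets the key (100 + DIF_VARIABLE_MAX) with zero in the top three bits, while the same computation on a thread running one interrupt level above LOCK_LEVEL places 1 in bits 61-63.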
354 354
355 355 #define DT_BSWAP_8(x) ((x) & 0xff)
356 356 #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
357 357 #define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
358 358 #define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
359 359
360 360 #define DT_MASK_LO 0x00000000FFFFFFFFULL
361 361
362 362 #define DTRACE_STORE(type, tomax, offset, what) \
363 363 *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
364 364
365 365 #ifndef __x86
366 366 #define DTRACE_ALIGNCHECK(addr, size, flags) \
367 367 if (addr & (size - 1)) { \
368 368 *flags |= CPU_DTRACE_BADALIGN; \
369 369 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
370 370 return (0); \
371 371 }
372 372 #else
373 373 #define DTRACE_ALIGNCHECK(addr, size, flags)
374 374 #endif
375 375
376 376 /*
377 377 * Test whether a range of memory starting at testaddr of size testsz falls
378 378 * within the range of memory described by addr, sz. We take care to avoid
379 379 * problems with overflow and underflow of the unsigned quantities, and
380 380 * disallow all negative sizes. Ranges of size 0 are allowed.
381 381 */
382 382 #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
383 383 ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
384 384 (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
385 385 (testaddr) + (testsz) >= (testaddr))
386 386
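A worked example with illustrative values: for a base region [0x1000, 0x1100), a test range at 0x10f0 of size 0x20 fails the second clause (0x110 is not <= 0x100), and a wrapping range at 0xfffffffffffffff0 of size 0x20 fails the third clause because the unsigned sum wraps around below testaddr; both are rejected without resorting to signed arithmetic.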
387 387 #define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz) \
388 388 do { \
389 389 if ((remp) != NULL) { \
390 390 *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \
391 391 } \
392 392 _NOTE(CONSTCOND) } while (0)
393 393
394 394
395 395 /*
396 396 * Test whether alloc_sz bytes will fit in the scratch region. We isolate
397 397 * alloc_sz on the righthand side of the comparison in order to avoid overflow
398 398 * or underflow in the comparison with it. This is simpler than the INRANGE
399 399 * check above, because we know that the dtms_scratch_ptr is valid in the
400 400 * range. Allocations of size zero are allowed.
401 401 */
402 402 #define DTRACE_INSCRATCH(mstate, alloc_sz) \
403 403 ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
404 404 (mstate)->dtms_scratch_ptr >= (alloc_sz))
405 405
406 406 #define DTRACE_LOADFUNC(bits) \
407 407 /*CSTYLED*/ \
408 408 uint##bits##_t \
409 409 dtrace_load##bits(uintptr_t addr) \
410 410 { \
411 411 size_t size = bits / NBBY; \
412 412 /*CSTYLED*/ \
413 413 uint##bits##_t rval; \
414 414 int i; \
415 415 volatile uint16_t *flags = (volatile uint16_t *) \
416 416 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
417 417 \
418 418 DTRACE_ALIGNCHECK(addr, size, flags); \
419 419 \
420 420 for (i = 0; i < dtrace_toxranges; i++) { \
421 421 if (addr >= dtrace_toxrange[i].dtt_limit) \
422 422 continue; \
423 423 \
424 424 if (addr + size <= dtrace_toxrange[i].dtt_base) \
425 425 continue; \
426 426 \
427 427 /* \
428 428 * This address falls within a toxic region; return 0. \
429 429 */ \
430 430 *flags |= CPU_DTRACE_BADADDR; \
431 431 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
432 432 return (0); \
433 433 } \
434 434 \
435 435 *flags |= CPU_DTRACE_NOFAULT; \
436 436 /*CSTYLED*/ \
437 437 rval = *((volatile uint##bits##_t *)addr); \
438 438 *flags &= ~CPU_DTRACE_NOFAULT; \
439 439 \
440 440 return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \
441 441 }
442 442
443 443 #ifdef _LP64
444 444 #define dtrace_loadptr dtrace_load64
445 445 #else
446 446 #define dtrace_loadptr dtrace_load32
447 447 #endif
448 448
449 449 #define DTRACE_DYNHASH_FREE 0
450 450 #define DTRACE_DYNHASH_SINK 1
451 451 #define DTRACE_DYNHASH_VALID 2
452 452
453 453 #define DTRACE_MATCH_FAIL -1
454 454 #define DTRACE_MATCH_NEXT 0
455 455 #define DTRACE_MATCH_DONE 1
456 456 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
457 457 #define DTRACE_STATE_ALIGN 64
458 458
459 459 #define DTRACE_FLAGS2FLT(flags) \
460 460 (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \
461 461 ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \
462 462 ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \
463 463 ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \
464 464 ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \
465 465 ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \
466 466 ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \
467 467 ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \
468 468 ((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK : \
469 469 DTRACEFLT_UNKNOWN)
470 470
471 471 #define DTRACEACT_ISSTRING(act) \
472 472 ((act)->dta_kind == DTRACEACT_DIFEXPR && \
473 473 (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
474 474
475 475 static size_t dtrace_strlen(const char *, size_t);
476 476 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
477 477 static void dtrace_enabling_provide(dtrace_provider_t *);
478 478 static int dtrace_enabling_match(dtrace_enabling_t *, int *);
479 479 static void dtrace_enabling_matchall(void);
480 480 static void dtrace_enabling_reap(void);
481 481 static dtrace_state_t *dtrace_anon_grab(void);
482 482 static uint64_t dtrace_helper(int, dtrace_mstate_t *,
483 483 dtrace_state_t *, uint64_t, uint64_t);
484 484 static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
485 485 static void dtrace_buffer_drop(dtrace_buffer_t *);
486 486 static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
487 487 static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
488 488 dtrace_state_t *, dtrace_mstate_t *);
489 489 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
490 490 dtrace_optval_t);
491 491 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
492 492 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
493 493 static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
494 494 static void dtrace_getf_barrier(void);
495 495 static int dtrace_canload_remains(uint64_t, size_t, size_t *,
496 496 dtrace_mstate_t *, dtrace_vstate_t *);
497 497 static int dtrace_canstore_remains(uint64_t, size_t, size_t *,
498 498 dtrace_mstate_t *, dtrace_vstate_t *);
499 499
500 500 /*
501 501 * DTrace Probe Context Functions
502 502 *
503 503 * These functions are called from probe context. Because probe context is
504 504 * any context in which C may be called, arbitrary locks may be held,
505 505 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
506 506 * As a result, functions called from probe context may only call other DTrace
507 507 * support functions -- they may not interact at all with the system at large.
508 508 * (Note that the ASSERT macro is made probe-context safe by redefining it in
509 509 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
510 510 * loads are to be performed from probe context, they _must_ be in terms of
511 511 * the safe dtrace_load*() variants.
512 512 *
513 513 * Some functions in this block are not actually called from probe context;
514 514 * for these functions, there will be a comment above the function reading
515 515 * "Note: not called from probe context."
516 516 */
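As a contrast sketch (hypothetical fragment, not code from this file): where ordinary kernel code might dereference a pointer directly, probe context must use the safe variant defined by DTRACE_LOADFUNC above, which honors toxic ranges and records CPU_DTRACE_BADADDR or CPU_DTRACE_FAULT rather than taking an unhandled fault:

	val = *(uint32_t *)addr;	/* unsafe: never in probe context */
	val = dtrace_load32(addr);	/* safe probe-context equivalent */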
517 517 void
518 518 dtrace_panic(const char *format, ...)
519 519 {
520 520 va_list alist;
521 521
522 522 va_start(alist, format);
523 523 dtrace_vpanic(format, alist);
524 524 va_end(alist);
525 525 }
526 526
527 527 int
528 528 dtrace_assfail(const char *a, const char *f, int l)
529 529 {
530 530 dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);
531 531
532 532 /*
533 533 * We just need something here that even the most clever compiler
534 534 * cannot optimize away.
535 535 */
536 536 return (a[(uintptr_t)f]);
537 537 }
538 538
539 539 /*
540 540 * Atomically increment a specified error counter from probe context.
541 541 */
542 542 static void
543 543 dtrace_error(uint32_t *counter)
544 544 {
545 545 /*
546 546 * Most counters stored to in probe context are per-CPU counters.
547 547 * However, there are some error conditions that are sufficiently
548 548 * arcane that they don't merit per-CPU storage. If these counters
549 549 * are incremented concurrently on different CPUs, scalability will be
550 550 * adversely affected -- but we don't expect them to be white-hot in a
551 551 * correctly constructed enabling...
552 552 */
553 553 uint32_t oval, nval;
554 554
555 555 do {
556 556 oval = *counter;
557 557
558 558 if ((nval = oval + 1) == 0) {
559 559 /*
560 560 * If the counter would wrap, set it to 1 -- assuring
561 561 * that the counter is never zero when we have seen
562 562 * errors. (The counter must be 32-bits because we
563 563 * aren't guaranteed a 64-bit compare&swap operation.)
564 564 * To save this code both the infamy of being fingered
565 565 * by a priggish news story and the indignity of being
566 566 * the target of a neo-puritan witch trial, we're
567 567 * carefully avoiding any colorful description of the
568 568 * likelihood of this condition -- but suffice it to
569 569 * say that it is only slightly more likely than the
570 570 * overflow of predicate cache IDs, as discussed in
571 571 * dtrace_predicate_create().
572 572 */
573 573 nval = 1;
574 574 }
575 575 } while (dtrace_cas32(counter, oval, nval) != oval);
576 576 }
577 577
578 578 /*
579 579 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
580 580 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
581 581 */
582 582 /* BEGIN CSTYLED */
583 583 DTRACE_LOADFUNC(8)
584 584 DTRACE_LOADFUNC(16)
585 585 DTRACE_LOADFUNC(32)
586 586 DTRACE_LOADFUNC(64)
587 587 /* END CSTYLED */
588 588
589 589 static int
590 590 dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
591 591 {
592 592 if (dest < mstate->dtms_scratch_base)
593 593 return (0);
594 594
595 595 if (dest + size < dest)
596 596 return (0);
597 597
598 598 if (dest + size > mstate->dtms_scratch_ptr)
599 599 return (0);
600 600
601 601 return (1);
602 602 }
603 603
604 604 static int
605 605 dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain,
606 606 dtrace_statvar_t **svars, int nsvars)
607 607 {
608 608 int i;
609 609 size_t maxglobalsize, maxlocalsize;
610 610
611 611 if (nsvars == 0)
612 612 return (0);
613 613
614 614 maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t);
615 615 maxlocalsize = maxglobalsize * NCPU;
616 616
617 617 for (i = 0; i < nsvars; i++) {
618 618 dtrace_statvar_t *svar = svars[i];
619 619 uint8_t scope;
620 620 size_t size;
621 621
622 622 if (svar == NULL || (size = svar->dtsv_size) == 0)
623 623 continue;
624 624
625 625 scope = svar->dtsv_var.dtdv_scope;
626 626
627 627 /*
628 628 * We verify that our size is valid in the spirit of providing
629 629 * defense in depth: we want to prevent attackers from using
630 630 * DTrace to escalate an orthogonal kernel heap corruption bug
631 631 * into the ability to store to arbitrary locations in memory.
632 632 */
633 633 VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) ||
634 634 (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize));
635 635
636 636 if (DTRACE_INRANGE(addr, sz, svar->dtsv_data,
637 637 svar->dtsv_size)) {
638 638 DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data,
639 639 svar->dtsv_size);
640 640 return (1);
641 641 }
642 642 }
643 643
644 644 return (0);
645 645 }
646 646
647 647 /*
648 648 * Check to see if the address is within a memory region to which a store may
649 649 * be issued. This includes the DTrace scratch areas, and any DTrace variable
650 650 * region. The caller of dtrace_canstore() is responsible for performing any
651 651 * alignment checks that are needed before stores are actually executed.
652 652 */
653 653 static int
654 654 dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
655 655 dtrace_vstate_t *vstate)
656 656 {
657 657 return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate));
658 658 }
659 659
660 660 /*
661 661 * Implementation of dtrace_canstore which communicates the upper bound of the
662 662 * allowed memory region.
663 663 */
664 664 static int
665 665 dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain,
666 666 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
667 667 {
668 668 /*
669 669 * First, check to see if the address is in scratch space...
670 670 */
671 671 if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
672 672 mstate->dtms_scratch_size)) {
673 673 DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base,
674 674 mstate->dtms_scratch_size);
675 675 return (1);
676 676 }
677 677
678 678 /*
679 679 * Now check to see if it's a dynamic variable. This check will pick
680 680 * up both thread-local variables and any global dynamically-allocated
681 681 * variables.
682 682 */
683 683 if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
684 684 vstate->dtvs_dynvars.dtds_size)) {
685 685 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
686 686 uintptr_t base = (uintptr_t)dstate->dtds_base +
687 687 (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
688 688 uintptr_t chunkoffs;
689 689 dtrace_dynvar_t *dvar;
690 690
691 691 /*
692 692 * Before we assume that we can store here, we need to make
693 693 * sure that it isn't in our metadata -- storing to our
694 694 * dynamic variable metadata would corrupt our state. For
695 695 * the range to not include any dynamic variable metadata,
696 696 * it must:
697 697 *
698 698 * (1) Start above the hash table that is at the base of
699 699 * the dynamic variable space
700 700 *
701 701 * (2) Have a starting chunk offset that is beyond the
702 702 * dtrace_dynvar_t that is at the base of every chunk
703 703 *
704 704 * (3) Not span a chunk boundary
705 705 *
706 706 * (4) Not be in the tuple space of a dynamic variable
707 707 *
708 708 */
709 709 if (addr < base)
710 710 return (0);
711 711
712 712 chunkoffs = (addr - base) % dstate->dtds_chunksize;
713 713
714 714 if (chunkoffs < sizeof (dtrace_dynvar_t))
715 715 return (0);
716 716
717 717 if (chunkoffs + sz > dstate->dtds_chunksize)
718 718 return (0);
719 719
720 720 dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs);
721 721
722 722 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE)
723 723 return (0);
724 724
725 725 if (chunkoffs < sizeof (dtrace_dynvar_t) +
726 726 ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t)))
727 727 return (0);
728 728
729 729 DTRACE_RANGE_REMAIN(remain, addr, dvar, dstate->dtds_chunksize);
730 730 return (1);
731 731 }
732 732
733 733 /*
734 734 * Finally, check the static local and global variables. These checks
735 735 * take the longest, so we perform them last.
736 736 */
737 737 if (dtrace_canstore_statvar(addr, sz, remain,
738 738 vstate->dtvs_locals, vstate->dtvs_nlocals))
739 739 return (1);
740 740
741 741 if (dtrace_canstore_statvar(addr, sz, remain,
742 742 vstate->dtvs_globals, vstate->dtvs_nglobals))
743 743 return (1);
744 744
745 745 return (0);
746 746 }
747 747
748 748
749 749 /*
750 750 * Convenience routine to check to see if the address is within a memory
751 751 * region in which a load may be issued given the user's privilege level;
752 752 * if not, it sets the appropriate error flags and loads 'addr' into the
753 753 * illegal value slot.
754 754 *
755 755 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
756 756 * appropriate memory access protection.
757 757 */
758 758 static int
759 759 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
760 760 dtrace_vstate_t *vstate)
761 761 {
762 762 return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate));
763 763 }
764 764
765 765 /*
766 766 * Implementation of dtrace_canload which communicates the upper bound of the
767 767 * allowed memory region.
768 768 */
769 769 static int
770 770 dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain,
771 771 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
772 772 {
773 773 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
774 774 file_t *fp;
775 775
776 776 /*
777 777 * If we hold the privilege to read from kernel memory, then
778 778 * everything is readable.
779 779 */
780 780 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
781 781 DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
782 782 return (1);
783 783 }
784 784
785 785 /*
786 786 * You can obviously read that which you can store.
787 787 */
788 788 if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate))
789 789 return (1);
790 790
791 791 /*
792 792 * We're allowed to read from our own string table.
793 793 */
794 794 if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
795 795 mstate->dtms_difo->dtdo_strlen)) {
796 796 DTRACE_RANGE_REMAIN(remain, addr,
797 797 mstate->dtms_difo->dtdo_strtab,
798 798 mstate->dtms_difo->dtdo_strlen);
799 799 return (1);
800 800 }
801 801
802 802 if (vstate->dtvs_state != NULL &&
803 803 dtrace_priv_proc(vstate->dtvs_state, mstate)) {
804 804 proc_t *p;
805 805
806 806 /*
807 807 * When we have privileges to the current process, there are
808 808 * several context-related kernel structures that are safe to
809 809 * read, even absent the privilege to read from kernel memory.
810 810 * These reads are safe because these structures contain only
811 811 * state that (1) we're permitted to read, (2) is harmless or
812 812 * (3) contains pointers to additional kernel state that we're
813 813 * not permitted to read (and as such, do not present an
814 814 * opportunity for privilege escalation). Finally (and
815 815 * critically), because of the nature of their relation with
816 816 * the current thread context, the memory associated with these
817 817 * structures cannot change over the duration of probe context,
818 818 * and it is therefore impossible for this memory to be
819 819 * deallocated and reallocated as something else while it's
820 820 * being operated upon.
821 821 */
822 822 if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t))) {
823 823 DTRACE_RANGE_REMAIN(remain, addr, curthread,
824 824 sizeof (kthread_t));
825 825 return (1);
826 826 }
827 827
828 828 if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
829 829 sz, curthread->t_procp, sizeof (proc_t))) {
830 830 DTRACE_RANGE_REMAIN(remain, addr, curthread->t_procp,
831 831 sizeof (proc_t));
832 832 return (1);
833 833 }
834 834
835 835 if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
836 836 curthread->t_cred, sizeof (cred_t))) {
837 837 DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cred,
838 838 sizeof (cred_t));
839 839 return (1);
840 840 }
841 841
842 842 if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
843 843 &(p->p_pidp->pid_id), sizeof (pid_t))) {
844 844 DTRACE_RANGE_REMAIN(remain, addr, &(p->p_pidp->pid_id),
845 845 sizeof (pid_t));
846 846 return (1);
847 847 }
848 848
849 849 if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
850 850 curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
851 851 DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cpu,
852 852 offsetof(cpu_t, cpu_pause_thread));
853 853 return (1);
854 854 }
855 855 }
856 856
857 857 if ((fp = mstate->dtms_getf) != NULL) {
858 858 uintptr_t psz = sizeof (void *);
859 859 vnode_t *vp;
860 860 vnodeops_t *op;
861 861
862 862 /*
863 863 * When getf() returns a file_t, the enabling is implicitly
864 864 * granted the (transient) right to read the returned file_t
865 865 * as well as the v_path and v_op->vnop_name of the underlying
866 866 * vnode. These accesses are allowed after a successful
867 867 * getf() because the members that they refer to cannot change
868 868 * once set -- and the barrier logic in the kernel's closef()
869 869 * path assures that the file_t and its referenced vnode_t
870 870 * cannot themselves be stale (that is, it is impossible for
871 871 * either dtms_getf itself or its f_vnode member to reference
872 872 * freed memory).
873 873 */
874 874 if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t))) {
875 875 DTRACE_RANGE_REMAIN(remain, addr, fp, sizeof (file_t));
876 876 return (1);
877 877 }
878 878
879 879 if ((vp = fp->f_vnode) != NULL) {
880 880 size_t slen;
881 881
882 882 if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz)) {
883 883 DTRACE_RANGE_REMAIN(remain, addr, &vp->v_path,
884 884 psz);
885 885 return (1);
886 886 }
887 887
888 888 slen = strlen(vp->v_path) + 1;
889 889 if (DTRACE_INRANGE(addr, sz, vp->v_path, slen)) {
890 890 DTRACE_RANGE_REMAIN(remain, addr, vp->v_path,
891 891 slen);
892 892 return (1);
893 893 }
894 894
895 895 if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz)) {
896 896 DTRACE_RANGE_REMAIN(remain, addr, &vp->v_op,
897 897 psz);
898 898 return (1);
899 899 }
900 900
901 901 if ((op = vp->v_op) != NULL &&
902 902 DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
903 903 DTRACE_RANGE_REMAIN(remain, addr,
904 904 &op->vnop_name, psz);
905 905 return (1);
906 906 }
907 907
908 908 if (op != NULL && op->vnop_name != NULL &&
909 909 DTRACE_INRANGE(addr, sz, op->vnop_name,
910 910 (slen = strlen(op->vnop_name) + 1))) {
911 911 DTRACE_RANGE_REMAIN(remain, addr,
912 912 op->vnop_name, slen);
913 913 return (1);
914 914 }
915 915 }
916 916 }
917 917
918 918 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
919 919 *illval = addr;
920 920 return (0);
921 921 }
922 922
923 923 /*
924 924 * Convenience routine to check to see if a given string is within a memory
925 925 * region in which a load may be issued given the user's privilege level;
926 926 * this exists so that we don't need to issue unnecessary dtrace_strlen()
927 927 * calls in the event that the user has all privileges.
928 928 */
929 929 static int
930 930 dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain,
931 931 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
932 932 {
933 933 size_t rsize;
934 934
935 935 /*
936 936 * If we hold the privilege to read from kernel memory, then
937 937 * everything is readable.
938 938 */
939 939 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
940 940 DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
941 941 return (1);
942 942 }
943 943
944 944 /*
945 945 * Even if the caller is uninterested in querying the remaining valid
946 946 * range, it must still be computed to ensure that the access is allowed.
947 947 */
948 948 if (remain == NULL) {
949 949 remain = &rsize;
950 950 }
951 951 if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) {
952 952 size_t strsz;
953 953 /*
954 954 * Perform the strlen after determining the length of the
955 955 * memory region which is accessible. This prevents timing
956 956 * information from being used to find NULs in memory which is
957 957 * not accessible to the caller.
958 958 */
959 959 strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr,
960 960 MIN(sz, *remain));
961 961 if (strsz <= *remain) {
962 962 return (1);
963 963 }
964 964 }
965 965
966 966 return (0);
967 967 }
968 968
969 969 /*
970 970 * Convenience routine to check to see if a given variable is within a memory
971 971 * region in which a load may be issued given the user's privilege level.
972 972 */
973 973 static int
974 974 dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain,
975 975 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
976 976 {
977 977 size_t sz;
978 978 ASSERT(type->dtdt_flags & DIF_TF_BYREF);
979 979
980 980 /*
981 981 * Calculate the max size before performing any checks since even
982 982 * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function
983 983 * return the max length via 'remain'.
984 984 */
985 985 if (type->dtdt_kind == DIF_TYPE_STRING) {
986 986 dtrace_state_t *state = vstate->dtvs_state;
987 987
988 988 if (state != NULL) {
989 989 sz = state->dts_options[DTRACEOPT_STRSIZE];
990 990 } else {
991 991 /*
992 992 * In helper context, we have a NULL state; fall back
993 993 * to using the system-wide default for the string size
994 994 * in this case.
995 995 */
996 996 sz = dtrace_strsize_default;
997 997 }
998 998 } else {
999 999 sz = type->dtdt_size;
1000 1000 }
1001 1001
1002 1002 /*
1003 1003 * If we hold the privilege to read from kernel memory, then
1004 1004 * everything is readable.
1005 1005 */
1006 1006 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
1007 1007 DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz);
1008 1008 return (1);
1009 1009 }
1010 1010
1011 1011 if (type->dtdt_kind == DIF_TYPE_STRING) {
1012 1012 return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate,
1013 1013 vstate));
1014 1014 }
1015 1015 return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate,
1016 1016 vstate));
1017 1017 }
1018 1018
1019 1019 /*
1020 1020 * Convert a string to a signed integer using safe loads.
1021 1021 *
1022 1022 * NOTE: This function uses various macros from strtolctype.h to manipulate
1023 1023 * digit values, etc -- these have all been checked to ensure they make
1024 1024 * no additional function calls.
1025 1025 */
1026 1026 static int64_t
1027 1027 dtrace_strtoll(char *input, int base, size_t limit)
1028 1028 {
1029 1029 uintptr_t pos = (uintptr_t)input;
1030 1030 int64_t val = 0;
1031 1031 int x;
1032 1032 boolean_t neg = B_FALSE;
1033 1033 char c, cc, ccc;
1034 1034 uintptr_t end = pos + limit;
1035 1035
1036 1036 /*
1037 1037 * Consume any whitespace preceding digits.
1038 1038 */
1039 1039 while ((c = dtrace_load8(pos)) == ' ' || c == '\t')
1040 1040 pos++;
1041 1041
1042 1042 /*
1043 1043 * Handle an explicit sign if one is present.
1044 1044 */
1045 1045 if (c == '-' || c == '+') {
1046 1046 if (c == '-')
1047 1047 neg = B_TRUE;
1048 1048 c = dtrace_load8(++pos);
1049 1049 }
1050 1050
1051 1051 /*
1052 1052 * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it
1053 1053 * if present.
1054 1054 */
1055 1055 if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' ||
1056 1056 cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) {
1057 1057 pos += 2;
1058 1058 c = ccc;
1059 1059 }
1060 1060
1061 1061 /*
1062 1062 * Read in contiguous digits until the first non-digit character.
1063 1063 */
1064 1064 for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base;
1065 1065 c = dtrace_load8(++pos))
1066 1066 val = val * base + x;
1067 1067
1068 1068 return (neg ? -val : val);
1069 1069 }
1070 1070
1071 1071 /*
1072 1072 * Compare two strings using safe loads.
1073 1073 */
1074 1074 static int
1075 1075 dtrace_strncmp(char *s1, char *s2, size_t limit)
1076 1076 {
1077 1077 uint8_t c1, c2;
1078 1078 volatile uint16_t *flags;
1079 1079
1080 1080 if (s1 == s2 || limit == 0)
1081 1081 return (0);
1082 1082
1083 1083 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
1084 1084
1085 1085 do {
1086 1086 if (s1 == NULL) {
1087 1087 c1 = '\0';
1088 1088 } else {
1089 1089 c1 = dtrace_load8((uintptr_t)s1++);
1090 1090 }
1091 1091
1092 1092 if (s2 == NULL) {
1093 1093 c2 = '\0';
1094 1094 } else {
1095 1095 c2 = dtrace_load8((uintptr_t)s2++);
1096 1096 }
1097 1097
1098 1098 if (c1 != c2)
1099 1099 return (c1 - c2);
1100 1100 } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));
1101 1101
1102 1102 return (0);
1103 1103 }
1104 1104
1105 1105 /*
1106 1106 * Compute strlen(s) for a string using safe memory accesses. The additional
1107 1107 * lim parameter is used to specify a maximum length to ensure completion.
1108 1108 */
1109 1109 static size_t
1110 1110 dtrace_strlen(const char *s, size_t lim)
1111 1111 {
1112 1112 uint_t len;
1113 1113
1114 1114 for (len = 0; len != lim; len++) {
1115 1115 if (dtrace_load8((uintptr_t)s++) == '\0')
1116 1116 break;
1117 1117 }
1118 1118
1119 1119 return (len);
1120 1120 }
1121 1121
1122 1122 /*
1123 1123 * Check if an address falls within a toxic region.
1124 1124 */
1125 1125 static int
1126 1126 dtrace_istoxic(uintptr_t kaddr, size_t size)
1127 1127 {
1128 1128 uintptr_t taddr, tsize;
1129 1129 int i;
1130 1130
1131 1131 for (i = 0; i < dtrace_toxranges; i++) {
1132 1132 taddr = dtrace_toxrange[i].dtt_base;
1133 1133 tsize = dtrace_toxrange[i].dtt_limit - taddr;
1134 1134
1135 1135 if (kaddr - taddr < tsize) {
1136 1136 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
1137 1137 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
1138 1138 return (1);
1139 1139 }
1140 1140
1141 1141 if (taddr - kaddr < size) {
1142 1142 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
1143 1143 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
1144 1144 return (1);
1145 1145 }
1146 1146 }
1147 1147
1148 1148 return (0);
1149 1149 }
1150 1150
1151 1151 /*
1152 1152 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe
1153 1153 * memory specified by the DIF program. The dst is assumed to be safe memory
1154 1154 * that we can store to directly because it is managed by DTrace. As with
1155 1155 * standard bcopy, overlapping copies are handled properly.
1156 1156 */
1157 1157 static void
1158 1158 dtrace_bcopy(const void *src, void *dst, size_t len)
1159 1159 {
1160 1160 if (len != 0) {
1161 1161 uint8_t *s1 = dst;
1162 1162 const uint8_t *s2 = src;
1163 1163
1164 1164 if (s1 <= s2) {
1165 1165 do {
1166 1166 *s1++ = dtrace_load8((uintptr_t)s2++);
1167 1167 } while (--len != 0);
1168 1168 } else {
1169 1169 s2 += len;
1170 1170 s1 += len;
1171 1171
1172 1172 do {
1173 1173 *--s1 = dtrace_load8((uintptr_t)--s2);
1174 1174 } while (--len != 0);
1175 1175 }
1176 1176 }
1177 1177 }
1178 1178
1179 1179 /*
1180 1180 * Copy src to dst using safe memory accesses, up to either the specified
1181 1181 * length, or the point that a nul byte is encountered. The src is assumed to
1182 1182 * be unsafe memory specified by the DIF program. The dst is assumed to be
1183 1183 * safe memory that we can store to directly because it is managed by DTrace.
1184 1184 * Unlike dtrace_bcopy(), overlapping regions are not handled.
1185 1185 */
1186 1186 static void
1187 1187 dtrace_strcpy(const void *src, void *dst, size_t len)
1188 1188 {
1189 1189 if (len != 0) {
1190 1190 uint8_t *s1 = dst, c;
1191 1191 const uint8_t *s2 = src;
1192 1192
1193 1193 do {
1194 1194 *s1++ = c = dtrace_load8((uintptr_t)s2++);
1195 1195 } while (--len != 0 && c != '\0');
1196 1196 }
1197 1197 }
1198 1198
1199 1199 /*
1200 1200 * Copy src to dst, deriving the size and type from the specified (BYREF)
1201 1201 * variable type. The src is assumed to be unsafe memory specified by the DIF
1202 1202 * program. The dst is assumed to be DTrace variable memory that is of the
1203 1203 * specified type; we assume that we can store to directly.
1204 1204 */
1205 1205 static void
1206 1206 dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit)
1207 1207 {
1208 1208 ASSERT(type->dtdt_flags & DIF_TF_BYREF);
1209 1209
1210 1210 if (type->dtdt_kind == DIF_TYPE_STRING) {
1211 1211 dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit));
1212 1212 } else {
1213 1213 dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit));
1214 1214 }
1215 1215 }
1216 1216
1217 1217 /*
1218 1218 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
1219 1219 * unsafe memory specified by the DIF program. The s2 data is assumed to be
1220 1220 * safe memory that we can access directly because it is managed by DTrace.
1221 1221 */
1222 1222 static int
1223 1223 dtrace_bcmp(const void *s1, const void *s2, size_t len)
1224 1224 {
1225 1225 volatile uint16_t *flags;
1226 1226
1227 1227 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
1228 1228
1229 1229 if (s1 == s2)
1230 1230 return (0);
1231 1231
1232 1232 if (s1 == NULL || s2 == NULL)
1233 1233 return (1);
1234 1234
1235 1235 if (s1 != s2 && len != 0) {
1236 1236 const uint8_t *ps1 = s1;
1237 1237 const uint8_t *ps2 = s2;
1238 1238
1239 1239 do {
1240 1240 if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
1241 1241 return (1);
1242 1242 } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
1243 1243 }
1244 1244 return (0);
1245 1245 }
1246 1246
1247 1247 /*
1248 1248 * Zero the specified region using a simple byte-by-byte loop. Note that this
1249 1249 * is for safe DTrace-managed memory only.
1250 1250 */
1251 1251 static void
1252 1252 dtrace_bzero(void *dst, size_t len)
1253 1253 {
1254 1254 uchar_t *cp;
1255 1255
1256 1256 for (cp = dst; len != 0; len--)
1257 1257 *cp++ = 0;
1258 1258 }
1259 1259
1260 1260 static void
1261 1261 dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
1262 1262 {
1263 1263 uint64_t result[2];
1264 1264
1265 1265 result[0] = addend1[0] + addend2[0];
1266 1266 result[1] = addend1[1] + addend2[1] +
1267 1267 (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);
1268 1268
1269 1269 sum[0] = result[0];
1270 1270 sum[1] = result[1];
1271 1271 }
1272 1272
1273 1273 /*
1274 1274 * Shift the 128-bit value in a by b. If b is positive, shift left.
1275 1275 * If b is negative, shift right.
1276 1276 */
1277 1277 static void
1278 1278 dtrace_shift_128(uint64_t *a, int b)
1279 1279 {
1280 1280 uint64_t mask;
1281 1281
1282 1282 if (b == 0)
1283 1283 return;
1284 1284
1285 1285 if (b < 0) {
1286 1286 b = -b;
1287 1287 if (b >= 64) {
1288 1288 a[0] = a[1] >> (b - 64);
1289 1289 a[1] = 0;
1290 1290 } else {
1291 1291 a[0] >>= b;
1292 1292 mask = 1LL << (64 - b);
1293 1293 mask -= 1;
1294 1294 a[0] |= ((a[1] & mask) << (64 - b));
1295 1295 a[1] >>= b;
1296 1296 }
1297 1297 } else {
1298 1298 if (b >= 64) {
1299 1299 a[1] = a[0] << (b - 64);
1300 1300 a[0] = 0;
1301 1301 } else {
1302 1302 a[1] <<= b;
1303 1303 mask = a[0] >> (64 - b);
1304 1304 a[1] |= mask;
1305 1305 a[0] <<= b;
1306 1306 }
1307 1307 }
1308 1308 }
1309 1309
1310 1310 /*
1311 1311 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
1312 1312 * use native multiplication on those, and then re-combine into the
1313 1313 * resulting 128-bit value.
1314 1314 *
1315 1315 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
1316 1316 * hi1 * hi2 << 64 +
1317 1317 * hi1 * lo2 << 32 +
1318 1318 * hi2 * lo1 << 32 +
1319 1319 * lo1 * lo2
1320 1320 */
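A quick numeric check of this identity (illustrative operands): with factor1 = 0x100000002 (hi1 = 1, lo1 = 2) and factor2 = 0x300000004 (hi2 = 3, lo2 = 4), the partial products are 3 << 64, 4 << 32, 6 << 32 and 8, which sum to product[1] = 3 and product[0] = 0xa00000008 in the layout used below.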
1321 1321 static void
1322 1322 dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
1323 1323 {
1324 1324 uint64_t hi1, hi2, lo1, lo2;
1325 1325 uint64_t tmp[2];
1326 1326
1327 1327 hi1 = factor1 >> 32;
1328 1328 hi2 = factor2 >> 32;
1329 1329
1330 1330 lo1 = factor1 & DT_MASK_LO;
1331 1331 lo2 = factor2 & DT_MASK_LO;
1332 1332
1333 1333 product[0] = lo1 * lo2;
1334 1334 product[1] = hi1 * hi2;
1335 1335
1336 1336 tmp[0] = hi1 * lo2;
1337 1337 tmp[1] = 0;
1338 1338 dtrace_shift_128(tmp, 32);
1339 1339 dtrace_add_128(product, tmp, product);
1340 1340
1341 1341 tmp[0] = hi2 * lo1;
1342 1342 tmp[1] = 0;
1343 1343 dtrace_shift_128(tmp, 32);
1344 1344 dtrace_add_128(product, tmp, product);
1345 1345 }
1346 1346
1347 1347 /*
1348 1348 * This privilege check should be used by actions and subroutines to
1349 1349 * verify that the user credentials of the process that enabled the
1350 1350 * invoking ECB match the target credentials.
1351 1351 */
1352 1352 static int
1353 1353 dtrace_priv_proc_common_user(dtrace_state_t *state)
1354 1354 {
1355 1355 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1356 1356
1357 1357 /*
1358 1358 * We should always have a non-NULL state cred here, since if cred
1359 1359 * is null (anonymous tracing), we fast-path bypass this routine.
1360 1360 */
1361 1361 ASSERT(s_cr != NULL);
1362 1362
1363 1363 if ((cr = CRED()) != NULL &&
1364 1364 s_cr->cr_uid == cr->cr_uid &&
1365 1365 s_cr->cr_uid == cr->cr_ruid &&
1366 1366 s_cr->cr_uid == cr->cr_suid &&
1367 1367 s_cr->cr_gid == cr->cr_gid &&
1368 1368 s_cr->cr_gid == cr->cr_rgid &&
1369 1369 s_cr->cr_gid == cr->cr_sgid)
1370 1370 return (1);
1371 1371
1372 1372 return (0);
1373 1373 }
1374 1374
1375 1375 /*
1376 1376 * This privilege check should be used by actions and subroutines to
1377 1377 * verify that the zone of the process that enabled the invoking ECB
1378 1378 * matches the target credentials.
1379 1379 */
1380 1380 static int
1381 1381 dtrace_priv_proc_common_zone(dtrace_state_t *state)
1382 1382 {
1383 1383 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1384 1384
1385 1385 /*
1386 1386 * We should always have a non-NULL state cred here, since if cred
1387 1387 * is null (anonymous tracing), we fast-path bypass this routine.
1388 1388 */
1389 1389 ASSERT(s_cr != NULL);
1390 1390
1391 1391 if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
1392 1392 return (1);
1393 1393
1394 1394 return (0);
1395 1395 }
1396 1396
1397 1397 /*
1398 1398 * This privilege check should be used by actions and subroutines to
1399 1399 * verify that the process has not setuid or changed credentials.
1400 1400 */
1401 1401 static int
1402 1402 dtrace_priv_proc_common_nocd()
1403 1403 {
1404 1404 proc_t *proc;
1405 1405
1406 1406 if ((proc = ttoproc(curthread)) != NULL &&
1407 1407 !(proc->p_flag & SNOCD))
1408 1408 return (1);
1409 1409
1410 1410 return (0);
1411 1411 }
1412 1412
1413 1413 static int
1414 1414 dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)
1415 1415 {
1416 1416 int action = state->dts_cred.dcr_action;
1417 1417
1418 1418 if (!(mstate->dtms_access & DTRACE_ACCESS_PROC))
1419 1419 goto bad;
1420 1420
1421 1421 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
1422 1422 dtrace_priv_proc_common_zone(state) == 0)
1423 1423 goto bad;
1424 1424
1425 1425 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
1426 1426 dtrace_priv_proc_common_user(state) == 0)
1427 1427 goto bad;
1428 1428
1429 1429 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
1430 1430 dtrace_priv_proc_common_nocd() == 0)
1431 1431 goto bad;
1432 1432
1433 1433 return (1);
1434 1434
1435 1435 bad:
1436 1436 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1437 1437
1438 1438 return (0);
1439 1439 }
1440 1440
1441 1441 static int
1442 1442 dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
1443 1443 {
1444 1444 if (mstate->dtms_access & DTRACE_ACCESS_PROC) {
1445 1445 if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
1446 1446 return (1);
1447 1447
1448 1448 if (dtrace_priv_proc_common_zone(state) &&
1449 1449 dtrace_priv_proc_common_user(state) &&
1450 1450 dtrace_priv_proc_common_nocd())
1451 1451 return (1);
1452 1452 }
1453 1453
1454 1454 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1455 1455
1456 1456 return (0);
1457 1457 }
1458 1458
1459 1459 static int
1460 1460 dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)
1461 1461 {
1462 1462 if ((mstate->dtms_access & DTRACE_ACCESS_PROC) &&
1463 1463 (state->dts_cred.dcr_action & DTRACE_CRA_PROC))
1464 1464 return (1);
1465 1465
1466 1466 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1467 1467
1468 1468 return (0);
1469 1469 }
1470 1470
1471 1471 static int
1472 1472 dtrace_priv_kernel(dtrace_state_t *state)
1473 1473 {
1474 1474 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
1475 1475 return (1);
1476 1476
1477 1477 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1478 1478
1479 1479 return (0);
1480 1480 }
1481 1481
1482 1482 static int
1483 1483 dtrace_priv_kernel_destructive(dtrace_state_t *state)
1484 1484 {
1485 1485 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
1486 1486 return (1);
1487 1487
1488 1488 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1489 1489
1490 1490 return (0);
1491 1491 }
1492 1492
1493 1493 /*
1494 1494 * Determine if the dte_cond of the specified ECB allows for processing of
1495 1495 * the current probe to continue. Note that this routine may allow continued
1496 1496 * processing, but with access(es) stripped from the mstate's dtms_access
1497 1497 * field.
1498 1498 */
1499 1499 static int
1500 1500 dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
1501 1501 dtrace_ecb_t *ecb)
1502 1502 {
1503 1503 dtrace_probe_t *probe = ecb->dte_probe;
1504 1504 dtrace_provider_t *prov = probe->dtpr_provider;
1505 1505 dtrace_pops_t *pops = &prov->dtpv_pops;
1506 1506 int mode = DTRACE_MODE_NOPRIV_DROP;
1507 1507
1508 1508 ASSERT(ecb->dte_cond);
1509 1509
1510 1510 if (pops->dtps_mode != NULL) {
1511 1511 mode = pops->dtps_mode(prov->dtpv_arg,
1512 1512 probe->dtpr_id, probe->dtpr_arg);
1513 1513
1514 1514 ASSERT(mode & (DTRACE_MODE_USER | DTRACE_MODE_KERNEL));
1515 1515 ASSERT(mode & (DTRACE_MODE_NOPRIV_RESTRICT |
1516 1516 DTRACE_MODE_NOPRIV_DROP));
1517 1517 }
1518 1518
1519 1519 /*
1520 1520 * If the dte_cond bits indicate that this consumer is only allowed to
1521 1521 * see user-mode firings of this probe, check that the probe was fired
1522 1522 * while in a user context. If that's not the case, use the policy
1523 1523 * specified by the provider to determine if we drop the probe or
1524 1524 * merely restrict operation.
1525 1525 */
1526 1526 if (ecb->dte_cond & DTRACE_COND_USERMODE) {
1527 1527 ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
1528 1528
1529 1529 if (!(mode & DTRACE_MODE_USER)) {
1530 1530 if (mode & DTRACE_MODE_NOPRIV_DROP)
1531 1531 return (0);
1532 1532
1533 1533 mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1534 1534 }
1535 1535 }
1536 1536
1537 1537 /*
1538 1538 * This is more subtle than it looks. We have to be absolutely certain
1539 1539 * that CRED() isn't going to change out from under us so it's only
1540 1540 * legit to examine that structure if we're in constrained situations.
1541 1541  * Currently, the only time we'll perform this check is when a non-super-user
1542 1542 * has enabled the profile or syscall providers -- providers that
1543 1543 * allow visibility of all processes. For the profile case, the check
1544 1544 * above will ensure that we're examining a user context.
1545 1545 */
1546 1546 if (ecb->dte_cond & DTRACE_COND_OWNER) {
1547 1547 cred_t *cr;
1548 1548 cred_t *s_cr = state->dts_cred.dcr_cred;
1549 1549 proc_t *proc;
1550 1550
1551 1551 ASSERT(s_cr != NULL);
1552 1552
1553 1553 if ((cr = CRED()) == NULL ||
1554 1554 s_cr->cr_uid != cr->cr_uid ||
1555 1555 s_cr->cr_uid != cr->cr_ruid ||
1556 1556 s_cr->cr_uid != cr->cr_suid ||
1557 1557 s_cr->cr_gid != cr->cr_gid ||
1558 1558 s_cr->cr_gid != cr->cr_rgid ||
1559 1559 s_cr->cr_gid != cr->cr_sgid ||
1560 1560 (proc = ttoproc(curthread)) == NULL ||
1561 1561 (proc->p_flag & SNOCD)) {
1562 1562 if (mode & DTRACE_MODE_NOPRIV_DROP)
1563 1563 return (0);
1564 1564
1565 1565 mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
1566 1566 }
1567 1567 }
1568 1568
1569 1569 /*
1570 1570 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
1571 1571 * in our zone, check to see if our mode policy is to restrict rather
1572 1572 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
1573 1573 * and DTRACE_ACCESS_ARGS
1574 1574 */
1575 1575 if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
1576 1576 cred_t *cr;
1577 1577 cred_t *s_cr = state->dts_cred.dcr_cred;
1578 1578
1579 1579 ASSERT(s_cr != NULL);
1580 1580
1581 1581 if ((cr = CRED()) == NULL ||
1582 1582 s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
1583 1583 if (mode & DTRACE_MODE_NOPRIV_DROP)
1584 1584 return (0);
1585 1585
1586 1586 mstate->dtms_access &=
1587 1587 ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
1588 1588 }
1589 1589 }
1590 1590
1591 1591 /*
1592 1592 * By merits of being in this code path at all, we have limited
1593 1593 * privileges. If the provider has indicated that limited privileges
1594 1594 * are to denote restricted operation, strip off the ability to access
1595 1595 * arguments.
1596 1596 */
1597 1597 if (mode & DTRACE_MODE_LIMITEDPRIV_RESTRICT)
1598 1598 mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1599 1599
1600 1600 return (1);
1601 1601 }
1602 1602
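To sketch the provider side of the dtps_mode handshake asserted above: a mode callback must return exactly one of the context bits and one of the no-privilege policy bits. The following hypothetical callback (not taken from any real provider) satisfies both assertions:

	/*ARGSUSED*/
	static int
	example_mode(void *arg, dtrace_id_t id, void *parg)
	{
		/*
		 * Report user-context firings, and ask the framework
		 * to drop (rather than restrict) firings that an
		 * unprivileged consumer may not observe.
		 */
		return (DTRACE_MODE_USER | DTRACE_MODE_NOPRIV_DROP);
	}
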
1603 1603 /*
1604 1604 * Note: not called from probe context. This function is called
1605 1605 * asynchronously (and at a regular interval) from outside of probe context to
1606 1606 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
1607 1607 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1608 1608 */
1609 1609 void
1610 1610 dtrace_dynvar_clean(dtrace_dstate_t *dstate)
1611 1611 {
1612 1612 dtrace_dynvar_t *dirty;
1613 1613 dtrace_dstate_percpu_t *dcpu;
1614 1614 dtrace_dynvar_t **rinsep;
1615 1615 int i, j, work = 0;
1616 1616
1617 1617 for (i = 0; i < NCPU; i++) {
1618 1618 dcpu = &dstate->dtds_percpu[i];
1619 1619 rinsep = &dcpu->dtdsc_rinsing;
1620 1620
1621 1621 /*
1622 1622 * If the dirty list is NULL, there is no dirty work to do.
1623 1623 */
1624 1624 if (dcpu->dtdsc_dirty == NULL)
1625 1625 continue;
1626 1626
1627 1627 if (dcpu->dtdsc_rinsing != NULL) {
1628 1628 /*
1629 1629 * If the rinsing list is non-NULL, then it is because
1630 1630 * this CPU was selected to accept another CPU's
1631 1631 * dirty list -- and since that time, dirty buffers
1632 1632 * have accumulated. This is a highly unlikely
1633 1633 * condition, but we choose to ignore the dirty
1634 1634  * buffers -- they'll be picked up in a future cleanse.
1635 1635 */
1636 1636 continue;
1637 1637 }
1638 1638
1639 1639 if (dcpu->dtdsc_clean != NULL) {
1640 1640 /*
1641 1641 * If the clean list is non-NULL, then we're in a
1642 1642 * situation where a CPU has done deallocations (we
1643 1643 * have a non-NULL dirty list) but no allocations (we
1644 1644 * also have a non-NULL clean list). We can't simply
1645 1645 * move the dirty list into the clean list on this
1646 1646 * CPU, yet we also don't want to allow this condition
1647 1647 * to persist, lest a short clean list prevent a
1648 1648 * massive dirty list from being cleaned (which in
1649 1649 * turn could lead to otherwise avoidable dynamic
1650 1650 * drops). To deal with this, we look for some CPU
1651 1651 * with a NULL clean list, NULL dirty list, and NULL
1652 1652 * rinsing list -- and then we borrow this CPU to
1653 1653 * rinse our dirty list.
1654 1654 */
1655 1655 for (j = 0; j < NCPU; j++) {
1656 1656 dtrace_dstate_percpu_t *rinser;
1657 1657
1658 1658 rinser = &dstate->dtds_percpu[j];
1659 1659
1660 1660 if (rinser->dtdsc_rinsing != NULL)
1661 1661 continue;
1662 1662
1663 1663 if (rinser->dtdsc_dirty != NULL)
1664 1664 continue;
1665 1665
1666 1666 if (rinser->dtdsc_clean != NULL)
1667 1667 continue;
1668 1668
1669 1669 rinsep = &rinser->dtdsc_rinsing;
1670 1670 break;
1671 1671 }
1672 1672
1673 1673 if (j == NCPU) {
1674 1674 /*
1675 1675 * We were unable to find another CPU that
1676 1676 * could accept this dirty list -- we are
1677 1677 * therefore unable to clean it now.
1678 1678 */
1679 1679 dtrace_dynvar_failclean++;
1680 1680 continue;
1681 1681 }
1682 1682 }
1683 1683
1684 1684 work = 1;
1685 1685
1686 1686 /*
1687 1687 * Atomically move the dirty list aside.
1688 1688 */
1689 1689 do {
1690 1690 dirty = dcpu->dtdsc_dirty;
1691 1691
1692 1692 /*
1693 1693 * Before we zap the dirty list, set the rinsing list.
1694 1694 * (This allows for a potential assertion in
1695 1695 * dtrace_dynvar(): if a free dynamic variable appears
1696 1696 * on a hash chain, either the dirty list or the
1697 1697 * rinsing list for some CPU must be non-NULL.)
1698 1698 */
1699 1699 *rinsep = dirty;
1700 1700 dtrace_membar_producer();
1701 1701 } while (dtrace_casptr(&dcpu->dtdsc_dirty,
1702 1702 dirty, NULL) != dirty);
1703 1703 }
1704 1704
1705 1705 if (!work) {
1706 1706 /*
1707 1707 * We have no work to do; we can simply return.
1708 1708 */
1709 1709 return;
1710 1710 }
1711 1711
1712 1712 dtrace_sync();
1713 1713
1714 1714 for (i = 0; i < NCPU; i++) {
1715 1715 dcpu = &dstate->dtds_percpu[i];
1716 1716
1717 1717 if (dcpu->dtdsc_rinsing == NULL)
1718 1718 continue;
1719 1719
1720 1720 /*
1721 1721 * We are now guaranteed that no hash chain contains a pointer
1722 1722 * into this dirty list; we can make it clean.
1723 1723 */
1724 1724 ASSERT(dcpu->dtdsc_clean == NULL);
1725 1725 dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
1726 1726 dcpu->dtdsc_rinsing = NULL;
1727 1727 }
1728 1728
1729 1729 /*
1730 1730 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1731 1731 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1732 1732 * This prevents a race whereby a CPU incorrectly decides that
1733 1733 * the state should be something other than DTRACE_DSTATE_CLEAN
1734 1734 * after dtrace_dynvar_clean() has completed.
1735 1735 */
1736 1736 dtrace_sync();
1737 1737
1738 1738 dstate->dtds_state = DTRACE_DSTATE_CLEAN;
1739 1739 }
1740 1740
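The detach performed above is the classic lock-free idiom of swinging a list head aside with compare-and-swap. A self-contained user-land analogue in C11 atomics, offered as a sketch (the node type and function name are invented for illustration):

	#include <stdatomic.h>
	#include <stddef.h>

	typedef struct node {
		struct node *next;
	} node_t;

	/*
	 * Atomically detach an entire singly-linked list so it can be
	 * processed elsewhere. On CAS failure, head is reloaded with
	 * the current value and we retry, just as the dtrace_casptr()
	 * loop above does.
	 */
	static node_t *
	detach_all(_Atomic(node_t *) *listp)
	{
		node_t *head = atomic_load(listp);

		while (head != NULL &&
		    !atomic_compare_exchange_weak(listp, &head, NULL))
			continue;

		return (head);
	}
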
1741 1741 /*
1742 1742 * Depending on the value of the op parameter, this function looks-up,
1743 1743 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
1744 1744 * allocation is requested, this function will return a pointer to a
1745 1745 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
1746 1746 * variable can be allocated. If NULL is returned, the appropriate counter
1747 1747 * will be incremented.
1748 1748 */
1749 1749 dtrace_dynvar_t *
1750 1750 dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
1751 1751 dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
1752 1752 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
1753 1753 {
1754 1754 uint64_t hashval = DTRACE_DYNHASH_VALID;
1755 1755 dtrace_dynhash_t *hash = dstate->dtds_hash;
1756 1756 dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
1757 1757 processorid_t me = CPU->cpu_id, cpu = me;
1758 1758 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
1759 1759 size_t bucket, ksize;
1760 1760 size_t chunksize = dstate->dtds_chunksize;
1761 1761 uintptr_t kdata, lock, nstate;
1762 1762 uint_t i;
1763 1763
1764 1764 ASSERT(nkeys != 0);
1765 1765
1766 1766 /*
1767 1767 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
1768 1768 * algorithm. For the by-value portions, we perform the algorithm in
1769 1769 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
1770 1770 * bit, and seems to have only a minute effect on distribution. For
1771 1771 * the by-reference data, we perform "One-at-a-time" iterating (safely)
1772 1772 * over each referenced byte. It's painful to do this, but it's much
1773 1773 * better than pathological hash distribution. The efficacy of the
1774 1774 * hashing algorithm (and a comparison with other algorithms) may be
1775 1775 * found by running the ::dtrace_dynstat MDB dcmd.
1776 1776 */
1777 1777 for (i = 0; i < nkeys; i++) {
1778 1778 if (key[i].dttk_size == 0) {
1779 1779 uint64_t val = key[i].dttk_value;
1780 1780
1781 1781 hashval += (val >> 48) & 0xffff;
1782 1782 hashval += (hashval << 10);
1783 1783 hashval ^= (hashval >> 6);
1784 1784
1785 1785 hashval += (val >> 32) & 0xffff;
1786 1786 hashval += (hashval << 10);
1787 1787 hashval ^= (hashval >> 6);
1788 1788
1789 1789 hashval += (val >> 16) & 0xffff;
1790 1790 hashval += (hashval << 10);
1791 1791 hashval ^= (hashval >> 6);
1792 1792
1793 1793 hashval += val & 0xffff;
1794 1794 hashval += (hashval << 10);
1795 1795 hashval ^= (hashval >> 6);
1796 1796 } else {
1797 1797 /*
1798 1798 * This is incredibly painful, but it beats the hell
1799 1799 * out of the alternative.
1800 1800 */
1801 1801 uint64_t j, size = key[i].dttk_size;
1802 1802 uintptr_t base = (uintptr_t)key[i].dttk_value;
1803 1803
1804 1804 if (!dtrace_canload(base, size, mstate, vstate))
1805 1805 break;
1806 1806
1807 1807 for (j = 0; j < size; j++) {
1808 1808 hashval += dtrace_load8(base + j);
1809 1809 hashval += (hashval << 10);
1810 1810 hashval ^= (hashval >> 6);
1811 1811 }
1812 1812 }
1813 1813 }
1814 1814
1815 1815 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
1816 1816 return (NULL);
1817 1817
1818 1818 hashval += (hashval << 3);
1819 1819 hashval ^= (hashval >> 11);
1820 1820 hashval += (hashval << 15);
1821 1821
1822 1822 /*
1823 1823 * There is a remote chance (ideally, 1 in 2^31) that our hashval
1824 1824 * comes out to be one of our two sentinel hash values. If this
1825 1825 * actually happens, we set the hashval to be a value known to be a
1826 1826 * non-sentinel value.
1827 1827 */
1828 1828 if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
1829 1829 hashval = DTRACE_DYNHASH_VALID;
1830 1830
1831 1831 /*
1832 1832 * Yes, it's painful to do a divide here. If the cycle count becomes
1833 1833 * important here, tricks can be pulled to reduce it. (However, it's
1834 1834 * critical that hash collisions be kept to an absolute minimum;
1835 1835 * they're much more painful than a divide.) It's better to have a
1836 1836 * solution that generates few collisions and still keeps things
1837 1837 * relatively simple.
1838 1838 */
1839 1839 bucket = hashval % dstate->dtds_hashsize;
1840 1840
1841 1841 if (op == DTRACE_DYNVAR_DEALLOC) {
1842 1842 volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;
1843 1843
1844 1844 for (;;) {
1845 1845 while ((lock = *lockp) & 1)
1846 1846 continue;
1847 1847
1848 1848 if (dtrace_casptr((void *)lockp,
1849 1849 (void *)lock, (void *)(lock + 1)) == (void *)lock)
1850 1850 break;
1851 1851 }
1852 1852
1853 1853 dtrace_membar_producer();
1854 1854 }
1855 1855
1856 1856 top:
1857 1857 prev = NULL;
1858 1858 lock = hash[bucket].dtdh_lock;
1859 1859
1860 1860 dtrace_membar_consumer();
1861 1861
1862 1862 start = hash[bucket].dtdh_chain;
1863 1863 ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
1864 1864 start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
1865 1865 op != DTRACE_DYNVAR_DEALLOC));
1866 1866
1867 1867 for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
1868 1868 dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
1869 1869 dtrace_key_t *dkey = &dtuple->dtt_key[0];
1870 1870
1871 1871 if (dvar->dtdv_hashval != hashval) {
1872 1872 if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
1873 1873 /*
1874 1874 * We've reached the sink, and therefore the
1875 1875 * end of the hash chain; we can kick out of
1876 1876 * the loop knowing that we have seen a valid
1877 1877 * snapshot of state.
1878 1878 */
1879 1879 ASSERT(dvar->dtdv_next == NULL);
1880 1880 ASSERT(dvar == &dtrace_dynhash_sink);
1881 1881 break;
1882 1882 }
1883 1883
1884 1884 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
1885 1885 /*
1886 1886 * We've gone off the rails: somewhere along
1887 1887 * the line, one of the members of this hash
1888 1888 * chain was deleted. Note that we could also
1889 1889 * detect this by simply letting this loop run
1890 1890 * to completion, as we would eventually hit
1891 1891 * the end of the dirty list. However, we
1892 1892 * want to avoid running the length of the
1893 1893 * dirty list unnecessarily (it might be quite
1894 1894 * long), so we catch this as early as
1895 1895 * possible by detecting the hash marker. In
1896 1896 * this case, we simply set dvar to NULL and
1897 1897 * break; the conditional after the loop will
1898 1898 * send us back to top.
1899 1899 */
1900 1900 dvar = NULL;
1901 1901 break;
1902 1902 }
1903 1903
1904 1904 goto next;
1905 1905 }
1906 1906
1907 1907 if (dtuple->dtt_nkeys != nkeys)
1908 1908 goto next;
1909 1909
1910 1910 for (i = 0; i < nkeys; i++, dkey++) {
1911 1911 if (dkey->dttk_size != key[i].dttk_size)
1912 1912 goto next; /* size or type mismatch */
1913 1913
1914 1914 if (dkey->dttk_size != 0) {
1915 1915 if (dtrace_bcmp(
1916 1916 (void *)(uintptr_t)key[i].dttk_value,
1917 1917 (void *)(uintptr_t)dkey->dttk_value,
1918 1918 dkey->dttk_size))
1919 1919 goto next;
1920 1920 } else {
1921 1921 if (dkey->dttk_value != key[i].dttk_value)
1922 1922 goto next;
1923 1923 }
1924 1924 }
1925 1925
1926 1926 if (op != DTRACE_DYNVAR_DEALLOC)
1927 1927 return (dvar);
1928 1928
1929 1929 ASSERT(dvar->dtdv_next == NULL ||
1930 1930 dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);
1931 1931
1932 1932 if (prev != NULL) {
1933 1933 ASSERT(hash[bucket].dtdh_chain != dvar);
1934 1934 ASSERT(start != dvar);
1935 1935 ASSERT(prev->dtdv_next == dvar);
1936 1936 prev->dtdv_next = dvar->dtdv_next;
1937 1937 } else {
1938 1938 if (dtrace_casptr(&hash[bucket].dtdh_chain,
1939 1939 start, dvar->dtdv_next) != start) {
1940 1940 /*
1941 1941 * We have failed to atomically swing the
1942 1942 * hash table head pointer, presumably because
1943 1943 * of a conflicting allocation on another CPU.
1944 1944 * We need to reread the hash chain and try
1945 1945 * again.
1946 1946 */
1947 1947 goto top;
1948 1948 }
1949 1949 }
1950 1950
1951 1951 dtrace_membar_producer();
1952 1952
1953 1953 /*
1954 1954 * Now set the hash value to indicate that it's free.
1955 1955 */
1956 1956 ASSERT(hash[bucket].dtdh_chain != dvar);
1957 1957 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
1958 1958
1959 1959 dtrace_membar_producer();
1960 1960
1961 1961 /*
1962 1962 * Set the next pointer to point at the dirty list, and
1963 1963 * atomically swing the dirty pointer to the newly freed dvar.
1964 1964 */
1965 1965 do {
1966 1966 next = dcpu->dtdsc_dirty;
1967 1967 dvar->dtdv_next = next;
1968 1968 } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);
1969 1969
1970 1970 /*
1971 1971 * Finally, unlock this hash bucket.
1972 1972 */
1973 1973 ASSERT(hash[bucket].dtdh_lock == lock);
1974 1974 ASSERT(lock & 1);
1975 1975 hash[bucket].dtdh_lock++;
1976 1976
1977 1977 return (NULL);
1978 1978 next:
1979 1979 prev = dvar;
1980 1980 continue;
1981 1981 }
1982 1982
1983 1983 if (dvar == NULL) {
1984 1984 /*
1985 1985 * If dvar is NULL, it is because we went off the rails:
1986 1986 * one of the elements that we traversed in the hash chain
1987 1987 * was deleted while we were traversing it. In this case,
1988 1988 * we assert that we aren't doing a dealloc (deallocs lock
1989 1989 * the hash bucket to prevent themselves from racing with
1990 1990 * one another), and retry the hash chain traversal.
1991 1991 */
1992 1992 ASSERT(op != DTRACE_DYNVAR_DEALLOC);
1993 1993 goto top;
1994 1994 }
1995 1995
1996 1996 if (op != DTRACE_DYNVAR_ALLOC) {
1997 1997 /*
1998 1998 * If we are not to allocate a new variable, we want to
1999 1999 * return NULL now. Before we return, check that the value
2000 2000 * of the lock word hasn't changed. If it has, we may have
2001 2001 * seen an inconsistent snapshot.
2002 2002 */
2003 2003 if (op == DTRACE_DYNVAR_NOALLOC) {
2004 2004 if (hash[bucket].dtdh_lock != lock)
2005 2005 goto top;
2006 2006 } else {
2007 2007 ASSERT(op == DTRACE_DYNVAR_DEALLOC);
2008 2008 ASSERT(hash[bucket].dtdh_lock == lock);
2009 2009 ASSERT(lock & 1);
2010 2010 hash[bucket].dtdh_lock++;
2011 2011 }
2012 2012
2013 2013 return (NULL);
2014 2014 }
2015 2015
2016 2016 /*
2017 2017 * We need to allocate a new dynamic variable. The size we need is the
2018 2018 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
2019 2019 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
2020 2020 * the size of any referred-to data (dsize). We then round the final
2021 2021 * size up to the chunksize for allocation.
2022 2022 */
2023 2023 for (ksize = 0, i = 0; i < nkeys; i++)
2024 2024 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
2025 2025
2026 2026 /*
2027 2027 * This should be pretty much impossible, but could happen if, say,
2028 2028 * strange DIF specified the tuple. Ideally, this should be an
2029 2029 * assertion and not an error condition -- but that requires that the
2030 2030 * chunksize calculation in dtrace_difo_chunksize() be absolutely
2031 2031 * bullet-proof. (That is, it must not be able to be fooled by
2032 2032 * malicious DIF.) Given the lack of backwards branches in DIF,
2033 2033 * solving this would presumably not amount to solving the Halting
2034 2034 * Problem -- but it still seems awfully hard.
2035 2035 */
2036 2036 if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
2037 2037 ksize + dsize > chunksize) {
2038 2038 dcpu->dtdsc_drops++;
2039 2039 return (NULL);
2040 2040 }
2041 2041
2042 2042 nstate = DTRACE_DSTATE_EMPTY;
2043 2043
2044 2044 do {
2045 2045 retry:
2046 2046 free = dcpu->dtdsc_free;
2047 2047
2048 2048 if (free == NULL) {
2049 2049 dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
2050 2050 void *rval;
2051 2051
2052 2052 if (clean == NULL) {
2053 2053 /*
2054 2054 * We're out of dynamic variable space on
2055 2055 * this CPU. Unless we have tried all CPUs,
2056 2056 * we'll try to allocate from a different
2057 2057 * CPU.
2058 2058 */
2059 2059 switch (dstate->dtds_state) {
2060 2060 case DTRACE_DSTATE_CLEAN: {
2061 2061 void *sp = &dstate->dtds_state;
2062 2062
2063 2063 if (++cpu >= NCPU)
2064 2064 cpu = 0;
2065 2065
2066 2066 if (dcpu->dtdsc_dirty != NULL &&
2067 2067 nstate == DTRACE_DSTATE_EMPTY)
2068 2068 nstate = DTRACE_DSTATE_DIRTY;
2069 2069
2070 2070 if (dcpu->dtdsc_rinsing != NULL)
2071 2071 nstate = DTRACE_DSTATE_RINSING;
2072 2072
2073 2073 dcpu = &dstate->dtds_percpu[cpu];
2074 2074
2075 2075 if (cpu != me)
2076 2076 goto retry;
2077 2077
2078 2078 (void) dtrace_cas32(sp,
2079 2079 DTRACE_DSTATE_CLEAN, nstate);
2080 2080
2081 2081 /*
2082 2082 * To increment the correct bean
2083 2083 * counter, take another lap.
2084 2084 */
2085 2085 goto retry;
2086 2086 }
2087 2087
2088 2088 case DTRACE_DSTATE_DIRTY:
2089 2089 dcpu->dtdsc_dirty_drops++;
2090 2090 break;
2091 2091
2092 2092 case DTRACE_DSTATE_RINSING:
2093 2093 dcpu->dtdsc_rinsing_drops++;
2094 2094 break;
2095 2095
2096 2096 case DTRACE_DSTATE_EMPTY:
2097 2097 dcpu->dtdsc_drops++;
2098 2098 break;
2099 2099 }
2100 2100
2101 2101 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
2102 2102 return (NULL);
2103 2103 }
2104 2104
2105 2105 /*
2106 2106 * The clean list appears to be non-empty. We want to
2107 2107 * move the clean list to the free list; we start by
2108 2108 * moving the clean pointer aside.
2109 2109 */
2110 2110 if (dtrace_casptr(&dcpu->dtdsc_clean,
2111 2111 clean, NULL) != clean) {
2112 2112 /*
2113 2113 * We are in one of two situations:
2114 2114 *
2115 2115 * (a) The clean list was switched to the
2116 2116 * free list by another CPU.
2117 2117 *
2118 2118 * (b) The clean list was added to by the
2119 2119 * cleansing cyclic.
2120 2120 *
2121 2121 * In either of these situations, we can
2122 2122 * just reattempt the free list allocation.
2123 2123 */
2124 2124 goto retry;
2125 2125 }
2126 2126
2127 2127 ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);
2128 2128
2129 2129 /*
2130 2130 * Now we'll move the clean list to our free list.
2131 2131 * It's impossible for this to fail: the only way
2132 2132 * the free list can be updated is through this
2133 2133 * code path, and only one CPU can own the clean list.
2134 2134 * Thus, it would only be possible for this to fail if
2135 2135 * this code were racing with dtrace_dynvar_clean().
2136 2136 * (That is, if dtrace_dynvar_clean() updated the clean
2137 2137 * list, and we ended up racing to update the free
2138 2138 * list.) This race is prevented by the dtrace_sync()
2139 2139 * in dtrace_dynvar_clean() -- which flushes the
2140 2140 * owners of the clean lists out before resetting
2141 2141 * the clean lists.
2142 2142 */
2143 2143 dcpu = &dstate->dtds_percpu[me];
2144 2144 rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
2145 2145 ASSERT(rval == NULL);
2146 2146 goto retry;
2147 2147 }
2148 2148
2149 2149 dvar = free;
2150 2150 new_free = dvar->dtdv_next;
2151 2151 } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);
2152 2152
2153 2153 /*
2154 2154 * We have now allocated a new chunk. We copy the tuple keys into the
2155 2155 * tuple array and copy any referenced key data into the data space
2156 2156 * following the tuple array. As we do this, we relocate dttk_value
2157 2157 * in the final tuple to point to the key data address in the chunk.
2158 2158 */
2159 2159 kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
2160 2160 dvar->dtdv_data = (void *)(kdata + ksize);
2161 2161 dvar->dtdv_tuple.dtt_nkeys = nkeys;
2162 2162
2163 2163 for (i = 0; i < nkeys; i++) {
2164 2164 dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
2165 2165 size_t kesize = key[i].dttk_size;
2166 2166
2167 2167 if (kesize != 0) {
2168 2168 dtrace_bcopy(
2169 2169 (const void *)(uintptr_t)key[i].dttk_value,
2170 2170 (void *)kdata, kesize);
2171 2171 dkey->dttk_value = kdata;
2172 2172 kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
2173 2173 } else {
2174 2174 dkey->dttk_value = key[i].dttk_value;
2175 2175 }
2176 2176
2177 2177 dkey->dttk_size = kesize;
2178 2178 }
2179 2179
2180 2180 ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
2181 2181 dvar->dtdv_hashval = hashval;
2182 2182 dvar->dtdv_next = start;
2183 2183
2184 2184 if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
2185 2185 return (dvar);
2186 2186
2187 2187 /*
2188 2188 * The cas has failed. Either another CPU is adding an element to
2189 2189 * this hash chain, or another CPU is deleting an element from this
2190 2190 * hash chain. The simplest way to deal with both of these cases
2191 2191 * (though not necessarily the most efficient) is to free our
2192 2192 * allocated block and re-attempt it all. Note that the free is
2193 2193 * to the dirty list and _not_ to the free list. This is to prevent
2194 2194 * races with allocators, above.
2195 2195 */
2196 2196 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
2197 2197
2198 2198 dtrace_membar_producer();
2199 2199
2200 2200 do {
2201 2201 free = dcpu->dtdsc_dirty;
2202 2202 dvar->dtdv_next = free;
2203 2203 } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);
2204 2204
2205 2205 goto top;
2206 2206 }
2207 2207
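For reference, the by-reference arm of the hash computed at the top of this function is Bob Jenkins' one-at-a-time function; a stand-alone 32-bit rendition follows (the kernel variant additionally walks by-value keys in 16-bit chunks and folds into a 64-bit accumulator seeded with DTRACE_DYNHASH_VALID):

	#include <stdint.h>
	#include <stddef.h>

	static uint32_t
	jenkins_one_at_a_time(const uint8_t *key, size_t len)
	{
		uint32_t hash = 0;
		size_t i;

		for (i = 0; i < len; i++) {
			hash += key[i];
			hash += hash << 10;
			hash ^= hash >> 6;
		}

		/* Final avalanche -- the same mixing steps as above. */
		hash += hash << 3;
		hash ^= hash >> 11;
		hash += hash << 15;

		return (hash);
	}
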
2208 2208 /*ARGSUSED*/
2209 2209 static void
2210 2210 dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
2211 2211 {
2212 2212 if ((int64_t)nval < (int64_t)*oval)
2213 2213 *oval = nval;
2214 2214 }
2215 2215
2216 2216 /*ARGSUSED*/
2217 2217 static void
2218 2218 dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
2219 2219 {
2220 2220 if ((int64_t)nval > (int64_t)*oval)
2221 2221 *oval = nval;
2222 2222 }
2223 2223
2224 2224 static void
2225 2225 dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
2226 2226 {
2227 2227 int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
2228 2228 int64_t val = (int64_t)nval;
2229 2229
2230 2230 if (val < 0) {
2231 2231 for (i = 0; i < zero; i++) {
2232 2232 if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
2233 2233 quanta[i] += incr;
2234 2234 return;
2235 2235 }
2236 2236 }
2237 2237 } else {
2238 2238 for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
2239 2239 if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
2240 2240 quanta[i - 1] += incr;
2241 2241 return;
2242 2242 }
2243 2243 }
2244 2244
2245 2245 quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
2246 2246 return;
2247 2247 }
2248 2248
2249 2249 ASSERT(0);
2250 2250 }
2251 2251
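A worked example of the walk above: quantize() buckets are labeled with powers of two, with the zero bucket in the middle at DTRACE_QUANTIZE_ZEROBUCKET. For a non-negative value of 42, the loop exits at the first bucket value that exceeds it (64) and increments the preceding bucket, so 42 is counted in the row labeled 32 (values 32 through 63); negative values are handled by the mirrored walk below zero.
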
2252 2252 static void
2253 2253 dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
2254 2254 {
2255 2255 uint64_t arg = *lquanta++;
2256 2256 int32_t base = DTRACE_LQUANTIZE_BASE(arg);
2257 2257 uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
2258 2258 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
2259 2259 int32_t val = (int32_t)nval, level;
2260 2260
2261 2261 ASSERT(step != 0);
2262 2262 ASSERT(levels != 0);
2263 2263
2264 2264 if (val < base) {
2265 2265 /*
2266 2266 * This is an underflow.
2267 2267 */
2268 2268 lquanta[0] += incr;
2269 2269 return;
2270 2270 }
2271 2271
2272 2272 level = (val - base) / step;
2273 2273
2274 2274 if (level < levels) {
2275 2275 lquanta[level + 1] += incr;
2276 2276 return;
2277 2277 }
2278 2278
2279 2279 /*
2280 2280 * This is an overflow.
2281 2281 */
2282 2282 lquanta[levels + 1] += incr;
2283 2283 }
2284 2284
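Concretely, for lquantize(v, 0, 100, 10) -- base 0, step 10, ten levels -- a value of 37 computes level = (37 - 0) / 10 = 3 and increments lquanta[4]; any v < 0 lands in the underflow bucket lquanta[0], and any v >= 100 in the overflow bucket lquanta[levels + 1].
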
2285 2285 static int
2286 2286 dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
2287 2287 uint16_t high, uint16_t nsteps, int64_t value)
2288 2288 {
2289 2289 int64_t this = 1, last, next;
2290 2290 int base = 1, order;
2291 2291
2292 2292 ASSERT(factor <= nsteps);
2293 2293 ASSERT(nsteps % factor == 0);
2294 2294
2295 2295 for (order = 0; order < low; order++)
2296 2296 this *= factor;
2297 2297
2298 2298 /*
2299 2299 * If our value is less than our factor taken to the power of the
2300 2300 * low order of magnitude, it goes into the zeroth bucket.
2301 2301 */
2302 2302 if (value < (last = this))
2303 2303 return (0);
2304 2304
2305 2305 for (this *= factor; order <= high; order++) {
2306 2306 int nbuckets = this > nsteps ? nsteps : this;
2307 2307
2308 2308 if ((next = this * factor) < this) {
2309 2309 /*
2310 2310 * We should not generally get log/linear quantizations
2311 2311 * with a high magnitude that allows 64-bits to
2312 2312 * overflow, but we nonetheless protect against this
2313 2313 * by explicitly checking for overflow, and clamping
2314 2314 * our value accordingly.
2315 2315 */
2316 2316 value = this - 1;
2317 2317 }
2318 2318
2319 2319 if (value < this) {
2320 2320 /*
2321 2321 * If our value lies within this order of magnitude,
2322 2322 * determine its position by taking the offset within
2323 2323 * the order of magnitude, dividing by the bucket
2324 2324 * width, and adding to our (accumulated) base.
2325 2325 */
2326 2326 return (base + (value - last) / (this / nbuckets));
2327 2327 }
2328 2328
2329 2329 base += nbuckets - (nbuckets / factor);
2330 2330 last = this;
2331 2331 this = next;
2332 2332 }
2333 2333
2334 2334 /*
2335 2335 * Our value is greater than or equal to our factor taken to the
2336 2336 * power of one plus the high magnitude -- return the top bucket.
2337 2337 */
2338 2338 return (base);
2339 2339 }
2340 2340
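To make the arithmetic concrete: with factor = 10, low = 0, high = 2, nsteps = 10 and a value of 42, the zero bucket covers values below 1 and the first order of magnitude (values 1 through 9) contributes nine buckets, leaving base at 10; since 42 < 100, the function returns 10 + (42 - 10) / (100 / 10) = 13, the bucket covering values 40 through 49.
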
2341 2341 static void
2342 2342 dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
2343 2343 {
2344 2344 uint64_t arg = *llquanta++;
2345 2345 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
2346 2346 uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
2347 2347 uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
2348 2348 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
2349 2349
2350 2350 llquanta[dtrace_aggregate_llquantize_bucket(factor,
2351 2351 low, high, nsteps, nval)] += incr;
2352 2352 }
2353 2353
2354 2354 /*ARGSUSED*/
2355 2355 static void
2356 2356 dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
2357 2357 {
2358 2358 data[0]++;
2359 2359 data[1] += nval;
2360 2360 }
2361 2361
2362 2362 /*ARGSUSED*/
2363 2363 static void
2364 2364 dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
2365 2365 {
2366 2366 int64_t snval = (int64_t)nval;
2367 2367 uint64_t tmp[2];
2368 2368
2369 2369 data[0]++;
2370 2370 data[1] += nval;
2371 2371
2372 2372 /*
2373 2373 * What we want to say here is:
2374 2374 *
2375 2375 * data[2] += nval * nval;
2376 2376 *
2377 2377 * But given that nval is 64-bit, we could easily overflow, so
2378 2378 * we do this as 128-bit arithmetic.
2379 2379 */
2380 2380 if (snval < 0)
2381 2381 snval = -snval;
2382 2382
2383 2383 dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
2384 2384 dtrace_add_128(data + 2, tmp, data + 2);
2385 2385 }
2386 2386
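Where a compiler-provided 128-bit integer is available, the same overflow-safe accumulation can be sketched in user land. This stands in for dtrace_multiply_128()/dtrace_add_128() and assumes the low word of the 128-bit sum of squares is stored first -- an assumption for illustration, not the kernel's documented layout:

	#include <stdint.h>

	/* Accumulate count, sum, and 128-bit sum of squares. */
	static void
	stddev_accum(uint64_t data[4], int64_t snval)
	{
		uint64_t mag = (uint64_t)(snval < 0 ? -snval : snval);
		unsigned __int128 acc;

		data[0]++;			/* count */
		data[1] += (uint64_t)snval;	/* running sum */

		/* data[2] is assumed low, data[3] high. */
		acc = ((unsigned __int128)data[3] << 64) | data[2];
		acc += (unsigned __int128)mag * mag;
		data[2] = (uint64_t)acc;
		data[3] = (uint64_t)(acc >> 64);
	}
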
2387 2387 /*ARGSUSED*/
2388 2388 static void
2389 2389 dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
2390 2390 {
2391 2391 *oval = *oval + 1;
2392 2392 }
2393 2393
2394 2394 /*ARGSUSED*/
2395 2395 static void
2396 2396 dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
2397 2397 {
2398 2398 *oval += nval;
2399 2399 }
2400 2400
2401 2401 /*
2402 2402 * Aggregate given the tuple in the principal data buffer, and the aggregating
2403 2403 * action denoted by the specified dtrace_aggregation_t. The aggregation
2404 2404 * buffer is specified as the buf parameter. This routine does not return
2405 2405 * failure; if there is no space in the aggregation buffer, the data will be
2406 2406 * dropped, and a corresponding counter incremented.
2407 2407 */
2408 2408 static void
2409 2409 dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
2410 2410 intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
2411 2411 {
2412 2412 dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
2413 2413 uint32_t i, ndx, size, fsize;
2414 2414 uint32_t align = sizeof (uint64_t) - 1;
2415 2415 dtrace_aggbuffer_t *agb;
2416 2416 dtrace_aggkey_t *key;
2417 2417 uint32_t hashval = 0, limit, isstr;
2418 2418 caddr_t tomax, data, kdata;
2419 2419 dtrace_actkind_t action;
2420 2420 dtrace_action_t *act;
2421 2421 uintptr_t offs;
2422 2422
2423 2423 if (buf == NULL)
2424 2424 return;
2425 2425
2426 2426 if (!agg->dtag_hasarg) {
2427 2427 /*
2428 2428  		 * Currently, only quantize(), lquantize(), and llquantize() take
2429 2429  		 * additional arguments, and they have the same semantics: an increment
2430 2430 * value that defaults to 1 when not present. If additional
2431 2431 * aggregating actions take arguments, the setting of the
2432 2432 * default argument value will presumably have to become more
2433 2433 * sophisticated...
2434 2434 */
2435 2435 arg = 1;
2436 2436 }
2437 2437
2438 2438 action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
2439 2439 size = rec->dtrd_offset - agg->dtag_base;
2440 2440 fsize = size + rec->dtrd_size;
2441 2441
2442 2442 ASSERT(dbuf->dtb_tomax != NULL);
2443 2443 data = dbuf->dtb_tomax + offset + agg->dtag_base;
2444 2444
2445 2445 if ((tomax = buf->dtb_tomax) == NULL) {
2446 2446 dtrace_buffer_drop(buf);
2447 2447 return;
2448 2448 }
2449 2449
2450 2450 /*
2451 2451 * The metastructure is always at the bottom of the buffer.
2452 2452 */
2453 2453 agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
2454 2454 sizeof (dtrace_aggbuffer_t));
2455 2455
2456 2456 if (buf->dtb_offset == 0) {
2457 2457 /*
2458 2458 * We just kludge up approximately 1/8th of the size to be
2459 2459 * buckets. If this guess ends up being routinely
2460 2460 * off-the-mark, we may need to dynamically readjust this
2461 2461 * based on past performance.
2462 2462 */
2463 2463 uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);
2464 2464
2465 2465 if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
2466 2466 (uintptr_t)tomax || hashsize == 0) {
2467 2467 /*
2468 2468 * We've been given a ludicrously small buffer;
2469 2469 * increment our drop count and leave.
2470 2470 */
2471 2471 dtrace_buffer_drop(buf);
2472 2472 return;
2473 2473 }
2474 2474
2475 2475 /*
2476 2476  	 * And now, a pathetic attempt to try to get an odd (or
2477 2477 * perchance, a prime) hash size for better hash distribution.
2478 2478 */
2479 2479 if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
2480 2480 hashsize -= DTRACE_AGGHASHSIZE_SLEW;
2481 2481
2482 2482 agb->dtagb_hashsize = hashsize;
2483 2483 agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
2484 2484 agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
2485 2485 agb->dtagb_free = (uintptr_t)agb->dtagb_hash;
2486 2486
2487 2487 for (i = 0; i < agb->dtagb_hashsize; i++)
2488 2488 agb->dtagb_hash[i] = NULL;
2489 2489 }
2490 2490
2491 2491 ASSERT(agg->dtag_first != NULL);
2492 2492 ASSERT(agg->dtag_first->dta_intuple);
2493 2493
2494 2494 /*
2495 2495 * Calculate the hash value based on the key. Note that we _don't_
2496 2496 * include the aggid in the hashing (but we will store it as part of
2497 2497 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time"
2498 2498 * algorithm: a simple, quick algorithm that has no known funnels, and
2499 2499 * gets good distribution in practice. The efficacy of the hashing
2500 2500 * algorithm (and a comparison with other algorithms) may be found by
2501 2501 * running the ::dtrace_aggstat MDB dcmd.
2502 2502 */
2503 2503 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
2504 2504 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2505 2505 limit = i + act->dta_rec.dtrd_size;
2506 2506 ASSERT(limit <= size);
2507 2507 isstr = DTRACEACT_ISSTRING(act);
2508 2508
2509 2509 for (; i < limit; i++) {
2510 2510 hashval += data[i];
2511 2511 hashval += (hashval << 10);
2512 2512 hashval ^= (hashval >> 6);
2513 2513
2514 2514 if (isstr && data[i] == '\0')
2515 2515 break;
2516 2516 }
2517 2517 }
2518 2518
2519 2519 hashval += (hashval << 3);
2520 2520 hashval ^= (hashval >> 11);
2521 2521 hashval += (hashval << 15);
2522 2522
2523 2523 /*
2524 2524 * Yes, the divide here is expensive -- but it's generally the least
2525 2525 * of the performance issues given the amount of data that we iterate
2526 2526 * over to compute hash values, compare data, etc.
2527 2527 */
2528 2528 ndx = hashval % agb->dtagb_hashsize;
2529 2529
2530 2530 for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
2531 2531 ASSERT((caddr_t)key >= tomax);
2532 2532 ASSERT((caddr_t)key < tomax + buf->dtb_size);
2533 2533
2534 2534 if (hashval != key->dtak_hashval || key->dtak_size != size)
2535 2535 continue;
2536 2536
2537 2537 kdata = key->dtak_data;
2538 2538 ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);
2539 2539
2540 2540 for (act = agg->dtag_first; act->dta_intuple;
2541 2541 act = act->dta_next) {
2542 2542 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2543 2543 limit = i + act->dta_rec.dtrd_size;
2544 2544 ASSERT(limit <= size);
2545 2545 isstr = DTRACEACT_ISSTRING(act);
2546 2546
2547 2547 for (; i < limit; i++) {
2548 2548 if (kdata[i] != data[i])
2549 2549 goto next;
2550 2550
2551 2551 if (isstr && data[i] == '\0')
2552 2552 break;
2553 2553 }
2554 2554 }
2555 2555
2556 2556 if (action != key->dtak_action) {
2557 2557 /*
2558 2558 * We are aggregating on the same value in the same
2559 2559 * aggregation with two different aggregating actions.
2560 2560 * (This should have been picked up in the compiler,
2561 2561 * so we may be dealing with errant or devious DIF.)
2562 2562 * This is an error condition; we indicate as much,
2563 2563 * and return.
2564 2564 */
2565 2565 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
2566 2566 return;
2567 2567 }
2568 2568
2569 2569 /*
2570 2570 * This is a hit: we need to apply the aggregator to
2571 2571 * the value at this key.
2572 2572 */
2573 2573 agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
2574 2574 return;
2575 2575 next:
2576 2576 continue;
2577 2577 }
2578 2578
2579 2579 /*
2580 2580 * We didn't find it. We need to allocate some zero-filled space,
2581 2581 * link it into the hash table appropriately, and apply the aggregator
2582 2582 * to the (zero-filled) value.
2583 2583 */
2584 2584 offs = buf->dtb_offset;
2585 2585 while (offs & (align - 1))
2586 2586 offs += sizeof (uint32_t);
2587 2587
2588 2588 /*
2589 2589 * If we don't have enough room to both allocate a new key _and_
2590 2590 * its associated data, increment the drop count and return.
2591 2591 */
2592 2592 if ((uintptr_t)tomax + offs + fsize >
2593 2593 agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
2594 2594 dtrace_buffer_drop(buf);
2595 2595 return;
2596 2596 }
2597 2597
2598 2598 /*CONSTCOND*/
2599 2599 ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
2600 2600 key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
2601 2601 agb->dtagb_free -= sizeof (dtrace_aggkey_t);
2602 2602
2603 2603 key->dtak_data = kdata = tomax + offs;
2604 2604 buf->dtb_offset = offs + fsize;
2605 2605
2606 2606 /*
2607 2607 * Now copy the data across.
2608 2608 */
2609 2609 *((dtrace_aggid_t *)kdata) = agg->dtag_id;
2610 2610
2611 2611 for (i = sizeof (dtrace_aggid_t); i < size; i++)
2612 2612 kdata[i] = data[i];
2613 2613
2614 2614 /*
2615 2615 * Because strings are not zeroed out by default, we need to iterate
2616 2616 * looking for actions that store strings, and we need to explicitly
2617 2617 * pad these strings out with zeroes.
2618 2618 */
2619 2619 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
2620 2620 int nul;
2621 2621
2622 2622 if (!DTRACEACT_ISSTRING(act))
2623 2623 continue;
2624 2624
2625 2625 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2626 2626 limit = i + act->dta_rec.dtrd_size;
2627 2627 ASSERT(limit <= size);
2628 2628
2629 2629 for (nul = 0; i < limit; i++) {
2630 2630 if (nul) {
2631 2631 kdata[i] = '\0';
2632 2632 continue;
2633 2633 }
2634 2634
2635 2635 if (data[i] != '\0')
2636 2636 continue;
2637 2637
2638 2638 nul = 1;
2639 2639 }
2640 2640 }
2641 2641
2642 2642 for (i = size; i < fsize; i++)
2643 2643 kdata[i] = 0;
2644 2644
2645 2645 key->dtak_hashval = hashval;
2646 2646 key->dtak_size = size;
2647 2647 key->dtak_action = action;
2648 2648 key->dtak_next = agb->dtagb_hash[ndx];
2649 2649 agb->dtagb_hash[ndx] = key;
2650 2650
2651 2651 /*
2652 2652 * Finally, apply the aggregator.
2653 2653 */
2654 2654 *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
2655 2655 agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
2656 2656 }
2657 2657
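Taken together, the allocations in this function carve the aggregation buffer from both ends: records grow upward from dtb_tomax, keys are carved downward from dtagb_free, and the bucket array plus the dtrace_aggbuffer_t metastructure sit pinned at the top. Roughly:

	tomax                                          tomax + dtb_size
	| records (grow up) ->  ...  <- keys (grow down) | hash[] | agb |

A drop is counted whenever the two frontiers would collide.
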
2658 2658 /*
2659 2659 * Given consumer state, this routine finds a speculation in the INACTIVE
2660 2660 * state and transitions it into the ACTIVE state. If there is no speculation
2661 2661 * in the INACTIVE state, 0 is returned. In this case, no error counter is
2662 2662 * incremented -- it is up to the caller to take appropriate action.
2663 2663 */
2664 2664 static int
2665 2665 dtrace_speculation(dtrace_state_t *state)
2666 2666 {
2667 2667 int i = 0;
2668 2668 dtrace_speculation_state_t current;
2669 2669 uint32_t *stat = &state->dts_speculations_unavail, count;
2670 2670
2671 2671 while (i < state->dts_nspeculations) {
2672 2672 dtrace_speculation_t *spec = &state->dts_speculations[i];
2673 2673
2674 2674 current = spec->dtsp_state;
2675 2675
2676 2676 if (current != DTRACESPEC_INACTIVE) {
2677 2677 if (current == DTRACESPEC_COMMITTINGMANY ||
2678 2678 current == DTRACESPEC_COMMITTING ||
2679 2679 current == DTRACESPEC_DISCARDING)
2680 2680 stat = &state->dts_speculations_busy;
2681 2681 i++;
2682 2682 continue;
2683 2683 }
2684 2684
2685 2685 if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2686 2686 current, DTRACESPEC_ACTIVE) == current)
2687 2687 return (i + 1);
2688 2688 }
2689 2689
2690 2690 /*
2691 2691 * We couldn't find a speculation. If we found as much as a single
2692 2692 * busy speculation buffer, we'll attribute this failure as "busy"
2693 2693 * instead of "unavail".
2694 2694 */
2695 2695 do {
2696 2696 count = *stat;
2697 2697 } while (dtrace_cas32(stat, count, count + 1) != count);
2698 2698
2699 2699 return (0);
2700 2700 }
2701 2701
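The INACTIVE-to-ACTIVE claim above is a one-shot compare-and-swap: at most one consumer can win a given slot. A self-contained C11 sketch of the same transition (the enum and names are invented for illustration):

	#include <stdatomic.h>
	#include <stdint.h>

	enum { SPEC_INACTIVE, SPEC_ACTIVE };

	/*
	 * Claim a speculation slot; only a caller observing
	 * SPEC_INACTIVE can win the CAS.
	 */
	static int
	spec_claim(_Atomic uint32_t *statep)
	{
		uint32_t expected = SPEC_INACTIVE;

		return (atomic_compare_exchange_strong(statep, &expected,
		    SPEC_ACTIVE));
	}
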
2702 2702 /*
2703 2703 * This routine commits an active speculation. If the specified speculation
2704 2704 * is not in a valid state to perform a commit(), this routine will silently do
2705 2705 * nothing. The state of the specified speculation is transitioned according
2706 2706 * to the state transition diagram outlined in <sys/dtrace_impl.h>
2707 2707 */
2708 2708 static void
2709 2709 dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
2710 2710 dtrace_specid_t which)
2711 2711 {
2712 2712 dtrace_speculation_t *spec;
2713 2713 dtrace_buffer_t *src, *dest;
2714 2714 uintptr_t daddr, saddr, dlimit, slimit;
2715 2715 dtrace_speculation_state_t current, new;
2716 2716 intptr_t offs;
2717 2717 uint64_t timestamp;
2718 2718
2719 2719 if (which == 0)
2720 2720 return;
2721 2721
2722 2722 if (which > state->dts_nspeculations) {
2723 2723 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2724 2724 return;
2725 2725 }
2726 2726
2727 2727 spec = &state->dts_speculations[which - 1];
2728 2728 src = &spec->dtsp_buffer[cpu];
2729 2729 dest = &state->dts_buffer[cpu];
2730 2730
2731 2731 do {
2732 2732 current = spec->dtsp_state;
2733 2733
2734 2734 if (current == DTRACESPEC_COMMITTINGMANY)
2735 2735 break;
2736 2736
2737 2737 switch (current) {
2738 2738 case DTRACESPEC_INACTIVE:
2739 2739 case DTRACESPEC_DISCARDING:
2740 2740 return;
2741 2741
2742 2742 case DTRACESPEC_COMMITTING:
2743 2743 /*
2744 2744 * This is only possible if we are (a) commit()'ing
2745 2745 * without having done a prior speculate() on this CPU
2746 2746 * and (b) racing with another commit() on a different
2747 2747 * CPU. There's nothing to do -- we just assert that
2748 2748 * our offset is 0.
2749 2749 */
2750 2750 ASSERT(src->dtb_offset == 0);
2751 2751 return;
2752 2752
2753 2753 case DTRACESPEC_ACTIVE:
2754 2754 new = DTRACESPEC_COMMITTING;
2755 2755 break;
2756 2756
2757 2757 case DTRACESPEC_ACTIVEONE:
2758 2758 /*
2759 2759 * This speculation is active on one CPU. If our
2760 2760 * buffer offset is non-zero, we know that the one CPU
2761 2761 * must be us. Otherwise, we are committing on a
2762 2762 * different CPU from the speculate(), and we must
2763 2763 * rely on being asynchronously cleaned.
2764 2764 */
2765 2765 if (src->dtb_offset != 0) {
2766 2766 new = DTRACESPEC_COMMITTING;
2767 2767 break;
2768 2768 }
2769 2769 /*FALLTHROUGH*/
2770 2770
2771 2771 case DTRACESPEC_ACTIVEMANY:
2772 2772 new = DTRACESPEC_COMMITTINGMANY;
2773 2773 break;
2774 2774
2775 2775 default:
2776 2776 ASSERT(0);
2777 2777 }
2778 2778 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2779 2779 current, new) != current);
2780 2780
2781 2781 /*
2782 2782 * We have set the state to indicate that we are committing this
2783 2783 * speculation. Now reserve the necessary space in the destination
2784 2784 * buffer.
2785 2785 */
2786 2786 if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
2787 2787 sizeof (uint64_t), state, NULL)) < 0) {
2788 2788 dtrace_buffer_drop(dest);
2789 2789 goto out;
2790 2790 }
2791 2791
2792 2792 /*
2793 2793 * We have sufficient space to copy the speculative buffer into the
2794 2794 * primary buffer. First, modify the speculative buffer, filling
2795 2795 * in the timestamp of all entries with the current time. The data
2796 2796 * must have the commit() time rather than the time it was traced,
2797 2797 * so that all entries in the primary buffer are in timestamp order.
2798 2798 */
2799 2799 timestamp = dtrace_gethrtime();
2800 2800 saddr = (uintptr_t)src->dtb_tomax;
2801 2801 slimit = saddr + src->dtb_offset;
2802 2802 while (saddr < slimit) {
2803 2803 size_t size;
2804 2804 dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;
2805 2805
2806 2806 if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
2807 2807 saddr += sizeof (dtrace_epid_t);
2808 2808 continue;
2809 2809 }
2810 2810 ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs);
2811 2811 size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;
2812 2812
2813 2813 ASSERT3U(saddr + size, <=, slimit);
2814 2814 ASSERT3U(size, >=, sizeof (dtrace_rechdr_t));
2815 2815 ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX);
2816 2816
2817 2817 DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);
2818 2818
2819 2819 saddr += size;
2820 2820 }
2821 2821
2822 2822 /*
2823 2823 * Copy the buffer across. (Note that this is a
2824 2824  	 * highly suboptimal bcopy(); in the unlikely event that this becomes
2825 2825 * a serious performance issue, a high-performance DTrace-specific
2826 2826 * bcopy() should obviously be invented.)
2827 2827 */
2828 2828 daddr = (uintptr_t)dest->dtb_tomax + offs;
2829 2829 dlimit = daddr + src->dtb_offset;
2830 2830 saddr = (uintptr_t)src->dtb_tomax;
2831 2831
2832 2832 /*
2833 2833 * First, the aligned portion.
2834 2834 */
2835 2835 while (dlimit - daddr >= sizeof (uint64_t)) {
2836 2836 *((uint64_t *)daddr) = *((uint64_t *)saddr);
2837 2837
2838 2838 daddr += sizeof (uint64_t);
2839 2839 saddr += sizeof (uint64_t);
2840 2840 }
2841 2841
2842 2842 /*
2843 2843 * Now any left-over bit...
2844 2844 */
2845 2845 while (dlimit - daddr)
2846 2846 *((uint8_t *)daddr++) = *((uint8_t *)saddr++);
2847 2847
2848 2848 /*
2849 2849 * Finally, commit the reserved space in the destination buffer.
2850 2850 */
2851 2851 dest->dtb_offset = offs + src->dtb_offset;
2852 2852
2853 2853 out:
2854 2854 /*
2855 2855 * If we're lucky enough to be the only active CPU on this speculation
2856 2856 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
2857 2857 */
2858 2858 if (current == DTRACESPEC_ACTIVE ||
2859 2859 (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
2860 2860 uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
2861 2861 DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);
2862 2862
2863 2863 ASSERT(rval == DTRACESPEC_COMMITTING);
2864 2864 }
2865 2865
2866 2866 src->dtb_offset = 0;
2867 2867 src->dtb_xamot_drops += src->dtb_drops;
2868 2868 src->dtb_drops = 0;
2869 2869 }
2870 2870
2871 2871 /*
2872 2872 * This routine discards an active speculation. If the specified speculation
2873 2873 * is not in a valid state to perform a discard(), this routine will silently
2874 2874 * do nothing. The state of the specified speculation is transitioned
2875 2875 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
2876 2876 */
2877 2877 static void
2878 2878 dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
2879 2879 dtrace_specid_t which)
2880 2880 {
2881 2881 dtrace_speculation_t *spec;
2882 2882 dtrace_speculation_state_t current, new;
2883 2883 dtrace_buffer_t *buf;
2884 2884
2885 2885 if (which == 0)
2886 2886 return;
2887 2887
2888 2888 if (which > state->dts_nspeculations) {
2889 2889 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2890 2890 return;
2891 2891 }
2892 2892
2893 2893 spec = &state->dts_speculations[which - 1];
2894 2894 buf = &spec->dtsp_buffer[cpu];
2895 2895
2896 2896 do {
2897 2897 current = spec->dtsp_state;
2898 2898
2899 2899 switch (current) {
2900 2900 case DTRACESPEC_INACTIVE:
2901 2901 case DTRACESPEC_COMMITTINGMANY:
2902 2902 case DTRACESPEC_COMMITTING:
2903 2903 case DTRACESPEC_DISCARDING:
2904 2904 return;
2905 2905
2906 2906 case DTRACESPEC_ACTIVE:
2907 2907 case DTRACESPEC_ACTIVEMANY:
2908 2908 new = DTRACESPEC_DISCARDING;
2909 2909 break;
2910 2910
2911 2911 case DTRACESPEC_ACTIVEONE:
2912 2912 if (buf->dtb_offset != 0) {
2913 2913 new = DTRACESPEC_INACTIVE;
2914 2914 } else {
2915 2915 new = DTRACESPEC_DISCARDING;
2916 2916 }
2917 2917 break;
2918 2918
2919 2919 default:
2920 2920 ASSERT(0);
2921 2921 }
2922 2922 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2923 2923 current, new) != current);
2924 2924
2925 2925 buf->dtb_offset = 0;
2926 2926 buf->dtb_drops = 0;
2927 2927 }
2928 2928
2929 2929 /*
2930 2930 * Note: not called from probe context. This function is called
2931 2931 * asynchronously from cross call context to clean any speculations that are
2932 2932 * in the COMMITTINGMANY or DISCARDING states. These speculations may not be
2933 2933 * transitioned back to the INACTIVE state until all CPUs have cleaned the
2934 2934 * speculation.
2935 2935 */
2936 2936 static void
2937 2937 dtrace_speculation_clean_here(dtrace_state_t *state)
2938 2938 {
2939 2939 dtrace_icookie_t cookie;
2940 2940 processorid_t cpu = CPU->cpu_id;
2941 2941 dtrace_buffer_t *dest = &state->dts_buffer[cpu];
2942 2942 dtrace_specid_t i;
2943 2943
2944 2944 cookie = dtrace_interrupt_disable();
2945 2945
2946 2946 if (dest->dtb_tomax == NULL) {
2947 2947 dtrace_interrupt_enable(cookie);
2948 2948 return;
2949 2949 }
2950 2950
2951 2951 for (i = 0; i < state->dts_nspeculations; i++) {
2952 2952 dtrace_speculation_t *spec = &state->dts_speculations[i];
2953 2953 dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];
2954 2954
2955 2955 if (src->dtb_tomax == NULL)
2956 2956 continue;
2957 2957
2958 2958 if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
2959 2959 src->dtb_offset = 0;
2960 2960 continue;
2961 2961 }
2962 2962
2963 2963 if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
2964 2964 continue;
2965 2965
2966 2966 if (src->dtb_offset == 0)
2967 2967 continue;
2968 2968
2969 2969 dtrace_speculation_commit(state, cpu, i + 1);
2970 2970 }
2971 2971
2972 2972 dtrace_interrupt_enable(cookie);
2973 2973 }
2974 2974
2975 2975 /*
2976 2976 * Note: not called from probe context. This function is called
2977 2977 * asynchronously (and at a regular interval) to clean any speculations that
2978 2978 * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there
2979 2979 * is work to be done, it cross calls all CPUs to perform that work;
2980 2980  * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to the
2981 2981 * INACTIVE state until they have been cleaned by all CPUs.
2982 2982 */
2983 2983 static void
2984 2984 dtrace_speculation_clean(dtrace_state_t *state)
2985 2985 {
2986 2986 int work = 0, rv;
2987 2987 dtrace_specid_t i;
2988 2988
2989 2989 for (i = 0; i < state->dts_nspeculations; i++) {
2990 2990 dtrace_speculation_t *spec = &state->dts_speculations[i];
2991 2991
2992 2992 ASSERT(!spec->dtsp_cleaning);
2993 2993
2994 2994 if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
2995 2995 spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
2996 2996 continue;
2997 2997
2998 2998 work++;
2999 2999 spec->dtsp_cleaning = 1;
3000 3000 }
3001 3001
3002 3002 if (!work)
3003 3003 return;
3004 3004
3005 3005 dtrace_xcall(DTRACE_CPUALL,
3006 3006 (dtrace_xcall_t)dtrace_speculation_clean_here, state);
3007 3007
3008 3008 /*
3009 3009 * We now know that all CPUs have committed or discarded their
3010 3010 * speculation buffers, as appropriate. We can now set the state
3011 3011 * to inactive.
3012 3012 */
3013 3013 for (i = 0; i < state->dts_nspeculations; i++) {
3014 3014 dtrace_speculation_t *spec = &state->dts_speculations[i];
3015 3015 dtrace_speculation_state_t current, new;
3016 3016
3017 3017 if (!spec->dtsp_cleaning)
3018 3018 continue;
3019 3019
3020 3020 current = spec->dtsp_state;
3021 3021 ASSERT(current == DTRACESPEC_DISCARDING ||
3022 3022 current == DTRACESPEC_COMMITTINGMANY);
3023 3023
3024 3024 new = DTRACESPEC_INACTIVE;
3025 3025
3026 3026 rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
3027 3027 ASSERT(rv == current);
3028 3028 spec->dtsp_cleaning = 0;
3029 3029 }
3030 3030 }
3031 3031
3032 3032 /*
3033 3033 * Called as part of a speculate() to get the speculative buffer associated
3034 3034 * with a given speculation. Returns NULL if the specified speculation is not
3035 3035 * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and
3036 3036 * the active CPU is not the specified CPU -- the speculation will be
3037 3037 * atomically transitioned into the ACTIVEMANY state.
3038 3038 */
3039 3039 static dtrace_buffer_t *
3040 3040 dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
3041 3041 dtrace_specid_t which)
3042 3042 {
3043 3043 dtrace_speculation_t *spec;
3044 3044 dtrace_speculation_state_t current, new;
3045 3045 dtrace_buffer_t *buf;
3046 3046
3047 3047 if (which == 0)
3048 3048 return (NULL);
3049 3049
3050 3050 if (which > state->dts_nspeculations) {
3051 3051 cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
3052 3052 return (NULL);
3053 3053 }
3054 3054
3055 3055 spec = &state->dts_speculations[which - 1];
3056 3056 buf = &spec->dtsp_buffer[cpuid];
3057 3057
3058 3058 do {
3059 3059 current = spec->dtsp_state;
3060 3060
3061 3061 switch (current) {
3062 3062 case DTRACESPEC_INACTIVE:
3063 3063 case DTRACESPEC_COMMITTINGMANY:
3064 3064 case DTRACESPEC_DISCARDING:
3065 3065 return (NULL);
3066 3066
3067 3067 case DTRACESPEC_COMMITTING:
3068 3068 ASSERT(buf->dtb_offset == 0);
3069 3069 return (NULL);
3070 3070
3071 3071 case DTRACESPEC_ACTIVEONE:
3072 3072 /*
3073 3073 * This speculation is currently active on one CPU.
3074 3074 * Check the offset in the buffer; if it's non-zero,
3075 3075 * that CPU must be us (and we leave the state alone).
3076 3076 * If it's zero, assume that we're starting on a new
3077 3077 * CPU -- and change the state to indicate that the
3078 3078 * speculation is active on more than one CPU.
3079 3079 */
3080 3080 if (buf->dtb_offset != 0)
3081 3081 return (buf);
3082 3082
3083 3083 new = DTRACESPEC_ACTIVEMANY;
3084 3084 break;
3085 3085
3086 3086 case DTRACESPEC_ACTIVEMANY:
3087 3087 return (buf);
3088 3088
3089 3089 case DTRACESPEC_ACTIVE:
3090 3090 new = DTRACESPEC_ACTIVEONE;
3091 3091 break;
3092 3092
3093 3093 default:
3094 3094 ASSERT(0);
3095 3095 }
3096 3096 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
3097 3097 current, new) != current);
3098 3098
3099 3099 ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
3100 3100 return (buf);
3101 3101 }
3102 3102
3103 3103 /*
3104 3104 * Return a string. In the event that the user lacks the privilege to access
3105 3105 * arbitrary kernel memory, we copy the string out to scratch memory so that we
3106 3106 * don't fail access checking.
3107 3107 *
3108 3108 * dtrace_dif_variable() uses this routine as a helper for various
3109 3109 * builtin values such as 'execname' and 'probefunc.'
3110 3110 */
3111 3111 uintptr_t
3112 3112 dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
3113 3113 dtrace_mstate_t *mstate)
3114 3114 {
3115 3115 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3116 3116 uintptr_t ret;
3117 3117 size_t strsz;
3118 3118
3119 3119 /*
3120 3120 * The easy case: this probe is allowed to read all of memory, so
3121 3121 * we can just return this as a vanilla pointer.
3122 3122 */
3123 3123 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
3124 3124 return (addr);
3125 3125
3126 3126 /*
3127 3127 * This is the tougher case: we copy the string in question from
3128 3128 * kernel memory into scratch memory and return it that way: this
3129 3129 * ensures that we won't trip up when access checking tests the
3130 3130 * BYREF return value.
3131 3131 */
3132 3132 strsz = dtrace_strlen((char *)addr, size) + 1;
3133 3133
3134 3134 if (mstate->dtms_scratch_ptr + strsz >
3135 3135 mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
3136 3136 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3137 3137 return (NULL);
3138 3138 }
3139 3139
3140 3140 dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
3141 3141 strsz);
3142 3142 ret = mstate->dtms_scratch_ptr;
3143 3143 mstate->dtms_scratch_ptr += strsz;
3144 3144 return (ret);
3145 3145 }
3146 3146
3147 3147 /*
3148 3148 * This function implements the DIF emulator's variable lookups. The emulator
3149 3149 * passes a reserved variable identifier and optional built-in array index.
3150 3150 */
3151 3151 static uint64_t
3152 3152 dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
3153 3153 uint64_t ndx)
3154 3154 {
3155 3155 /*
3156 3156 * If we're accessing one of the uncached arguments, we'll turn this
3157 3157 * into a reference in the args array.
3158 3158 */
3159 3159 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
3160 3160 ndx = v - DIF_VAR_ARG0;
3161 3161 v = DIF_VAR_ARGS;
3162 3162 }
3163 3163
3164 3164 switch (v) {
3165 3165 case DIF_VAR_ARGS:
3166 3166 if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) {
3167 3167 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |=
3168 3168 CPU_DTRACE_KPRIV;
3169 3169 return (0);
3170 3170 }
3171 3171
3172 3172 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
3173 3173 if (ndx >= sizeof (mstate->dtms_arg) /
3174 3174 sizeof (mstate->dtms_arg[0])) {
3175 3175 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
3176 3176 dtrace_provider_t *pv;
3177 3177 uint64_t val;
3178 3178
3179 3179 pv = mstate->dtms_probe->dtpr_provider;
3180 3180 if (pv->dtpv_pops.dtps_getargval != NULL)
3181 3181 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
3182 3182 mstate->dtms_probe->dtpr_id,
3183 3183 mstate->dtms_probe->dtpr_arg, ndx, aframes);
3184 3184 else
3185 3185 val = dtrace_getarg(ndx, aframes);
3186 3186
3187 3187 /*
3188 3188 * This is regrettably required to keep the compiler
3189 3189 * from tail-optimizing the call to dtrace_getarg().
3190 3190 * The condition always evaluates to true, but the
3191 3191 * compiler has no way of figuring that out a priori.
3192 3192 * (None of this would be necessary if the compiler
3193 3193 * could be relied upon to _always_ tail-optimize
3194 3194 * the call to dtrace_getarg() -- but it can't.)
3195 3195 */
3196 3196 if (mstate->dtms_probe != NULL)
3197 3197 return (val);
3198 3198
3199 3199 ASSERT(0);
3200 3200 }
3201 3201
3202 3202 return (mstate->dtms_arg[ndx]);
3203 3203
3204 3204 case DIF_VAR_UREGS: {
3205 3205 klwp_t *lwp;
3206 3206
3207 3207 if (!dtrace_priv_proc(state, mstate))
3208 3208 return (0);
3209 3209
3210 3210 if ((lwp = curthread->t_lwp) == NULL) {
3211 3211 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
3212 3212 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL;
3213 3213 return (0);
3214 3214 }
3215 3215
3216 3216 return (dtrace_getreg(lwp->lwp_regs, ndx));
3217 3217 }
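	/*
	 * (The case above services the D-level uregs[] array; e.g.
	 * uregs[R_PC] arrives with ndx == R_PC and is resolved by
	 * dtrace_getreg() against the lwp's saved register state.)
	 */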
3218 3218
3219 3219 case DIF_VAR_VMREGS: {
3220 3220 uint64_t rval;
3221 3221
3222 3222 if (!dtrace_priv_kernel(state))
3223 3223 return (0);
3224 3224
3225 3225 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3226 3226
3227 3227 rval = dtrace_getvmreg(ndx,
3228 3228 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);
3229 3229
3230 3230 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3231 3231
3232 3232 return (rval);
3233 3233 }
3234 3234
3235 3235 case DIF_VAR_CURTHREAD:
3236 3236 if (!dtrace_priv_proc(state, mstate))
3237 3237 return (0);
3238 3238 return ((uint64_t)(uintptr_t)curthread);
3239 3239
3240 3240 case DIF_VAR_TIMESTAMP:
3241 3241 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
3242 3242 mstate->dtms_timestamp = dtrace_gethrtime();
3243 3243 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
3244 3244 }
3245 3245 return (mstate->dtms_timestamp);
3246 3246
3247 3247 case DIF_VAR_VTIMESTAMP:
3248 3248 ASSERT(dtrace_vtime_references != 0);
3249 3249 return (curthread->t_dtrace_vtime);
3250 3250
3251 3251 case DIF_VAR_WALLTIMESTAMP:
3252 3252 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
3253 3253 mstate->dtms_walltimestamp = dtrace_gethrestime();
3254 3254 mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
3255 3255 }
3256 3256 return (mstate->dtms_walltimestamp);
3257 3257
3258 3258 case DIF_VAR_IPL:
3259 3259 if (!dtrace_priv_kernel(state))
3260 3260 return (0);
3261 3261 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
3262 3262 mstate->dtms_ipl = dtrace_getipl();
3263 3263 mstate->dtms_present |= DTRACE_MSTATE_IPL;
3264 3264 }
3265 3265 return (mstate->dtms_ipl);
3266 3266
3267 3267 case DIF_VAR_EPID:
3268 3268 ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
3269 3269 return (mstate->dtms_epid);
3270 3270
3271 3271 case DIF_VAR_ID:
3272 3272 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3273 3273 return (mstate->dtms_probe->dtpr_id);
3274 3274
3275 3275 case DIF_VAR_STACKDEPTH:
3276 3276 if (!dtrace_priv_kernel(state))
3277 3277 return (0);
3278 3278 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
3279 3279 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
3280 3280
3281 3281 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
3282 3282 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
3283 3283 }
3284 3284 return (mstate->dtms_stackdepth);
3285 3285
3286 3286 case DIF_VAR_USTACKDEPTH:
3287 3287 if (!dtrace_priv_proc(state, mstate))
3288 3288 return (0);
3289 3289 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
3290 3290 /*
3291 3291 * See comment in DIF_VAR_PID.
3292 3292 */
3293 3293 if (DTRACE_ANCHORED(mstate->dtms_probe) &&
3294 3294 CPU_ON_INTR(CPU)) {
3295 3295 mstate->dtms_ustackdepth = 0;
3296 3296 } else {
3297 3297 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3298 3298 mstate->dtms_ustackdepth =
3299 3299 dtrace_getustackdepth();
3300 3300 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3301 3301 }
3302 3302 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
3303 3303 }
3304 3304 return (mstate->dtms_ustackdepth);
3305 3305
3306 3306 case DIF_VAR_CALLER:
3307 3307 if (!dtrace_priv_kernel(state))
3308 3308 return (0);
3309 3309 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
3310 3310 int aframes = mstate->dtms_probe->dtpr_aframes + 2;
3311 3311
3312 3312 if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
3313 3313 /*
3314 3314 * If this is an unanchored probe, we are
3315 3315 * required to go through the slow path:
3316 3316 * dtrace_caller() only guarantees correct
3317 3317 * results for anchored probes.
3318 3318 */
3319 3319 pc_t caller[2];
3320 3320
3321 3321 dtrace_getpcstack(caller, 2, aframes,
3322 3322 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
3323 3323 mstate->dtms_caller = caller[1];
3324 3324 } else if ((mstate->dtms_caller =
3325 3325 dtrace_caller(aframes)) == -1) {
3326 3326 /*
3327 3327 * We have failed to do this the quick way;
3328 3328 * we must resort to the slower approach of
3329 3329 * calling dtrace_getpcstack().
3330 3330 */
3331 3331 pc_t caller;
3332 3332
3333 3333 dtrace_getpcstack(&caller, 1, aframes, NULL);
3334 3334 mstate->dtms_caller = caller;
3335 3335 }
3336 3336
3337 3337 mstate->dtms_present |= DTRACE_MSTATE_CALLER;
3338 3338 }
3339 3339 return (mstate->dtms_caller);
3340 3340
3341 3341 case DIF_VAR_UCALLER:
3342 3342 if (!dtrace_priv_proc(state, mstate))
3343 3343 return (0);
3344 3344
3345 3345 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
3346 3346 uint64_t ustack[3];
3347 3347
3348 3348 /*
3349 3349 * dtrace_getupcstack() fills in the first uint64_t
3350 3350 * with the current PID. The second uint64_t will
3351 3351 * be the program counter at user-level. The third
3352 3352 * uint64_t will contain the caller, which is what
3353 3353 * we're after.
3354 3354 */
3355 3355 ustack[2] = NULL;
3356 3356 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3357 3357 dtrace_getupcstack(ustack, 3);
3358 3358 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3359 3359 mstate->dtms_ucaller = ustack[2];
3360 3360 mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
3361 3361 }
3362 3362
3363 3363 return (mstate->dtms_ucaller);
3364 3364
3365 3365 case DIF_VAR_PROBEPROV:
3366 3366 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3367 3367 return (dtrace_dif_varstr(
3368 3368 (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
3369 3369 state, mstate));
3370 3370
3371 3371 case DIF_VAR_PROBEMOD:
3372 3372 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3373 3373 return (dtrace_dif_varstr(
3374 3374 (uintptr_t)mstate->dtms_probe->dtpr_mod,
3375 3375 state, mstate));
3376 3376
3377 3377 case DIF_VAR_PROBEFUNC:
3378 3378 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3379 3379 return (dtrace_dif_varstr(
3380 3380 (uintptr_t)mstate->dtms_probe->dtpr_func,
3381 3381 state, mstate));
3382 3382
3383 3383 case DIF_VAR_PROBENAME:
3384 3384 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3385 3385 return (dtrace_dif_varstr(
3386 3386 (uintptr_t)mstate->dtms_probe->dtpr_name,
3387 3387 state, mstate));
3388 3388
3389 3389 case DIF_VAR_PID:
3390 3390 if (!dtrace_priv_proc(state, mstate))
3391 3391 return (0);
3392 3392
3393 3393 /*
3394 3394 * Note that we are assuming that an unanchored probe is
3395 3395 * always due to a high-level interrupt. (And we're assuming
3396 3396 * that there is only a single high level interrupt.)
3397 3397 */
3398 3398 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3399 3399 return (pid0.pid_id);
3400 3400
3401 3401 /*
3402 3402 * It is always safe to dereference one's own t_procp pointer:
3403 3403 * it always points to a valid, allocated proc structure.
3404 3404 * Further, it is always safe to dereference the p_pidp member
3405 3405 	 * of one's own proc structure.  (These are truisms because
3406 3406 * threads and processes don't clean up their own state --
3407 3407 * they leave that task to whomever reaps them.)
3408 3408 */
3409 3409 return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
3410 3410
3411 3411 case DIF_VAR_PPID:
3412 3412 if (!dtrace_priv_proc(state, mstate))
3413 3413 return (0);
3414 3414
3415 3415 /*
3416 3416 * See comment in DIF_VAR_PID.
3417 3417 */
3418 3418 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3419 3419 return (pid0.pid_id);
3420 3420
3421 3421 /*
3422 3422 * It is always safe to dereference one's own t_procp pointer:
3423 3423 * it always points to a valid, allocated proc structure.
3424 3424 * (This is true because threads don't clean up their own
3425 3425 * state -- they leave that task to whomever reaps them.)
3426 3426 */
3427 3427 return ((uint64_t)curthread->t_procp->p_ppid);
3428 3428
3429 3429 case DIF_VAR_TID:
3430 3430 /*
3431 3431 * See comment in DIF_VAR_PID.
3432 3432 */
3433 3433 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3434 3434 return (0);
3435 3435
3436 3436 return ((uint64_t)curthread->t_tid);
3437 3437
3438 3438 case DIF_VAR_EXECNAME:
3439 3439 if (!dtrace_priv_proc(state, mstate))
3440 3440 return (0);
3441 3441
3442 3442 /*
3443 3443 * See comment in DIF_VAR_PID.
3444 3444 */
3445 3445 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3446 3446 return ((uint64_t)(uintptr_t)p0.p_user.u_comm);
3447 3447
3448 3448 /*
3449 3449 * It is always safe to dereference one's own t_procp pointer:
3450 3450 * it always points to a valid, allocated proc structure.
3451 3451 * (This is true because threads don't clean up their own
3452 3452 * state -- they leave that task to whomever reaps them.)
3453 3453 */
3454 3454 return (dtrace_dif_varstr(
3455 3455 (uintptr_t)curthread->t_procp->p_user.u_comm,
3456 3456 state, mstate));
3457 3457
3458 3458 case DIF_VAR_ZONENAME:
3459 3459 if (!dtrace_priv_proc(state, mstate))
3460 3460 return (0);
3461 3461
3462 3462 /*
3463 3463 * See comment in DIF_VAR_PID.
3464 3464 */
3465 3465 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3466 3466 return ((uint64_t)(uintptr_t)p0.p_zone->zone_name);
3467 3467
3468 3468 /*
3469 3469 * It is always safe to dereference one's own t_procp pointer:
3470 3470 * it always points to a valid, allocated proc structure.
3471 3471 * (This is true because threads don't clean up their own
3472 3472 * state -- they leave that task to whomever reaps them.)
3473 3473 */
3474 3474 return (dtrace_dif_varstr(
3475 3475 (uintptr_t)curthread->t_procp->p_zone->zone_name,
3476 3476 state, mstate));
3477 3477
3478 3478 case DIF_VAR_UID:
3479 3479 if (!dtrace_priv_proc(state, mstate))
3480 3480 return (0);
3481 3481
3482 3482 /*
3483 3483 * See comment in DIF_VAR_PID.
3484 3484 */
3485 3485 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3486 3486 return ((uint64_t)p0.p_cred->cr_uid);
3487 3487
3488 3488 /*
3489 3489 * It is always safe to dereference one's own t_procp pointer:
3490 3490 * it always points to a valid, allocated proc structure.
3491 3491 * (This is true because threads don't clean up their own
3492 3492 * state -- they leave that task to whomever reaps them.)
3493 3493 *
3494 3494 * Additionally, it is safe to dereference one's own process
3495 3495 * credential, since this is never NULL after process birth.
3496 3496 */
3497 3497 return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
3498 3498
3499 3499 case DIF_VAR_GID:
3500 3500 if (!dtrace_priv_proc(state, mstate))
3501 3501 return (0);
3502 3502
3503 3503 /*
3504 3504 * See comment in DIF_VAR_PID.
3505 3505 */
3506 3506 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3507 3507 return ((uint64_t)p0.p_cred->cr_gid);
3508 3508
3509 3509 /*
3510 3510 * It is always safe to dereference one's own t_procp pointer:
3511 3511 * it always points to a valid, allocated proc structure.
3512 3512 * (This is true because threads don't clean up their own
3513 3513 * state -- they leave that task to whomever reaps them.)
3514 3514 *
3515 3515 * Additionally, it is safe to dereference one's own process
3516 3516 * credential, since this is never NULL after process birth.
3517 3517 */
3518 3518 return ((uint64_t)curthread->t_procp->p_cred->cr_gid);
3519 3519
3520 3520 case DIF_VAR_ERRNO: {
3521 3521 klwp_t *lwp;
3522 3522 if (!dtrace_priv_proc(state, mstate))
3523 3523 return (0);
3524 3524
3525 3525 /*
3526 3526 * See comment in DIF_VAR_PID.
3527 3527 */
3528 3528 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3529 3529 return (0);
3530 3530
3531 3531 /*
3532 3532 * It is always safe to dereference one's own t_lwp pointer in
3533 3533 * the event that this pointer is non-NULL. (This is true
3534 3534 * because threads and lwps don't clean up their own state --
3535 3535 * they leave that task to whomever reaps them.)
3536 3536 */
3537 3537 if ((lwp = curthread->t_lwp) == NULL)
3538 3538 return (0);
3539 3539
3540 3540 return ((uint64_t)lwp->lwp_errno);
3541 3541 }
3542 +
3543 + case DIF_VAR_THREADNAME:
3544 + /*
3545 + * See comment in DIF_VAR_PID.
3546 + */
3547 + if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3548 + return (0);
3549 +
3550 + if (curthread->t_name == NULL)
3551 + return (0);
3552 +
3553 + /*
3554 + * Once set, ->t_name itself is never changed: any updates are
     3555 + 		 * made to the same buffer that we are pointing to.  So we are
3556 + * safe to dereference it here.
3557 + */
3558 + return (dtrace_dif_varstr((uintptr_t)curthread->t_name,
3559 + state, mstate));
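		/*
		 * (This case backs the thread-name D variable introduced
		 * with this change; e.g. a consumer might aggregate on it:
		 *
		 *	sched:::on-cpu { @[threadname] = count(); }
		 *
		 * where "threadname" is the libdtrace-level name assumed
		 * here.)
		 */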
3560 +
3542 3561 default:
3543 3562 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3544 3563 return (0);
3545 3564 }
3546 3565 }
3547 3566
3548 3567 static void
3549 3568 dtrace_dif_variable_write(dtrace_mstate_t *mstate, dtrace_state_t *state,
3550 3569 uint64_t v, uint64_t ndx, uint64_t data)
3551 3570 {
3552 3571 switch (v) {
3553 3572 case DIF_VAR_UREGS: {
3554 3573 klwp_t *lwp;
3555 3574
3556 3575 if (dtrace_destructive_disallow ||
3557 3576 !dtrace_priv_proc_control(state, mstate)) {
3558 3577 return;
3559 3578 }
3560 3579
3561 3580 if ((lwp = curthread->t_lwp) == NULL) {
3562 3581 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
3563 3582 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL;
3564 3583 return;
3565 3584 }
3566 3585
3567 3586 dtrace_setreg(lwp->lwp_regs, ndx, data);
3568 3587 return;
3569 3588 }
3570 3589
3571 3590 default:
3572 3591 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3573 3592 return;
3574 3593 }
3575 3594 }
3576 3595
3577 3596 typedef enum dtrace_json_state {
3578 3597 DTRACE_JSON_REST = 1,
3579 3598 DTRACE_JSON_OBJECT,
3580 3599 DTRACE_JSON_STRING,
3581 3600 DTRACE_JSON_STRING_ESCAPE,
3582 3601 DTRACE_JSON_STRING_ESCAPE_UNICODE,
3583 3602 DTRACE_JSON_COLON,
3584 3603 DTRACE_JSON_COMMA,
3585 3604 DTRACE_JSON_VALUE,
3586 3605 DTRACE_JSON_IDENTIFIER,
3587 3606 DTRACE_JSON_NUMBER,
3588 3607 DTRACE_JSON_NUMBER_FRAC,
3589 3608 DTRACE_JSON_NUMBER_EXP,
3590 3609 DTRACE_JSON_COLLECT_OBJECT
3591 3610 } dtrace_json_state_t;
3592 3611
3593 3612 /*
3594 3613 * This function possesses just enough knowledge about JSON to extract a single
3595 3614 * value from a JSON string and store it in the scratch buffer. It is able
3596 3615 * to extract nested object values, and members of arrays by index.
3597 3616 *
3598 3617 * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to
3599 3618 * be looked up as we descend into the object tree. e.g.
3600 3619 *
3601 3620 * foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL
3602 3621 * with nelems = 5.
3603 3622 *
3604 3623 * The run time of this function must be bounded above by strsize to limit the
3605 3624 * amount of work done in probe context. As such, it is implemented as a
3606 3625 * simple state machine, reading one character at a time using safe loads
3607 3626 * until we find the requested element, hit a parsing error or run off the
3608 3627 * end of the object or string.
3609 3628 *
3610 3629 * As there is no way for a subroutine to return an error without interrupting
3611 3630 * clause execution, we simply return NULL in the event of a missing key or any
3612 3631 * other error condition. Each NULL return in this function is commented with
3613 3632 * the error condition it represents -- parsing or otherwise.
3614 3633 *
3615 3634 * The set of states for the state machine closely matches the JSON
3616 3635 * specification (http://json.org/). Briefly:
3617 3636 *
3618 3637 * DTRACE_JSON_REST:
3619 3638 * Skip whitespace until we find either a top-level Object, moving
3620 3639 * to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE.
3621 3640 *
3622 3641 * DTRACE_JSON_OBJECT:
3623 3642 * Locate the next key String in an Object. Sets a flag to denote
3624 3643 * the next String as a key string and moves to DTRACE_JSON_STRING.
3625 3644 *
3626 3645 * DTRACE_JSON_COLON:
3627 3646 * Skip whitespace until we find the colon that separates key Strings
3628 3647 * from their values. Once found, move to DTRACE_JSON_VALUE.
3629 3648 *
3630 3649 * DTRACE_JSON_VALUE:
3631 3650 * Detects the type of the next value (String, Number, Identifier, Object
3632 3651 * or Array) and routes to the states that process that type. Here we also
3633 3652 * deal with the element selector list if we are requested to traverse down
3634 3653 * into the object tree.
3635 3654 *
3636 3655 * DTRACE_JSON_COMMA:
3637 3656 * Skip whitespace until we find the comma that separates key-value pairs
3638 3657 * in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays
3639 3658 * (similarly DTRACE_JSON_VALUE). All following literal value processing
3640 3659 * states return to this state at the end of their value, unless otherwise
3641 3660 * noted.
3642 3661 *
3643 3662 * DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP:
3644 3663 * Processes a Number literal from the JSON, including any exponent
3645 3664 * component that may be present. Numbers are returned as strings, which
3646 3665 * may be passed to strtoll() if an integer is required.
3647 3666 *
3648 3667 * DTRACE_JSON_IDENTIFIER:
3649 3668 * Processes a "true", "false" or "null" literal in the JSON.
3650 3669 *
3651 3670 * DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE,
3652 3671 * DTRACE_JSON_STRING_ESCAPE_UNICODE:
3653 3672 * Processes a String literal from the JSON, whether the String denotes
3654 3673 * a key, a value or part of a larger Object. Handles all escape sequences
3655 3674 * present in the specification, including four-digit unicode characters,
3656 3675 * but merely includes the escape sequence without converting it to the
3657 3676 * actual escaped character. If the String is flagged as a key, we
3658 3677 * move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA.
3659 3678 *
3660 3679 * DTRACE_JSON_COLLECT_OBJECT:
3661 3680 * This state collects an entire Object (or Array), correctly handling
3662 3681 * embedded strings. If the full element selector list matches this nested
3663 3682 * object, we return the Object in full as a string. If not, we use this
3664 3683 * state to skip to the next value at this level and continue processing.
3665 3684 *
3666 3685 * NOTE: This function uses various macros from strtolctype.h to manipulate
3667 3686 * digit values, etc -- these have all been checked to ensure they make
3668 3687 * no additional function calls.
3669 3688 */
3670 3689 static char *
3671 3690 dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems,
3672 3691 char *dest)
3673 3692 {
3674 3693 dtrace_json_state_t state = DTRACE_JSON_REST;
3675 3694 int64_t array_elem = INT64_MIN;
3676 3695 int64_t array_pos = 0;
3677 3696 uint8_t escape_unicount = 0;
3678 3697 boolean_t string_is_key = B_FALSE;
3679 3698 boolean_t collect_object = B_FALSE;
3680 3699 boolean_t found_key = B_FALSE;
3681 3700 boolean_t in_array = B_FALSE;
3682 3701 uint32_t braces = 0, brackets = 0;
3683 3702 char *elem = elemlist;
3684 3703 char *dd = dest;
3685 3704 uintptr_t cur;
3686 3705
3687 3706 for (cur = json; cur < json + size; cur++) {
3688 3707 char cc = dtrace_load8(cur);
3689 3708 if (cc == '\0')
3690 3709 return (NULL);
3691 3710
3692 3711 switch (state) {
3693 3712 case DTRACE_JSON_REST:
3694 3713 if (isspace(cc))
3695 3714 break;
3696 3715
3697 3716 if (cc == '{') {
3698 3717 state = DTRACE_JSON_OBJECT;
3699 3718 break;
3700 3719 }
3701 3720
3702 3721 if (cc == '[') {
3703 3722 in_array = B_TRUE;
3704 3723 array_pos = 0;
3705 3724 array_elem = dtrace_strtoll(elem, 10, size);
3706 3725 found_key = array_elem == 0 ? B_TRUE : B_FALSE;
3707 3726 state = DTRACE_JSON_VALUE;
3708 3727 break;
3709 3728 }
3710 3729
3711 3730 /*
3712 3731 * ERROR: expected to find a top-level object or array.
3713 3732 */
3714 3733 return (NULL);
3715 3734 case DTRACE_JSON_OBJECT:
3716 3735 if (isspace(cc))
3717 3736 break;
3718 3737
3719 3738 if (cc == '"') {
3720 3739 state = DTRACE_JSON_STRING;
3721 3740 string_is_key = B_TRUE;
3722 3741 break;
3723 3742 }
3724 3743
3725 3744 /*
3726 3745 * ERROR: either the object did not start with a key
3727 3746 * string, or we've run off the end of the object
3728 3747 * without finding the requested key.
3729 3748 */
3730 3749 return (NULL);
3731 3750 case DTRACE_JSON_STRING:
3732 3751 if (cc == '\\') {
3733 3752 *dd++ = '\\';
3734 3753 state = DTRACE_JSON_STRING_ESCAPE;
3735 3754 break;
3736 3755 }
3737 3756
3738 3757 if (cc == '"') {
3739 3758 if (collect_object) {
3740 3759 /*
3741 3760 * We don't reset the dest here, as
3742 3761 * the string is part of a larger
3743 3762 * object being collected.
3744 3763 */
3745 3764 *dd++ = cc;
3746 3765 collect_object = B_FALSE;
3747 3766 state = DTRACE_JSON_COLLECT_OBJECT;
3748 3767 break;
3749 3768 }
3750 3769 *dd = '\0';
3751 3770 dd = dest; /* reset string buffer */
3752 3771 if (string_is_key) {
3753 3772 if (dtrace_strncmp(dest, elem,
3754 3773 size) == 0)
3755 3774 found_key = B_TRUE;
3756 3775 } else if (found_key) {
3757 3776 if (nelems > 1) {
3758 3777 /*
3759 3778 * We expected an object, not
3760 3779 * this string.
3761 3780 */
3762 3781 return (NULL);
3763 3782 }
3764 3783 return (dest);
3765 3784 }
3766 3785 state = string_is_key ? DTRACE_JSON_COLON :
3767 3786 DTRACE_JSON_COMMA;
3768 3787 string_is_key = B_FALSE;
3769 3788 break;
3770 3789 }
3771 3790
3772 3791 *dd++ = cc;
3773 3792 break;
3774 3793 case DTRACE_JSON_STRING_ESCAPE:
3775 3794 *dd++ = cc;
3776 3795 if (cc == 'u') {
3777 3796 escape_unicount = 0;
3778 3797 state = DTRACE_JSON_STRING_ESCAPE_UNICODE;
3779 3798 } else {
3780 3799 state = DTRACE_JSON_STRING;
3781 3800 }
3782 3801 break;
3783 3802 case DTRACE_JSON_STRING_ESCAPE_UNICODE:
3784 3803 if (!isxdigit(cc)) {
3785 3804 /*
3786 3805 * ERROR: invalid unicode escape, expected
3787 3806 			 * four valid hexadecimal digits.
3788 3807 */
3789 3808 return (NULL);
3790 3809 }
3791 3810
3792 3811 *dd++ = cc;
3793 3812 if (++escape_unicount == 4)
3794 3813 state = DTRACE_JSON_STRING;
3795 3814 break;
3796 3815 case DTRACE_JSON_COLON:
3797 3816 if (isspace(cc))
3798 3817 break;
3799 3818
3800 3819 if (cc == ':') {
3801 3820 state = DTRACE_JSON_VALUE;
3802 3821 break;
3803 3822 }
3804 3823
3805 3824 /*
3806 3825 * ERROR: expected a colon.
3807 3826 */
3808 3827 return (NULL);
3809 3828 case DTRACE_JSON_COMMA:
3810 3829 if (isspace(cc))
3811 3830 break;
3812 3831
3813 3832 if (cc == ',') {
3814 3833 if (in_array) {
3815 3834 state = DTRACE_JSON_VALUE;
3816 3835 if (++array_pos == array_elem)
3817 3836 found_key = B_TRUE;
3818 3837 } else {
3819 3838 state = DTRACE_JSON_OBJECT;
3820 3839 }
3821 3840 break;
3822 3841 }
3823 3842
3824 3843 /*
3825 3844 * ERROR: either we hit an unexpected character, or
3826 3845 * we reached the end of the object or array without
3827 3846 * finding the requested key.
3828 3847 */
3829 3848 return (NULL);
3830 3849 case DTRACE_JSON_IDENTIFIER:
3831 3850 if (islower(cc)) {
3832 3851 *dd++ = cc;
3833 3852 break;
3834 3853 }
3835 3854
3836 3855 *dd = '\0';
3837 3856 dd = dest; /* reset string buffer */
3838 3857
3839 3858 if (dtrace_strncmp(dest, "true", 5) == 0 ||
3840 3859 dtrace_strncmp(dest, "false", 6) == 0 ||
3841 3860 dtrace_strncmp(dest, "null", 5) == 0) {
3842 3861 if (found_key) {
3843 3862 if (nelems > 1) {
3844 3863 /*
3845 3864 * ERROR: We expected an object,
3846 3865 * not this identifier.
3847 3866 */
3848 3867 return (NULL);
3849 3868 }
3850 3869 return (dest);
3851 3870 } else {
3852 3871 cur--;
3853 3872 state = DTRACE_JSON_COMMA;
3854 3873 break;
3855 3874 }
3856 3875 }
3857 3876
3858 3877 /*
3859 3878 * ERROR: we did not recognise the identifier as one
3860 3879 * of those in the JSON specification.
3861 3880 */
3862 3881 return (NULL);
3863 3882 case DTRACE_JSON_NUMBER:
3864 3883 if (cc == '.') {
3865 3884 *dd++ = cc;
3866 3885 state = DTRACE_JSON_NUMBER_FRAC;
3867 3886 break;
3868 3887 }
3869 3888
3870 3889 if (cc == 'x' || cc == 'X') {
3871 3890 /*
3872 3891 * ERROR: specification explicitly excludes
3873 3892 				 * hexadecimal or octal numbers.
3874 3893 */
3875 3894 return (NULL);
3876 3895 }
3877 3896
3878 3897 /* FALLTHRU */
3879 3898 case DTRACE_JSON_NUMBER_FRAC:
3880 3899 if (cc == 'e' || cc == 'E') {
3881 3900 *dd++ = cc;
3882 3901 state = DTRACE_JSON_NUMBER_EXP;
3883 3902 break;
3884 3903 }
3885 3904
3886 3905 if (cc == '+' || cc == '-') {
3887 3906 /*
3888 3907 * ERROR: expect sign as part of exponent only.
3889 3908 */
3890 3909 return (NULL);
3891 3910 }
3892 3911 /* FALLTHRU */
3893 3912 case DTRACE_JSON_NUMBER_EXP:
3894 3913 if (isdigit(cc) || cc == '+' || cc == '-') {
3895 3914 *dd++ = cc;
3896 3915 break;
3897 3916 }
3898 3917
3899 3918 *dd = '\0';
3900 3919 dd = dest; /* reset string buffer */
3901 3920 if (found_key) {
3902 3921 if (nelems > 1) {
3903 3922 /*
3904 3923 * ERROR: We expected an object, not
3905 3924 * this number.
3906 3925 */
3907 3926 return (NULL);
3908 3927 }
3909 3928 return (dest);
3910 3929 }
3911 3930
3912 3931 cur--;
3913 3932 state = DTRACE_JSON_COMMA;
3914 3933 break;
3915 3934 case DTRACE_JSON_VALUE:
3916 3935 if (isspace(cc))
3917 3936 break;
3918 3937
3919 3938 if (cc == '{' || cc == '[') {
3920 3939 if (nelems > 1 && found_key) {
3921 3940 in_array = cc == '[' ? B_TRUE : B_FALSE;
3922 3941 /*
3923 3942 * If our element selector directs us
3924 3943 * to descend into this nested object,
3925 3944 * then move to the next selector
3926 3945 * element in the list and restart the
3927 3946 * state machine.
3928 3947 */
3929 3948 while (*elem != '\0')
3930 3949 elem++;
3931 3950 elem++; /* skip the inter-element NUL */
3932 3951 nelems--;
3933 3952 dd = dest;
3934 3953 if (in_array) {
3935 3954 state = DTRACE_JSON_VALUE;
3936 3955 array_pos = 0;
3937 3956 array_elem = dtrace_strtoll(
3938 3957 elem, 10, size);
3939 3958 found_key = array_elem == 0 ?
3940 3959 B_TRUE : B_FALSE;
3941 3960 } else {
3942 3961 found_key = B_FALSE;
3943 3962 state = DTRACE_JSON_OBJECT;
3944 3963 }
3945 3964 break;
3946 3965 }
3947 3966
3948 3967 /*
3949 3968 * Otherwise, we wish to either skip this
3950 3969 * nested object or return it in full.
3951 3970 */
3952 3971 if (cc == '[')
3953 3972 brackets = 1;
3954 3973 else
3955 3974 braces = 1;
3956 3975 *dd++ = cc;
3957 3976 state = DTRACE_JSON_COLLECT_OBJECT;
3958 3977 break;
3959 3978 }
3960 3979
3961 3980 if (cc == '"') {
3962 3981 state = DTRACE_JSON_STRING;
3963 3982 break;
3964 3983 }
3965 3984
3966 3985 if (islower(cc)) {
3967 3986 /*
3968 3987 * Here we deal with true, false and null.
3969 3988 */
3970 3989 *dd++ = cc;
3971 3990 state = DTRACE_JSON_IDENTIFIER;
3972 3991 break;
3973 3992 }
3974 3993
3975 3994 if (cc == '-' || isdigit(cc)) {
3976 3995 *dd++ = cc;
3977 3996 state = DTRACE_JSON_NUMBER;
3978 3997 break;
3979 3998 }
3980 3999
3981 4000 /*
3982 4001 * ERROR: unexpected character at start of value.
3983 4002 */
3984 4003 return (NULL);
3985 4004 case DTRACE_JSON_COLLECT_OBJECT:
3986 4005 if (cc == '\0')
3987 4006 /*
3988 4007 * ERROR: unexpected end of input.
3989 4008 */
3990 4009 return (NULL);
3991 4010
3992 4011 *dd++ = cc;
3993 4012 if (cc == '"') {
3994 4013 collect_object = B_TRUE;
3995 4014 state = DTRACE_JSON_STRING;
3996 4015 break;
3997 4016 }
3998 4017
3999 4018 if (cc == ']') {
4000 4019 if (brackets-- == 0) {
4001 4020 /*
4002 4021 * ERROR: unbalanced brackets.
4003 4022 */
4004 4023 return (NULL);
4005 4024 }
4006 4025 } else if (cc == '}') {
4007 4026 if (braces-- == 0) {
4008 4027 /*
4009 4028 * ERROR: unbalanced braces.
4010 4029 */
4011 4030 return (NULL);
4012 4031 }
4013 4032 } else if (cc == '{') {
4014 4033 braces++;
4015 4034 } else if (cc == '[') {
4016 4035 brackets++;
4017 4036 }
4018 4037
4019 4038 if (brackets == 0 && braces == 0) {
4020 4039 if (found_key) {
4021 4040 *dd = '\0';
4022 4041 return (dest);
4023 4042 }
4024 4043 dd = dest; /* reset string buffer */
4025 4044 state = DTRACE_JSON_COMMA;
4026 4045 }
4027 4046 break;
4028 4047 }
4029 4048 }
4030 4049 return (NULL);
4031 4050 }
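/*
 * For example (a sketch): given a probe argument this->j holding the JSON
 * string { "foo": [ { "bar": 32 } ] }, the D call json(this->j, "foo[0].bar")
 * returns the string "32", which may then be handed to strtoll() if an
 * integer is required.
 */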
4032 4051
4033 4052 /*
4034 4053 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
4035 4054 * Notice that we don't bother validating the proper number of arguments or
4036 4055 * their types in the tuple stack. This isn't needed because all argument
4037 4056 * interpretation is safe because of our load safety -- the worst that can
4038 4057 * happen is that a bogus program can obtain bogus results.
4039 4058 */
4040 4059 static void
4041 4060 dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
4042 4061 dtrace_key_t *tupregs, int nargs,
4043 4062 dtrace_mstate_t *mstate, dtrace_state_t *state)
4044 4063 {
4045 4064 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
4046 4065 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
4047 4066 dtrace_vstate_t *vstate = &state->dts_vstate;
4048 4067
4049 4068 union {
4050 4069 mutex_impl_t mi;
4051 4070 uint64_t mx;
4052 4071 } m;
4053 4072
4054 4073 union {
4055 4074 krwlock_t ri;
4056 4075 uintptr_t rw;
4057 4076 } r;
4058 4077
4059 4078 switch (subr) {
4060 4079 case DIF_SUBR_RAND:
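		/*
		 * A simple linear-congruential-style computation seeded from
		 * the high-resolution timestamp: adequate for probabilistic
		 * tracing decisions, but in no way cryptographically strong.
		 */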
4061 4080 regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
4062 4081 break;
4063 4082
4064 4083 case DIF_SUBR_MUTEX_OWNED:
4065 4084 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
4066 4085 mstate, vstate)) {
4067 4086 regs[rd] = NULL;
4068 4087 break;
4069 4088 }
4070 4089
4071 4090 m.mx = dtrace_load64(tupregs[0].dttk_value);
4072 4091 if (MUTEX_TYPE_ADAPTIVE(&m.mi))
4073 4092 regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
4074 4093 else
4075 4094 regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
4076 4095 break;
4077 4096
4078 4097 case DIF_SUBR_MUTEX_OWNER:
4079 4098 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
4080 4099 mstate, vstate)) {
4081 4100 regs[rd] = NULL;
4082 4101 break;
4083 4102 }
4084 4103
4085 4104 m.mx = dtrace_load64(tupregs[0].dttk_value);
4086 4105 if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
4087 4106 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
4088 4107 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
4089 4108 else
4090 4109 regs[rd] = 0;
4091 4110 break;
4092 4111
4093 4112 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
4094 4113 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
4095 4114 mstate, vstate)) {
4096 4115 regs[rd] = NULL;
4097 4116 break;
4098 4117 }
4099 4118
4100 4119 m.mx = dtrace_load64(tupregs[0].dttk_value);
4101 4120 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
4102 4121 break;
4103 4122
4104 4123 case DIF_SUBR_MUTEX_TYPE_SPIN:
4105 4124 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
4106 4125 mstate, vstate)) {
4107 4126 regs[rd] = NULL;
4108 4127 break;
4109 4128 }
4110 4129
4111 4130 m.mx = dtrace_load64(tupregs[0].dttk_value);
4112 4131 regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
4113 4132 break;
4114 4133
4115 4134 case DIF_SUBR_RW_READ_HELD: {
4116 4135 uintptr_t tmp;
4117 4136
4118 4137 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
4119 4138 mstate, vstate)) {
4120 4139 regs[rd] = NULL;
4121 4140 break;
4122 4141 }
4123 4142
4124 4143 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
4125 4144 regs[rd] = _RW_READ_HELD(&r.ri, tmp);
4126 4145 break;
4127 4146 }
4128 4147
4129 4148 case DIF_SUBR_RW_WRITE_HELD:
4130 4149 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
4131 4150 mstate, vstate)) {
4132 4151 regs[rd] = NULL;
4133 4152 break;
4134 4153 }
4135 4154
4136 4155 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
4137 4156 regs[rd] = _RW_WRITE_HELD(&r.ri);
4138 4157 break;
4139 4158
4140 4159 case DIF_SUBR_RW_ISWRITER:
4141 4160 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
4142 4161 mstate, vstate)) {
4143 4162 regs[rd] = NULL;
4144 4163 break;
4145 4164 }
4146 4165
4147 4166 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
4148 4167 regs[rd] = _RW_ISWRITER(&r.ri);
4149 4168 break;
4150 4169
4151 4170 case DIF_SUBR_BCOPY: {
4152 4171 /*
4153 4172 * We need to be sure that the destination is in the scratch
4154 4173 * region -- no other region is allowed.
4155 4174 */
4156 4175 uintptr_t src = tupregs[0].dttk_value;
4157 4176 uintptr_t dest = tupregs[1].dttk_value;
4158 4177 size_t size = tupregs[2].dttk_value;
4159 4178
4160 4179 if (!dtrace_inscratch(dest, size, mstate)) {
4161 4180 *flags |= CPU_DTRACE_BADADDR;
4162 4181 *illval = regs[rd];
4163 4182 break;
4164 4183 }
4165 4184
4166 4185 if (!dtrace_canload(src, size, mstate, vstate)) {
4167 4186 regs[rd] = NULL;
4168 4187 break;
4169 4188 }
4170 4189
4171 4190 dtrace_bcopy((void *)src, (void *)dest, size);
4172 4191 break;
4173 4192 }
4174 4193
4175 4194 case DIF_SUBR_ALLOCA:
4176 4195 case DIF_SUBR_COPYIN: {
4177 4196 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
4178 4197 uint64_t size =
4179 4198 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
4180 4199 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;
4181 4200
4182 4201 /*
4183 4202 * This action doesn't require any credential checks since
4184 4203 * probes will not activate in user contexts to which the
4185 4204 * enabling user does not have permissions.
4186 4205 */
4187 4206
4188 4207 /*
4189 4208 * Rounding up the user allocation size could have overflowed
4190 4209 * a large, bogus allocation (like -1ULL) to 0.
4191 4210 */
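		/*
		 * (e.g. size == (uint64_t)-1 with an unaligned scratch
		 * pointer: the round-up makes dest - dtms_scratch_ptr
		 * nonzero, so scratch_size wraps to a value smaller than
		 * size and the first test below fires; with an aligned
		 * pointer, DTRACE_INSCRATCH() rejects the request instead.)
		 */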
4192 4211 if (scratch_size < size ||
4193 4212 !DTRACE_INSCRATCH(mstate, scratch_size)) {
4194 4213 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4195 4214 regs[rd] = NULL;
4196 4215 break;
4197 4216 }
4198 4217
4199 4218 if (subr == DIF_SUBR_COPYIN) {
4200 4219 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
4201 4220 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
4202 4221 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
4203 4222 }
4204 4223
4205 4224 mstate->dtms_scratch_ptr += scratch_size;
4206 4225 regs[rd] = dest;
4207 4226 break;
4208 4227 }
4209 4228
4210 4229 case DIF_SUBR_COPYINTO: {
4211 4230 uint64_t size = tupregs[1].dttk_value;
4212 4231 uintptr_t dest = tupregs[2].dttk_value;
4213 4232
4214 4233 /*
4215 4234 * This action doesn't require any credential checks since
4216 4235 * probes will not activate in user contexts to which the
4217 4236 * enabling user does not have permissions.
4218 4237 */
4219 4238 if (!dtrace_inscratch(dest, size, mstate)) {
4220 4239 *flags |= CPU_DTRACE_BADADDR;
4221 4240 *illval = regs[rd];
4222 4241 break;
4223 4242 }
4224 4243
4225 4244 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
4226 4245 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
4227 4246 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
4228 4247 break;
4229 4248 }
4230 4249
4231 4250 case DIF_SUBR_COPYINSTR: {
4232 4251 uintptr_t dest = mstate->dtms_scratch_ptr;
4233 4252 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4234 4253
4235 4254 if (nargs > 1 && tupregs[1].dttk_value < size)
4236 4255 size = tupregs[1].dttk_value + 1;
4237 4256
4238 4257 /*
4239 4258 * This action doesn't require any credential checks since
4240 4259 * probes will not activate in user contexts to which the
4241 4260 * enabling user does not have permissions.
4242 4261 */
4243 4262 if (!DTRACE_INSCRATCH(mstate, size)) {
4244 4263 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4245 4264 regs[rd] = NULL;
4246 4265 break;
4247 4266 }
4248 4267
4249 4268 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
4250 4269 dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
4251 4270 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
4252 4271
4253 4272 ((char *)dest)[size - 1] = '\0';
4254 4273 mstate->dtms_scratch_ptr += size;
4255 4274 regs[rd] = dest;
4256 4275 break;
4257 4276 }
4258 4277
4259 4278 case DIF_SUBR_MSGSIZE:
4260 4279 case DIF_SUBR_MSGDSIZE: {
4261 4280 uintptr_t baddr = tupregs[0].dttk_value, daddr;
4262 4281 uintptr_t wptr, rptr;
4263 4282 size_t count = 0;
4264 4283 int cont = 0;
4265 4284
4266 4285 while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
4267 4286
4268 4287 if (!dtrace_canload(baddr, sizeof (mblk_t), mstate,
4269 4288 vstate)) {
4270 4289 regs[rd] = NULL;
4271 4290 break;
4272 4291 }
4273 4292
4274 4293 wptr = dtrace_loadptr(baddr +
4275 4294 offsetof(mblk_t, b_wptr));
4276 4295
4277 4296 rptr = dtrace_loadptr(baddr +
4278 4297 offsetof(mblk_t, b_rptr));
4279 4298
4280 4299 if (wptr < rptr) {
4281 4300 *flags |= CPU_DTRACE_BADADDR;
4282 4301 *illval = tupregs[0].dttk_value;
4283 4302 break;
4284 4303 }
4285 4304
4286 4305 daddr = dtrace_loadptr(baddr +
4287 4306 offsetof(mblk_t, b_datap));
4288 4307
4289 4308 baddr = dtrace_loadptr(baddr +
4290 4309 offsetof(mblk_t, b_cont));
4291 4310
4292 4311 /*
4293 4312 			 * We want to guard against denial-of-service here,
4294 4313 * so we're only going to search the list for
4295 4314 * dtrace_msgdsize_max mblks.
4296 4315 */
4297 4316 if (cont++ > dtrace_msgdsize_max) {
4298 4317 *flags |= CPU_DTRACE_ILLOP;
4299 4318 break;
4300 4319 }
4301 4320
4302 4321 if (subr == DIF_SUBR_MSGDSIZE) {
4303 4322 if (dtrace_load8(daddr +
4304 4323 offsetof(dblk_t, db_type)) != M_DATA)
4305 4324 continue;
4306 4325 }
4307 4326
4308 4327 count += wptr - rptr;
4309 4328 }
4310 4329
4311 4330 if (!(*flags & CPU_DTRACE_FAULT))
4312 4331 regs[rd] = count;
4313 4332
4314 4333 break;
4315 4334 }
4316 4335
4317 4336 case DIF_SUBR_PROGENYOF: {
4318 4337 pid_t pid = tupregs[0].dttk_value;
4319 4338 proc_t *p;
4320 4339 int rval = 0;
4321 4340
4322 4341 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
4323 4342
4324 4343 for (p = curthread->t_procp; p != NULL; p = p->p_parent) {
4325 4344 if (p->p_pidp->pid_id == pid) {
4326 4345 rval = 1;
4327 4346 break;
4328 4347 }
4329 4348 }
4330 4349
4331 4350 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
4332 4351
4333 4352 regs[rd] = rval;
4334 4353 break;
4335 4354 }
4336 4355
4337 4356 case DIF_SUBR_SPECULATION:
4338 4357 regs[rd] = dtrace_speculation(state);
4339 4358 break;
4340 4359
4341 4360 case DIF_SUBR_COPYOUT: {
4342 4361 uintptr_t kaddr = tupregs[0].dttk_value;
4343 4362 uintptr_t uaddr = tupregs[1].dttk_value;
4344 4363 uint64_t size = tupregs[2].dttk_value;
4345 4364
4346 4365 if (!dtrace_destructive_disallow &&
4347 4366 dtrace_priv_proc_control(state, mstate) &&
4348 4367 !dtrace_istoxic(kaddr, size) &&
4349 4368 dtrace_canload(kaddr, size, mstate, vstate)) {
4350 4369 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
4351 4370 dtrace_copyout(kaddr, uaddr, size, flags);
4352 4371 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
4353 4372 }
4354 4373 break;
4355 4374 }
4356 4375
4357 4376 case DIF_SUBR_COPYOUTSTR: {
4358 4377 uintptr_t kaddr = tupregs[0].dttk_value;
4359 4378 uintptr_t uaddr = tupregs[1].dttk_value;
4360 4379 uint64_t size = tupregs[2].dttk_value;
4361 4380 size_t lim;
4362 4381
4363 4382 if (!dtrace_destructive_disallow &&
4364 4383 dtrace_priv_proc_control(state, mstate) &&
4365 4384 !dtrace_istoxic(kaddr, size) &&
4366 4385 dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) {
4367 4386 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
4368 4387 dtrace_copyoutstr(kaddr, uaddr, lim, flags);
4369 4388 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
4370 4389 }
4371 4390 break;
4372 4391 }
4373 4392
4374 4393 case DIF_SUBR_STRLEN: {
4375 4394 size_t size = state->dts_options[DTRACEOPT_STRSIZE];
4376 4395 uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
4377 4396 size_t lim;
4378 4397
4379 4398 if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) {
4380 4399 regs[rd] = NULL;
4381 4400 break;
4382 4401 }
4383 4402 regs[rd] = dtrace_strlen((char *)addr, lim);
4384 4403
4385 4404 break;
4386 4405 }
4387 4406
4388 4407 case DIF_SUBR_STRCHR:
4389 4408 case DIF_SUBR_STRRCHR: {
4390 4409 /*
4391 4410 * We're going to iterate over the string looking for the
4392 4411 * specified character. We will iterate until we have reached
4393 4412 * the string length or we have found the character. If this
4394 4413 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
4395 4414 * of the specified character instead of the first.
4396 4415 */
4397 4416 uintptr_t addr = tupregs[0].dttk_value;
4398 4417 uintptr_t addr_limit;
4399 4418 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4400 4419 size_t lim;
4401 4420 char c, target = (char)tupregs[1].dttk_value;
4402 4421
4403 4422 if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) {
4404 4423 regs[rd] = NULL;
4405 4424 break;
4406 4425 }
4407 4426 addr_limit = addr + lim;
4408 4427
4409 4428 for (regs[rd] = NULL; addr < addr_limit; addr++) {
4410 4429 if ((c = dtrace_load8(addr)) == target) {
4411 4430 regs[rd] = addr;
4412 4431
4413 4432 if (subr == DIF_SUBR_STRCHR)
4414 4433 break;
4415 4434 }
4416 4435 if (c == '\0')
4417 4436 break;
4418 4437 }
4419 4438
4420 4439 break;
4421 4440 }
4422 4441
4423 4442 case DIF_SUBR_STRSTR:
4424 4443 case DIF_SUBR_INDEX:
4425 4444 case DIF_SUBR_RINDEX: {
4426 4445 /*
4427 4446 * We're going to iterate over the string looking for the
4428 4447 * specified string. We will iterate until we have reached
4429 4448 * the string length or we have found the string. (Yes, this
4430 4449 * is done in the most naive way possible -- but considering
4431 4450 * that the string we're searching for is likely to be
4432 4451 * relatively short, the complexity of Rabin-Karp or similar
4433 4452 * hardly seems merited.)
4434 4453 */
4435 4454 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
4436 4455 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
4437 4456 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4438 4457 size_t len = dtrace_strlen(addr, size);
4439 4458 size_t sublen = dtrace_strlen(substr, size);
4440 4459 char *limit = addr + len, *orig = addr;
4441 4460 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
4442 4461 int inc = 1;
4443 4462
4444 4463 regs[rd] = notfound;
4445 4464
4446 4465 if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
4447 4466 regs[rd] = NULL;
4448 4467 break;
4449 4468 }
4450 4469
4451 4470 if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
4452 4471 vstate)) {
4453 4472 regs[rd] = NULL;
4454 4473 break;
4455 4474 }
4456 4475
4457 4476 /*
4458 4477 * strstr() and index()/rindex() have similar semantics if
4459 4478 * both strings are the empty string: strstr() returns a
4460 4479 * pointer to the (empty) string, and index() and rindex()
4461 4480 * both return index 0 (regardless of any position argument).
4462 4481 */
4463 4482 if (sublen == 0 && len == 0) {
4464 4483 if (subr == DIF_SUBR_STRSTR)
4465 4484 regs[rd] = (uintptr_t)addr;
4466 4485 else
4467 4486 regs[rd] = 0;
4468 4487 break;
4469 4488 }
4470 4489
4471 4490 if (subr != DIF_SUBR_STRSTR) {
4472 4491 if (subr == DIF_SUBR_RINDEX) {
4473 4492 limit = orig - 1;
4474 4493 addr += len;
4475 4494 inc = -1;
4476 4495 }
4477 4496
4478 4497 /*
4479 4498 * Both index() and rindex() take an optional position
4480 4499 * argument that denotes the starting position.
4481 4500 */
4482 4501 if (nargs == 3) {
4483 4502 int64_t pos = (int64_t)tupregs[2].dttk_value;
4484 4503
4485 4504 /*
4486 4505 * If the position argument to index() is
4487 4506 * negative, Perl implicitly clamps it at
4488 4507 * zero. This semantic is a little surprising
4489 4508 * given the special meaning of negative
4490 4509 * positions to similar Perl functions like
4491 4510 * substr(), but it appears to reflect a
4492 4511 * notion that index() can start from a
4493 4512 * negative index and increment its way up to
4494 4513 * the string. Given this notion, Perl's
4495 4514 * rindex() is at least self-consistent in
4496 4515 * that it implicitly clamps positions greater
4497 4516 * than the string length to be the string
4498 4517 * length. Where Perl completely loses
4499 4518 * coherence, however, is when the specified
4500 4519 * substring is the empty string (""). In
4501 4520 * this case, even if the position is
4502 4521 * negative, rindex() returns 0 -- and even if
4503 4522 * the position is greater than the length,
4504 4523 * index() returns the string length. These
4505 4524 * semantics violate the notion that index()
4506 4525 * should never return a value less than the
4507 4526 * specified position and that rindex() should
4508 4527 * never return a value greater than the
4509 4528 * specified position. (One assumes that
4510 4529 * these semantics are artifacts of Perl's
4511 4530 * implementation and not the results of
4512 4531 * deliberate design -- it beggars belief that
4513 4532 * even Larry Wall could desire such oddness.)
4514 4533 * While in the abstract one would wish for
4515 4534 * consistent position semantics across
4516 4535 * substr(), index() and rindex() -- or at the
4517 4536 * very least self-consistent position
4518 4537 * semantics for index() and rindex() -- we
4519 4538 * instead opt to keep with the extant Perl
4520 4539 * semantics, in all their broken glory. (Do
4521 4540 * we have more desire to maintain Perl's
4522 4541 * semantics than Perl does? Probably.)
4523 4542 */
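				/*
				 * Concretely (matching the Perl behavior
				 * described above): index("abc", "", 5)
				 * evaluates to 3 via the pos >= len clause
				 * below, and rindex("abc", "", -1) to 0 via
				 * the pos < 0 clause.
				 */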
4524 4543 if (subr == DIF_SUBR_RINDEX) {
4525 4544 if (pos < 0) {
4526 4545 if (sublen == 0)
4527 4546 regs[rd] = 0;
4528 4547 break;
4529 4548 }
4530 4549
4531 4550 if (pos > len)
4532 4551 pos = len;
4533 4552 } else {
4534 4553 if (pos < 0)
4535 4554 pos = 0;
4536 4555
4537 4556 if (pos >= len) {
4538 4557 if (sublen == 0)
4539 4558 regs[rd] = len;
4540 4559 break;
4541 4560 }
4542 4561 }
4543 4562
4544 4563 addr = orig + pos;
4545 4564 }
4546 4565 }
4547 4566
4548 4567 for (regs[rd] = notfound; addr != limit; addr += inc) {
4549 4568 if (dtrace_strncmp(addr, substr, sublen) == 0) {
4550 4569 if (subr != DIF_SUBR_STRSTR) {
4551 4570 /*
4552 4571 * As D index() and rindex() are
4553 4572 * modeled on Perl (and not on awk),
4554 4573 * we return a zero-based (and not a
4555 4574 * one-based) index. (For you Perl
4556 4575 * weenies: no, we're not going to add
4557 4576 * $[ -- and shouldn't you be at a con
4558 4577 * or something?)
4559 4578 */
4560 4579 regs[rd] = (uintptr_t)(addr - orig);
4561 4580 break;
4562 4581 }
4563 4582
4564 4583 ASSERT(subr == DIF_SUBR_STRSTR);
4565 4584 regs[rd] = (uintptr_t)addr;
4566 4585 break;
4567 4586 }
4568 4587 }
4569 4588
4570 4589 break;
4571 4590 }
4572 4591
4573 4592 case DIF_SUBR_STRTOK: {
4574 4593 uintptr_t addr = tupregs[0].dttk_value;
4575 4594 uintptr_t tokaddr = tupregs[1].dttk_value;
4576 4595 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4577 4596 uintptr_t limit, toklimit;
4578 4597 size_t clim;
4579 4598 uint8_t c, tokmap[32]; /* 256 / 8 */
4580 4599 char *dest = (char *)mstate->dtms_scratch_ptr;
4581 4600 int i;
4582 4601
4583 4602 /*
4584 4603 * Check both the token buffer and (later) the input buffer,
4585 4604 * since both could be non-scratch addresses.
4586 4605 */
4587 4606 if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) {
4588 4607 regs[rd] = NULL;
4589 4608 break;
4590 4609 }
4591 4610 toklimit = tokaddr + clim;
4592 4611
4593 4612 if (!DTRACE_INSCRATCH(mstate, size)) {
4594 4613 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4595 4614 regs[rd] = NULL;
4596 4615 break;
4597 4616 }
4598 4617
4599 4618 if (addr == NULL) {
4600 4619 /*
4601 4620 * If the address specified is NULL, we use our saved
4602 4621 * strtok pointer from the mstate. Note that this
4603 4622 * means that the saved strtok pointer is _only_
4604 4623 * valid within multiple enablings of the same probe --
4605 4624 * it behaves like an implicit clause-local variable.
4606 4625 */
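			/*
			 * (e.g. in D: this->p = strtok(this->str, "/")
			 * begins a tokenization, and each subsequent
			 * strtok(NULL, "/") resumes from the pointer saved
			 * here.)
			 */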
4607 4626 addr = mstate->dtms_strtok;
4608 4627 limit = mstate->dtms_strtok_limit;
4609 4628 } else {
4610 4629 /*
4611 4630 * If the user-specified address is non-NULL we must
4612 4631 * access check it. This is the only time we have
4613 4632 * a chance to do so, since this address may reside
4614 4633 			 * in the string table of this clause -- future calls
4615 4634 * (when we fetch addr from mstate->dtms_strtok)
4616 4635 * would fail this access check.
4617 4636 */
4618 4637 if (!dtrace_strcanload(addr, size, &clim, mstate,
4619 4638 vstate)) {
4620 4639 regs[rd] = NULL;
4621 4640 break;
4622 4641 }
4623 4642 limit = addr + clim;
4624 4643 }
4625 4644
4626 4645 /*
4627 4646 * First, zero the token map, and then process the token
4628 4647 * string -- setting a bit in the map for every character
4629 4648 * found in the token string.
4630 4649 */
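		/*
		 * (e.g. c == ':' (0x3a) sets bit 2 (0x3a & 0x7) of
		 * tokmap[7] (0x3a >> 3); the scans below test the same bit.)
		 */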
4631 4650 for (i = 0; i < sizeof (tokmap); i++)
4632 4651 tokmap[i] = 0;
4633 4652
4634 4653 for (; tokaddr < toklimit; tokaddr++) {
4635 4654 if ((c = dtrace_load8(tokaddr)) == '\0')
4636 4655 break;
4637 4656
4638 4657 ASSERT((c >> 3) < sizeof (tokmap));
4639 4658 tokmap[c >> 3] |= (1 << (c & 0x7));
4640 4659 }
4641 4660
4642 4661 for (; addr < limit; addr++) {
4643 4662 /*
4644 4663 * We're looking for a character that is _not_
4645 4664 * contained in the token string.
4646 4665 */
4647 4666 if ((c = dtrace_load8(addr)) == '\0')
4648 4667 break;
4649 4668
4650 4669 if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
4651 4670 break;
4652 4671 }
4653 4672
4654 4673 if (c == '\0') {
4655 4674 /*
4656 4675 * We reached the end of the string without finding
4657 4676 * any character that was not in the token string.
4658 4677 * We return NULL in this case, and we set the saved
4659 4678 * address to NULL as well.
4660 4679 */
4661 4680 regs[rd] = NULL;
4662 4681 mstate->dtms_strtok = NULL;
4663 4682 mstate->dtms_strtok_limit = NULL;
4664 4683 break;
4665 4684 }
4666 4685
4667 4686 /*
4668 4687 * From here on, we're copying into the destination string.
4669 4688 */
4670 4689 for (i = 0; addr < limit && i < size - 1; addr++) {
4671 4690 if ((c = dtrace_load8(addr)) == '\0')
4672 4691 break;
4673 4692
4674 4693 if (tokmap[c >> 3] & (1 << (c & 0x7)))
4675 4694 break;
4676 4695
4677 4696 ASSERT(i < size);
4678 4697 dest[i++] = c;
4679 4698 }
4680 4699
4681 4700 ASSERT(i < size);
4682 4701 dest[i] = '\0';
4683 4702 regs[rd] = (uintptr_t)dest;
4684 4703 mstate->dtms_scratch_ptr += size;
4685 4704 mstate->dtms_strtok = addr;
4686 4705 mstate->dtms_strtok_limit = limit;
4687 4706 break;
4688 4707 }
4689 4708
4690 4709 case DIF_SUBR_SUBSTR: {
4691 4710 uintptr_t s = tupregs[0].dttk_value;
4692 4711 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4693 4712 char *d = (char *)mstate->dtms_scratch_ptr;
4694 4713 int64_t index = (int64_t)tupregs[1].dttk_value;
4695 4714 int64_t remaining = (int64_t)tupregs[2].dttk_value;
4696 4715 size_t len = dtrace_strlen((char *)s, size);
4697 4716 int64_t i;
4698 4717
4699 4718 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
4700 4719 regs[rd] = NULL;
4701 4720 break;
4702 4721 }
4703 4722
4704 4723 if (!DTRACE_INSCRATCH(mstate, size)) {
4705 4724 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4706 4725 regs[rd] = NULL;
4707 4726 break;
4708 4727 }
4709 4728
4710 4729 if (nargs <= 2)
4711 4730 remaining = (int64_t)size;
4712 4731
4713 4732 if (index < 0) {
4714 4733 index += len;
4715 4734
4716 4735 if (index < 0 && index + remaining > 0) {
4717 4736 remaining += index;
4718 4737 index = 0;
4719 4738 }
4720 4739 }
4721 4740
4722 4741 if (index >= len || index < 0) {
4723 4742 remaining = 0;
4724 4743 } else if (remaining < 0) {
4725 4744 remaining += len - index;
4726 4745 } else if (index + remaining > size) {
4727 4746 remaining = size - index;
4728 4747 }
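		/*
		 * (e.g. substr("coconut", -4, 3): index becomes -4 + 7 == 3
		 * and remaining stays 3, so the loop below copies "onu",
		 * matching Perl's substr().)
		 */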
4729 4748
4730 4749 for (i = 0; i < remaining; i++) {
4731 4750 if ((d[i] = dtrace_load8(s + index + i)) == '\0')
4732 4751 break;
4733 4752 }
4734 4753
4735 4754 d[i] = '\0';
4736 4755
4737 4756 mstate->dtms_scratch_ptr += size;
4738 4757 regs[rd] = (uintptr_t)d;
4739 4758 break;
4740 4759 }
4741 4760
4742 4761 case DIF_SUBR_JSON: {
4743 4762 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4744 4763 uintptr_t json = tupregs[0].dttk_value;
4745 4764 size_t jsonlen = dtrace_strlen((char *)json, size);
4746 4765 uintptr_t elem = tupregs[1].dttk_value;
4747 4766 size_t elemlen = dtrace_strlen((char *)elem, size);
4748 4767
4749 4768 char *dest = (char *)mstate->dtms_scratch_ptr;
4750 4769 char *elemlist = (char *)mstate->dtms_scratch_ptr + jsonlen + 1;
4751 4770 char *ee = elemlist;
4752 4771 int nelems = 1;
4753 4772 uintptr_t cur;
4754 4773
4755 4774 if (!dtrace_canload(json, jsonlen + 1, mstate, vstate) ||
4756 4775 !dtrace_canload(elem, elemlen + 1, mstate, vstate)) {
4757 4776 regs[rd] = NULL;
4758 4777 break;
4759 4778 }
4760 4779
4761 4780 if (!DTRACE_INSCRATCH(mstate, jsonlen + 1 + elemlen + 1)) {
4762 4781 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4763 4782 regs[rd] = NULL;
4764 4783 break;
4765 4784 }
4766 4785
4767 4786 /*
4768 4787 * Read the element selector and split it up into a packed list
4769 4788 * of strings.
4770 4789 */
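		/*
		 * (e.g. the selector "foo[0].bar" packs to
		 * "foo" NUL "0" NUL "bar" NUL with nelems == 3.)
		 */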
4771 4790 for (cur = elem; cur < elem + elemlen; cur++) {
4772 4791 char cc = dtrace_load8(cur);
4773 4792
4774 4793 if (cur == elem && cc == '[') {
4775 4794 /*
4776 4795 * If the first element selector key is
4777 4796 * actually an array index then ignore the
4778 4797 * bracket.
4779 4798 */
4780 4799 continue;
4781 4800 }
4782 4801
4783 4802 if (cc == ']')
4784 4803 continue;
4785 4804
4786 4805 if (cc == '.' || cc == '[') {
4787 4806 nelems++;
4788 4807 cc = '\0';
4789 4808 }
4790 4809
4791 4810 *ee++ = cc;
4792 4811 }
4793 4812 *ee++ = '\0';
4794 4813
4795 4814 if ((regs[rd] = (uintptr_t)dtrace_json(size, json, elemlist,
4796 4815 nelems, dest)) != NULL)
4797 4816 mstate->dtms_scratch_ptr += jsonlen + 1;
4798 4817 break;
4799 4818 }
4800 4819
4801 4820 case DIF_SUBR_TOUPPER:
4802 4821 case DIF_SUBR_TOLOWER: {
4803 4822 uintptr_t s = tupregs[0].dttk_value;
4804 4823 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4805 4824 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4806 4825 size_t len = dtrace_strlen((char *)s, size);
4807 4826 char lower, upper, convert;
4808 4827 int64_t i;
4809 4828
4810 4829 if (subr == DIF_SUBR_TOUPPER) {
4811 4830 lower = 'a';
4812 4831 upper = 'z';
4813 4832 convert = 'A';
4814 4833 } else {
4815 4834 lower = 'A';
4816 4835 upper = 'Z';
4817 4836 convert = 'a';
4818 4837 }
4819 4838
4820 4839 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
4821 4840 regs[rd] = NULL;
4822 4841 break;
4823 4842 }
4824 4843
4825 4844 if (!DTRACE_INSCRATCH(mstate, size)) {
4826 4845 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4827 4846 regs[rd] = NULL;
4828 4847 break;
4829 4848 }
4830 4849
4831 4850 for (i = 0; i < size - 1; i++) {
4832 4851 if ((c = dtrace_load8(s + i)) == '\0')
4833 4852 break;
4834 4853
4835 4854 if (c >= lower && c <= upper)
4836 4855 c = convert + (c - lower);
4837 4856
4838 4857 dest[i] = c;
4839 4858 }
4840 4859
4841 4860 ASSERT(i < size);
4842 4861 dest[i] = '\0';
4843 4862 regs[rd] = (uintptr_t)dest;
4844 4863 mstate->dtms_scratch_ptr += size;
4845 4864 break;
4846 4865 }
4847 4866
4848 4867 case DIF_SUBR_GETMAJOR:
4849 4868 #ifdef _LP64
4850 4869 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
4851 4870 #else
4852 4871 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ;
4853 4872 #endif
4854 4873 break;
4855 4874
4856 4875 case DIF_SUBR_GETMINOR:
4857 4876 #ifdef _LP64
4858 4877 regs[rd] = tupregs[0].dttk_value & MAXMIN64;
4859 4878 #else
4860 4879 regs[rd] = tupregs[0].dttk_value & MAXMIN;
4861 4880 #endif
4862 4881 break;
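	/*
	 * The two subroutines above simply undo the packing performed by
	 * makedevice(9F): the major number occupies the high-order bits of
	 * the dev_t and the minor number the low-order bits, so
	 * makedevice(getmajor(dev), getminor(dev)) == dev.
	 */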
4863 4882
4864 4883 case DIF_SUBR_DDI_PATHNAME: {
4865 4884 /*
4866 4885 * This one is a galactic mess. We are going to roughly
4867 4886 * emulate ddi_pathname(), but it's made more complicated
4868 4887 * by the fact that we (a) want to include the minor name and
4869 4888 * (b) must proceed iteratively instead of recursively.
4870 4889 */
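		/*
		 * The path is assembled right-to-left: "end" starts at the
		 * last byte of the scratch region and walks backward as the
		 * minor name, unit addresses and node names are prepended,
		 * producing (illustratively) something of the form
		 * "/devices/pci@0,0/disk@1,0:a".
		 */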
4871 4890 uintptr_t dest = mstate->dtms_scratch_ptr;
4872 4891 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4873 4892 char *start = (char *)dest, *end = start + size - 1;
4874 4893 uintptr_t daddr = tupregs[0].dttk_value;
4875 4894 int64_t minor = (int64_t)tupregs[1].dttk_value;
4876 4895 char *s;
4877 4896 int i, len, depth = 0;
4878 4897
4879 4898 /*
4880 4899 * Due to all the pointer jumping we do and context we must
4881 4900 * rely upon, we just mandate that the user must have kernel
4882 4901 * read privileges to use this routine.
4883 4902 */
4884 4903 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) {
4885 4904 *flags |= CPU_DTRACE_KPRIV;
4886 4905 *illval = daddr;
4887 4906 regs[rd] = NULL;
4888 4907 }
4889 4908
4890 4909 if (!DTRACE_INSCRATCH(mstate, size)) {
4891 4910 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4892 4911 regs[rd] = NULL;
4893 4912 break;
4894 4913 }
4895 4914
4896 4915 *end = '\0';
4897 4916
4898 4917 /*
4899 4918 * We want to have a name for the minor. In order to do this,
4900 4919 * we need to walk the minor list from the devinfo. We want
4901 4920 * to be sure that we don't infinitely walk a circular list,
4902 4921 * so we check for circularity by sending a scout pointer
4903 4922 * ahead two elements for every element that we iterate over;
4904 4923 * if the list is circular, these will ultimately point to the
4905 4924 * same element. You may recognize this little trick as the
4906 4925 * answer to a stupid interview question -- one that always
4907 4926 * seems to be asked by those who had to have it laboriously
4908 4927 * explained to them, and who can't even concisely describe
4909 4928 * the conditions under which one would be forced to resort to
4910 4929 * this technique. Needless to say, those conditions are
4911 4930 * found here -- and probably only here. Is this the only use
4912 4931 * of this infamous trick in shipping, production code? If it
4913 4932 * isn't, it probably should be...
4914 4933 */
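		/*
		 * Schematically: maddr advances one element per iteration
		 * while scout advances two, so on a circular list the two
		 * pointers must eventually coincide -- at which point we
		 * flag CPU_DTRACE_ILLOP rather than spinning forever.
		 */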
4915 4934 if (minor != -1) {
4916 4935 uintptr_t maddr = dtrace_loadptr(daddr +
4917 4936 offsetof(struct dev_info, devi_minor));
4918 4937
4919 4938 uintptr_t next = offsetof(struct ddi_minor_data, next);
4920 4939 uintptr_t name = offsetof(struct ddi_minor_data,
4921 4940 d_minor) + offsetof(struct ddi_minor, name);
4922 4941 uintptr_t dev = offsetof(struct ddi_minor_data,
4923 4942 d_minor) + offsetof(struct ddi_minor, dev);
4924 4943 uintptr_t scout;
4925 4944
4926 4945 if (maddr != NULL)
4927 4946 scout = dtrace_loadptr(maddr + next);
4928 4947
4929 4948 while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
4930 4949 uint64_t m;
4931 4950 #ifdef _LP64
4932 4951 m = dtrace_load64(maddr + dev) & MAXMIN64;
4933 4952 #else
4934 4953 m = dtrace_load32(maddr + dev) & MAXMIN;
4935 4954 #endif
4936 4955 if (m != minor) {
4937 4956 maddr = dtrace_loadptr(maddr + next);
4938 4957
4939 4958 if (scout == NULL)
4940 4959 continue;
4941 4960
4942 4961 scout = dtrace_loadptr(scout + next);
4943 4962
4944 4963 if (scout == NULL)
4945 4964 continue;
4946 4965
4947 4966 scout = dtrace_loadptr(scout + next);
4948 4967
4949 4968 if (scout == NULL)
4950 4969 continue;
4951 4970
4952 4971 if (scout == maddr) {
4953 4972 *flags |= CPU_DTRACE_ILLOP;
4954 4973 break;
4955 4974 }
4956 4975
4957 4976 continue;
4958 4977 }
4959 4978
4960 4979 /*
4961 4980 * We have the minor data. Now we need to
4962 4981 * copy the minor's name into the end of the
4963 4982 * pathname.
4964 4983 */
4965 4984 s = (char *)dtrace_loadptr(maddr + name);
4966 4985 len = dtrace_strlen(s, size);
4967 4986
4968 4987 if (*flags & CPU_DTRACE_FAULT)
4969 4988 break;
4970 4989
4971 4990 if (len != 0) {
4972 4991 if ((end -= (len + 1)) < start)
4973 4992 break;
4974 4993
4975 4994 *end = ':';
4976 4995 }
4977 4996
4978 4997 for (i = 1; i <= len; i++)
4979 4998 end[i] = dtrace_load8((uintptr_t)s++);
4980 4999 break;
4981 5000 }
4982 5001 }
4983 5002
4984 5003 while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
4985 5004 ddi_node_state_t devi_state;
4986 5005
4987 5006 devi_state = dtrace_load32(daddr +
4988 5007 offsetof(struct dev_info, devi_node_state));
4989 5008
4990 5009 if (*flags & CPU_DTRACE_FAULT)
4991 5010 break;
4992 5011
4993 5012 if (devi_state >= DS_INITIALIZED) {
4994 5013 s = (char *)dtrace_loadptr(daddr +
4995 5014 offsetof(struct dev_info, devi_addr));
4996 5015 len = dtrace_strlen(s, size);
4997 5016
4998 5017 if (*flags & CPU_DTRACE_FAULT)
4999 5018 break;
5000 5019
5001 5020 if (len != 0) {
5002 5021 if ((end -= (len + 1)) < start)
5003 5022 break;
5004 5023
5005 5024 *end = '@';
5006 5025 }
5007 5026
5008 5027 for (i = 1; i <= len; i++)
5009 5028 end[i] = dtrace_load8((uintptr_t)s++);
5010 5029 }
5011 5030
5012 5031 /*
5013 5032 * Now for the node name...
5014 5033 */
5015 5034 s = (char *)dtrace_loadptr(daddr +
5016 5035 offsetof(struct dev_info, devi_node_name));
5017 5036
5018 5037 daddr = dtrace_loadptr(daddr +
5019 5038 offsetof(struct dev_info, devi_parent));
5020 5039
5021 5040 /*
5022 5041 * If our parent is NULL (that is, if we're the root
5023 5042 * node), we're going to use the special path
5024 5043 * "devices".
5025 5044 */
5026 5045 if (daddr == NULL)
5027 5046 s = "devices";
5028 5047
5029 5048 len = dtrace_strlen(s, size);
5030 5049 if (*flags & CPU_DTRACE_FAULT)
5031 5050 break;
5032 5051
5033 5052 if ((end -= (len + 1)) < start)
5034 5053 break;
5035 5054
5036 5055 for (i = 1; i <= len; i++)
5037 5056 end[i] = dtrace_load8((uintptr_t)s++);
5038 5057 *end = '/';
5039 5058
5040 5059 if (depth++ > dtrace_devdepth_max) {
5041 5060 *flags |= CPU_DTRACE_ILLOP;
5042 5061 break;
5043 5062 }
5044 5063 }
5045 5064
5046 5065 if (end < start)
5047 5066 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5048 5067
5049 5068 if (daddr == NULL) {
5050 5069 regs[rd] = (uintptr_t)end;
5051 5070 mstate->dtms_scratch_ptr += size;
5052 5071 }
5053 5072
5054 5073 break;
5055 5074 }
5056 5075
5057 5076 case DIF_SUBR_STRJOIN: {
5058 5077 char *d = (char *)mstate->dtms_scratch_ptr;
5059 5078 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
5060 5079 uintptr_t s1 = tupregs[0].dttk_value;
5061 5080 uintptr_t s2 = tupregs[1].dttk_value;
5062 5081 int i = 0, j = 0;
5063 5082 size_t lim1, lim2;
5064 5083 char c;
5065 5084
5066 5085 if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) ||
5067 5086 !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) {
5068 5087 regs[rd] = NULL;
5069 5088 break;
5070 5089 }
5071 5090
5072 5091 if (!DTRACE_INSCRATCH(mstate, size)) {
5073 5092 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5074 5093 regs[rd] = NULL;
5075 5094 break;
5076 5095 }
5077 5096
5078 5097 for (;;) {
5079 5098 if (i >= size) {
5080 5099 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5081 5100 regs[rd] = NULL;
5082 5101 break;
5083 5102 }
5084 5103 c = (i >= lim1) ? '\0' : dtrace_load8(s1++);
5085 5104 if ((d[i++] = c) == '\0') {
5086 5105 i--;
5087 5106 break;
5088 5107 }
5089 5108 }
5090 5109
5091 5110 for (;;) {
5092 5111 if (i >= size) {
5093 5112 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5094 5113 regs[rd] = NULL;
5095 5114 break;
5096 5115 }
5097 5116
5098 5117 c = (j++ >= lim2) ? '\0' : dtrace_load8(s2++);
5099 5118 if ((d[i++] = c) == '\0')
5100 5119 break;
5101 5120 }
5102 5121
5103 5122 if (i < size) {
5104 5123 mstate->dtms_scratch_ptr += i;
5105 5124 regs[rd] = (uintptr_t)d;
5106 5125 }
5107 5126
5108 5127 break;
5109 5128 }
5110 5129
5111 5130 case DIF_SUBR_STRTOLL: {
5112 5131 uintptr_t s = tupregs[0].dttk_value;
5113 5132 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
5114 5133 size_t lim;
5115 5134 int base = 10;
5116 5135
5117 5136 if (nargs > 1) {
5118 5137 if ((base = tupregs[1].dttk_value) <= 1 ||
5119 5138 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
5120 5139 *flags |= CPU_DTRACE_ILLOP;
5121 5140 break;
5122 5141 }
5123 5142 }
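		/*
		 * That is, the base must lie between 2 and 36 -- ten
		 * decimal digits plus twenty-six letters.
		 */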
5124 5143
5125 5144 if (!dtrace_strcanload(s, size, &lim, mstate, vstate)) {
5126 5145 regs[rd] = INT64_MIN;
5127 5146 break;
5128 5147 }
5129 5148
5130 5149 regs[rd] = dtrace_strtoll((char *)s, base, lim);
5131 5150 break;
5132 5151 }
5133 5152
5134 5153 case DIF_SUBR_LLTOSTR: {
5135 5154 int64_t i = (int64_t)tupregs[0].dttk_value;
5136 5155 uint64_t val, digit;
5137 5156 uint64_t size = 65; /* enough room for 2^64 in binary */
5138 5157 char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
5139 5158 int base = 10;
5140 5159
5141 5160 if (nargs > 1) {
5142 5161 if ((base = tupregs[1].dttk_value) <= 1 ||
5143 5162 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
5144 5163 *flags |= CPU_DTRACE_ILLOP;
5145 5164 break;
5146 5165 }
5147 5166 }
5148 5167
5149 5168 val = (base == 10 && i < 0) ? i * -1 : i;
5150 5169
5151 5170 if (!DTRACE_INSCRATCH(mstate, size)) {
5152 5171 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5153 5172 regs[rd] = NULL;
5154 5173 break;
5155 5174 }
5156 5175
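		/*
		 * Digits are emitted least-significant first, so the string
		 * grows right-to-left from the end of the buffer: e.g.
		 * lltostr(255, 16) yields "0xff" (the "0x" is prepended
		 * below), lltostr(255, 8) yields "0377", and negative
		 * base-10 values get a leading '-'.
		 */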
5157 5176 for (*end-- = '\0'; val; val /= base) {
5158 5177 if ((digit = val % base) <= '9' - '0') {
5159 5178 *end-- = '0' + digit;
5160 5179 } else {
5161 5180 *end-- = 'a' + (digit - ('9' - '0') - 1);
5162 5181 }
5163 5182 }
5164 5183
5165 5184 if (i == 0 && base == 16)
5166 5185 *end-- = '0';
5167 5186
5168 5187 if (base == 16)
5169 5188 *end-- = 'x';
5170 5189
5171 5190 if (i == 0 || base == 8 || base == 16)
5172 5191 *end-- = '0';
5173 5192
5174 5193 if (i < 0 && base == 10)
5175 5194 *end-- = '-';
5176 5195
5177 5196 regs[rd] = (uintptr_t)end + 1;
5178 5197 mstate->dtms_scratch_ptr += size;
5179 5198 break;
5180 5199 }
5181 5200
5182 5201 case DIF_SUBR_HTONS:
5183 5202 case DIF_SUBR_NTOHS:
5184 5203 #ifdef _BIG_ENDIAN
5185 5204 regs[rd] = (uint16_t)tupregs[0].dttk_value;
5186 5205 #else
5187 5206 regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
5188 5207 #endif
5189 5208 break;
5190 5209
5191 5210
5192 5211 case DIF_SUBR_HTONL:
5193 5212 case DIF_SUBR_NTOHL:
5194 5213 #ifdef _BIG_ENDIAN
5195 5214 regs[rd] = (uint32_t)tupregs[0].dttk_value;
5196 5215 #else
5197 5216 regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
5198 5217 #endif
5199 5218 break;
5200 5219
5201 5220
5202 5221 case DIF_SUBR_HTONLL:
5203 5222 case DIF_SUBR_NTOHLL:
5204 5223 #ifdef _BIG_ENDIAN
5205 5224 regs[rd] = (uint64_t)tupregs[0].dttk_value;
5206 5225 #else
5207 5226 regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
5208 5227 #endif
5209 5228 break;
5210 5229
5211 5230
5212 5231 case DIF_SUBR_DIRNAME:
5213 5232 case DIF_SUBR_BASENAME: {
5214 5233 char *dest = (char *)mstate->dtms_scratch_ptr;
5215 5234 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
5216 5235 uintptr_t src = tupregs[0].dttk_value;
5217 5236 int i, j, len = dtrace_strlen((char *)src, size);
5218 5237 int lastbase = -1, firstbase = -1, lastdir = -1;
5219 5238 int start, end;
5220 5239
5221 5240 if (!dtrace_canload(src, len + 1, mstate, vstate)) {
5222 5241 regs[rd] = NULL;
5223 5242 break;
5224 5243 }
5225 5244
5226 5245 if (!DTRACE_INSCRATCH(mstate, size)) {
5227 5246 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5228 5247 regs[rd] = NULL;
5229 5248 break;
5230 5249 }
5231 5250
5232 5251 /*
5233 5252 		 * The basename and dirname for a zero-length string are
5234 5253 * defined to be "."
5235 5254 */
5236 5255 if (len == 0) {
5237 5256 len = 1;
5238 5257 src = (uintptr_t)".";
5239 5258 }
5240 5259
5241 5260 /*
5242 5261 * Start from the back of the string, moving back toward the
5243 5262 * front until we see a character that isn't a slash. That
5244 5263 * character is the last character in the basename.
5245 5264 */
5246 5265 for (i = len - 1; i >= 0; i--) {
5247 5266 if (dtrace_load8(src + i) != '/')
5248 5267 break;
5249 5268 }
5250 5269
5251 5270 if (i >= 0)
5252 5271 lastbase = i;
5253 5272
5254 5273 /*
5255 5274 * Starting from the last character in the basename, move
5256 5275 * towards the front until we find a slash. The character
5257 5276 * that we processed immediately before that is the first
5258 5277 * character in the basename.
5259 5278 */
5260 5279 for (; i >= 0; i--) {
5261 5280 if (dtrace_load8(src + i) == '/')
5262 5281 break;
5263 5282 }
5264 5283
5265 5284 if (i >= 0)
5266 5285 firstbase = i + 1;
5267 5286
5268 5287 /*
5269 5288 * Now keep going until we find a non-slash character. That
5270 5289 * character is the last character in the dirname.
5271 5290 */
5272 5291 for (; i >= 0; i--) {
5273 5292 if (dtrace_load8(src + i) != '/')
5274 5293 break;
5275 5294 }
5276 5295
5277 5296 if (i >= 0)
5278 5297 lastdir = i;
5279 5298
5280 5299 ASSERT(!(lastbase == -1 && firstbase != -1));
5281 5300 ASSERT(!(firstbase == -1 && lastdir != -1));
5282 5301
5283 5302 if (lastbase == -1) {
5284 5303 /*
5285 5304 * We didn't find a non-slash character. We know that
5286 5305 * the length is non-zero, so the whole string must be
5287 5306 * slashes. In either the dirname or the basename
5288 5307 * case, we return '/'.
5289 5308 */
5290 5309 ASSERT(firstbase == -1);
5291 5310 firstbase = lastbase = lastdir = 0;
5292 5311 }
5293 5312
5294 5313 if (firstbase == -1) {
5295 5314 /*
5296 5315 * The entire string consists only of a basename
5297 5316 * component. If we're looking for dirname, we need
5298 5317 * to change our string to be just "."; if we're
5299 5318 * looking for a basename, we'll just set the first
5300 5319 * character of the basename to be 0.
5301 5320 */
5302 5321 if (subr == DIF_SUBR_DIRNAME) {
5303 5322 ASSERT(lastdir == -1);
5304 5323 src = (uintptr_t)".";
5305 5324 lastdir = 0;
5306 5325 } else {
5307 5326 firstbase = 0;
5308 5327 }
5309 5328 }
5310 5329
5311 5330 if (subr == DIF_SUBR_DIRNAME) {
5312 5331 if (lastdir == -1) {
5313 5332 /*
5314 5333 * We know that we have a slash in the name --
5315 5334 * or lastdir would be set to 0, above. And
5316 5335 * because lastdir is -1, we know that this
5317 5336 * slash must be the first character. (That
5318 5337 * is, the full string must be of the form
5319 5338 * "/basename".) In this case, the last
5320 5339 * character of the directory name is 0.
5321 5340 */
5322 5341 lastdir = 0;
5323 5342 }
5324 5343
5325 5344 start = 0;
5326 5345 end = lastdir;
5327 5346 } else {
5328 5347 ASSERT(subr == DIF_SUBR_BASENAME);
5329 5348 ASSERT(firstbase != -1 && lastbase != -1);
5330 5349 start = firstbase;
5331 5350 end = lastbase;
5332 5351 }
5333 5352
5334 5353 for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
5335 5354 dest[j] = dtrace_load8(src + i);
5336 5355
5337 5356 dest[j] = '\0';
5338 5357 regs[rd] = (uintptr_t)dest;
5339 5358 mstate->dtms_scratch_ptr += size;
5340 5359 break;
5341 5360 }
5342 5361
5343 5362 case DIF_SUBR_GETF: {
5344 5363 uintptr_t fd = tupregs[0].dttk_value;
5345 5364 uf_info_t *finfo = &curthread->t_procp->p_user.u_finfo;
5346 5365 file_t *fp;
5347 5366
5348 5367 if (!dtrace_priv_proc(state, mstate)) {
5349 5368 regs[rd] = NULL;
5350 5369 break;
5351 5370 }
5352 5371
5353 5372 /*
5354 5373 * This is safe because fi_nfiles only increases, and the
5355 5374 * fi_list array is not freed when the array size doubles.
5356 5375 * (See the comment in flist_grow() for details on the
5357 5376 * management of the u_finfo structure.)
5358 5377 */
5359 5378 fp = fd < finfo->fi_nfiles ? finfo->fi_list[fd].uf_file : NULL;
5360 5379
5361 5380 mstate->dtms_getf = fp;
5362 5381 regs[rd] = (uintptr_t)fp;
5363 5382 break;
5364 5383 }
5365 5384
5366 5385 case DIF_SUBR_CLEANPATH: {
5367 5386 char *dest = (char *)mstate->dtms_scratch_ptr, c;
5368 5387 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
5369 5388 uintptr_t src = tupregs[0].dttk_value;
5370 5389 size_t lim;
5371 5390 int i = 0, j = 0;
5372 5391 zone_t *z;
5373 5392
5374 5393 if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) {
5375 5394 regs[rd] = NULL;
5376 5395 break;
5377 5396 }
5378 5397
5379 5398 if (!DTRACE_INSCRATCH(mstate, size)) {
5380 5399 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5381 5400 regs[rd] = NULL;
5382 5401 break;
5383 5402 }
5384 5403
5385 5404 /*
5386 5405 * Move forward, loading each character.
5387 5406 */
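		/*
		 * For example, "/foo/./bar//../baz" has its "//" collapsed,
		 * its "/./" component dropped, and its "/../" backed over,
		 * yielding "/foo/baz".
		 */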
5388 5407 do {
5389 5408 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
5390 5409 next:
5391 5410 			if (j + 5 >= size)	/* 5 = sizeof ("/..c") */
5392 5411 break;
5393 5412
5394 5413 if (c != '/') {
5395 5414 dest[j++] = c;
5396 5415 continue;
5397 5416 }
5398 5417
5399 5418 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
5400 5419
5401 5420 if (c == '/') {
5402 5421 /*
5403 5422 * We have two slashes -- we can just advance
5404 5423 * to the next character.
5405 5424 */
5406 5425 goto next;
5407 5426 }
5408 5427
5409 5428 if (c != '.') {
5410 5429 /*
5411 5430 * This is not "." and it's not ".." -- we can
5412 5431 * just store the "/" and this character and
5413 5432 * drive on.
5414 5433 */
5415 5434 dest[j++] = '/';
5416 5435 dest[j++] = c;
5417 5436 continue;
5418 5437 }
5419 5438
5420 5439 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
5421 5440
5422 5441 if (c == '/') {
5423 5442 /*
5424 5443 * This is a "/./" component. We're not going
5425 5444 * to store anything in the destination buffer;
5426 5445 * we're just going to go to the next component.
5427 5446 */
5428 5447 goto next;
5429 5448 }
5430 5449
5431 5450 if (c != '.') {
5432 5451 /*
5433 5452 * This is not ".." -- we can just store the
5434 5453 * "/." and this character and continue
5435 5454 * processing.
5436 5455 */
5437 5456 dest[j++] = '/';
5438 5457 dest[j++] = '.';
5439 5458 dest[j++] = c;
5440 5459 continue;
5441 5460 }
5442 5461
5443 5462 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
5444 5463
5445 5464 if (c != '/' && c != '\0') {
5446 5465 /*
5447 5466 * This is not ".." -- it's "..[mumble]".
5448 5467 * We'll store the "/.." and this character
5449 5468 * and continue processing.
5450 5469 */
5451 5470 dest[j++] = '/';
5452 5471 dest[j++] = '.';
5453 5472 dest[j++] = '.';
5454 5473 dest[j++] = c;
5455 5474 continue;
5456 5475 }
5457 5476
5458 5477 /*
5459 5478 * This is "/../" or "/..\0". We need to back up
5460 5479 * our destination pointer until we find a "/".
5461 5480 */
5462 5481 i--;
5463 5482 while (j != 0 && dest[--j] != '/')
5464 5483 continue;
5465 5484
5466 5485 if (c == '\0')
5467 5486 dest[++j] = '/';
5468 5487 } while (c != '\0');
5469 5488
5470 5489 dest[j] = '\0';
5471 5490
5472 5491 if (mstate->dtms_getf != NULL &&
5473 5492 !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
5474 5493 (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
5475 5494 /*
5476 5495 * If we've done a getf() as a part of this ECB and we
5477 5496 * don't have kernel access (and we're not in the global
5478 5497 * zone), check if the path we cleaned up begins with
5479 5498 * the zone's root path, and trim it off if so. Note
5480 5499 * that this is an output cleanliness issue, not a
5481 5500 * security issue: knowing one's zone root path does
5482 5501 * not enable privilege escalation.
5483 5502 */
5484 5503 if (strstr(dest, z->zone_rootpath) == dest)
5485 5504 dest += strlen(z->zone_rootpath) - 1;
5486 5505 }
5487 5506
5488 5507 regs[rd] = (uintptr_t)dest;
5489 5508 mstate->dtms_scratch_ptr += size;
5490 5509 break;
5491 5510 }
5492 5511
5493 5512 case DIF_SUBR_INET_NTOA:
5494 5513 case DIF_SUBR_INET_NTOA6:
5495 5514 case DIF_SUBR_INET_NTOP: {
5496 5515 size_t size;
5497 5516 int af, argi, i;
5498 5517 char *base, *end;
5499 5518
5500 5519 if (subr == DIF_SUBR_INET_NTOP) {
5501 5520 af = (int)tupregs[0].dttk_value;
5502 5521 argi = 1;
5503 5522 } else {
5504 5523 			af = subr == DIF_SUBR_INET_NTOA ? AF_INET : AF_INET6;
5505 5524 argi = 0;
5506 5525 }
5507 5526
5508 5527 if (af == AF_INET) {
5509 5528 ipaddr_t ip4;
5510 5529 uint8_t *ptr8, val;
5511 5530
5512 5531 if (!dtrace_canload(tupregs[argi].dttk_value,
5513 5532 sizeof (ipaddr_t), mstate, vstate)) {
5514 5533 regs[rd] = NULL;
5515 5534 break;
5516 5535 }
5517 5536
5518 5537 /*
5519 5538 * Safely load the IPv4 address.
5520 5539 */
5521 5540 ip4 = dtrace_load32(tupregs[argi].dttk_value);
5522 5541
5523 5542 /*
5524 5543 * Check an IPv4 string will fit in scratch.
5525 5544 			 * Check that an IPv4 string will fit in scratch.
5526 5545 size = INET_ADDRSTRLEN;
5527 5546 if (!DTRACE_INSCRATCH(mstate, size)) {
5528 5547 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5529 5548 regs[rd] = NULL;
5530 5549 break;
5531 5550 }
5532 5551 base = (char *)mstate->dtms_scratch_ptr;
5533 5552 end = (char *)mstate->dtms_scratch_ptr + size - 1;
5534 5553
5535 5554 /*
5536 5555 * Stringify as a dotted decimal quad.
5537 5556 */
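			/*
			 * As with lltostr(), the text is built right-to-left:
			 * the last octet is rendered first, at the far end of
			 * the buffer, and regs[rd] is ultimately pointed at
			 * wherever the leftmost digit landed.
			 */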
5538 5557 *end-- = '\0';
5539 5558 ptr8 = (uint8_t *)&ip4;
5540 5559 for (i = 3; i >= 0; i--) {
5541 5560 val = ptr8[i];
5542 5561
5543 5562 if (val == 0) {
5544 5563 *end-- = '0';
5545 5564 } else {
5546 5565 for (; val; val /= 10) {
5547 5566 *end-- = '0' + (val % 10);
5548 5567 }
5549 5568 }
5550 5569
5551 5570 if (i > 0)
5552 5571 *end-- = '.';
5553 5572 }
5554 5573 ASSERT(end + 1 >= base);
5555 5574
5555 5574
5556 5575 } else if (af == AF_INET6) {
5557 5576 struct in6_addr ip6;
5558 5577 int firstzero, tryzero, numzero, v6end;
5559 5578 uint16_t val;
5560 5579 const char digits[] = "0123456789abcdef";
5561 5580
5562 5581 /*
5563 5582 * Stringify using RFC 1884 convention 2 - 16 bit
5564 5583 * hexadecimal values with a zero-run compression.
5565 5584 * Lower case hexadecimal digits are used.
5566 - * eg, fe80::214:4fff:fe0b:76c8.
5585 + * eg, fe80::214:4fff:fe0b:76c8.
5567 5586 * The IPv4 embedded form is returned for inet_ntop,
5568 5587 * just the IPv4 string is returned for inet_ntoa6.
5569 5588 */
5570 5589
5571 5590 if (!dtrace_canload(tupregs[argi].dttk_value,
5572 5591 sizeof (struct in6_addr), mstate, vstate)) {
5573 5592 regs[rd] = NULL;
5574 5593 break;
5575 5594 }
5576 5595
5577 5596 /*
5578 5597 * Safely load the IPv6 address.
5579 5598 */
5580 5599 dtrace_bcopy(
5581 5600 (void *)(uintptr_t)tupregs[argi].dttk_value,
5582 5601 (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));
5583 5602
5584 5603 /*
5585 5604 			 * Check that an IPv6 string will fit in scratch.
5586 5605 */
5587 5606 size = INET6_ADDRSTRLEN;
5588 5607 if (!DTRACE_INSCRATCH(mstate, size)) {
5589 5608 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
5590 5609 regs[rd] = NULL;
5591 5610 break;
5592 5611 }
5593 5612 base = (char *)mstate->dtms_scratch_ptr;
5594 5613 end = (char *)mstate->dtms_scratch_ptr + size - 1;
5595 5614 *end-- = '\0';
5596 5615
5597 5616 /*
5598 5617 * Find the longest run of 16 bit zero values
5599 5618 * for the single allowed zero compression - "::".
5600 5619 */
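			/*
			 * For the example address above, fe80:0:0:0:214:4fff:
			 * fe0b:76c8, this scan leaves firstzero == 2 and
			 * numzero == 6 (bytes 2 through 7), which the output
			 * loop below renders as the lone "::".
			 */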
5601 5620 firstzero = -1;
5602 5621 tryzero = -1;
5603 5622 numzero = 1;
5604 5623 for (i = 0; i < sizeof (struct in6_addr); i++) {
5605 5624 if (ip6._S6_un._S6_u8[i] == 0 &&
5606 5625 tryzero == -1 && i % 2 == 0) {
5607 5626 tryzero = i;
5608 5627 continue;
5609 5628 }
5610 5629
5611 5630 if (tryzero != -1 &&
5612 5631 (ip6._S6_un._S6_u8[i] != 0 ||
5613 5632 i == sizeof (struct in6_addr) - 1)) {
5614 5633
5615 5634 if (i - tryzero <= numzero) {
5616 5635 tryzero = -1;
5617 5636 continue;
5618 5637 }
5619 5638
5620 5639 firstzero = tryzero;
5621 5640 numzero = i - i % 2 - tryzero;
5622 5641 tryzero = -1;
5623 5642
5624 5643 if (ip6._S6_un._S6_u8[i] == 0 &&
5625 5644 i == sizeof (struct in6_addr) - 1)
5626 5645 numzero += 2;
5627 5646 }
5628 5647 }
5629 5648 ASSERT(firstzero + numzero <= sizeof (struct in6_addr));
5630 5649
5631 5650 /*
5632 5651 * Check for an IPv4 embedded address.
5633 5652 */
5634 5653 v6end = sizeof (struct in6_addr) - 2;
5635 5654 if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
5636 5655 IN6_IS_ADDR_V4COMPAT(&ip6)) {
5637 5656 for (i = sizeof (struct in6_addr) - 1;
5638 5657 i >= DTRACE_V4MAPPED_OFFSET; i--) {
5639 5658 ASSERT(end >= base);
5640 5659
5641 5660 val = ip6._S6_un._S6_u8[i];
5642 5661
5643 5662 if (val == 0) {
5644 5663 *end-- = '0';
5645 5664 } else {
5646 5665 for (; val; val /= 10) {
5647 5666 *end-- = '0' + val % 10;
5648 5667 }
5649 5668 }
5650 5669
5651 5670 if (i > DTRACE_V4MAPPED_OFFSET)
5652 5671 *end-- = '.';
5653 5672 }
5654 5673
5655 5674 if (subr == DIF_SUBR_INET_NTOA6)
5656 5675 goto inetout;
5657 5676
5658 5677 /*
5659 5678 * Set v6end to skip the IPv4 address that
5660 5679 * we have already stringified.
5661 5680 */
5662 5681 v6end = 10;
5663 5682 }
5664 5683
5665 5684 /*
5666 5685 * Build the IPv6 string by working through the
5667 5686 * address in reverse.
5668 5687 */
5669 5688 for (i = v6end; i >= 0; i -= 2) {
5670 5689 ASSERT(end >= base);
5671 5690
5672 5691 if (i == firstzero + numzero - 2) {
5673 5692 *end-- = ':';
5674 5693 *end-- = ':';
5675 5694 i -= numzero - 2;
5676 5695 continue;
5677 5696 }
5678 5697
5679 5698 if (i < 14 && i != firstzero - 2)
5680 5699 *end-- = ':';
5681 5700
5682 5701 val = (ip6._S6_un._S6_u8[i] << 8) +
5683 5702 ip6._S6_un._S6_u8[i + 1];
5684 5703
5685 5704 if (val == 0) {
5686 5705 *end-- = '0';
5687 5706 } else {
5688 5707 for (; val; val /= 16) {
5689 5708 *end-- = digits[val % 16];
5690 5709 }
5691 5710 }
5692 5711 }
5693 5712 ASSERT(end + 1 >= base);
5694 5713
5695 5714 } else {
5696 5715 /*
5697 5716 			 * The user didn't use AF_INET or AF_INET6.
5698 5717 */
5699 5718 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
5700 5719 regs[rd] = NULL;
5701 5720 break;
5702 5721 }
5703 5722
5704 5723 inetout: regs[rd] = (uintptr_t)end + 1;
5705 5724 mstate->dtms_scratch_ptr += size;
5706 5725 break;
5707 5726 }
5708 5727
5709 5728 }
5710 5729 }
5711 5730
5712 5731 /*
5713 5732 * Emulate the execution of DTrace IR instructions specified by the given
5714 5733 * DIF object. This function is deliberately void of assertions as all of
5715 5734 * the necessary checks are handled by a call to dtrace_difo_validate().
5716 5735 */
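/*
 * Each dif_instr_t is a single 32-bit word: an 8-bit opcode in the
 * high-order byte, with the remaining 24 bits encoding the operands --
 * for the register-format operations below, the three 8-bit register
 * numbers extracted by DIF_INSTR_R1(), DIF_INSTR_R2() and DIF_INSTR_RD().
 */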
5717 5736 static uint64_t
5718 5737 dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
5719 5738 dtrace_vstate_t *vstate, dtrace_state_t *state)
5720 5739 {
5721 5740 const dif_instr_t *text = difo->dtdo_buf;
5722 5741 const uint_t textlen = difo->dtdo_len;
5723 5742 const char *strtab = difo->dtdo_strtab;
5724 5743 const uint64_t *inttab = difo->dtdo_inttab;
5725 5744
5726 5745 uint64_t rval = 0;
5727 5746 dtrace_statvar_t *svar;
5728 5747 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
5729 5748 dtrace_difv_t *v;
5730 5749 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
5731 5750 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
5732 5751
5733 5752 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
5734 5753 uint64_t regs[DIF_DIR_NREGS];
5735 5754 uint64_t *tmp;
5736 5755
5737 5756 uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
5738 5757 int64_t cc_r;
5739 5758 uint_t pc = 0, id, opc;
5740 5759 uint8_t ttop = 0;
5741 5760 dif_instr_t instr;
5742 5761 uint_t r1, r2, rd;
5743 5762
5744 5763 /*
5745 5764 * We stash the current DIF object into the machine state: we need it
5746 5765 * for subsequent access checking.
5747 5766 */
5748 5767 mstate->dtms_difo = difo;
5749 5768
5750 - regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */
5769 + regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */
5751 5770
5752 5771 while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
5753 5772 opc = pc;
5754 5773
5755 5774 instr = text[pc++];
5756 5775 r1 = DIF_INSTR_R1(instr);
5757 5776 r2 = DIF_INSTR_R2(instr);
5758 5777 rd = DIF_INSTR_RD(instr);
5759 5778
5760 5779 switch (DIF_INSTR_OP(instr)) {
5761 5780 case DIF_OP_OR:
5762 5781 regs[rd] = regs[r1] | regs[r2];
5763 5782 break;
5764 5783 case DIF_OP_XOR:
5765 5784 regs[rd] = regs[r1] ^ regs[r2];
5766 5785 break;
5767 5786 case DIF_OP_AND:
5768 5787 regs[rd] = regs[r1] & regs[r2];
5769 5788 break;
5770 5789 case DIF_OP_SLL:
5771 5790 regs[rd] = regs[r1] << regs[r2];
5772 5791 break;
5773 5792 case DIF_OP_SRL:
5774 5793 regs[rd] = regs[r1] >> regs[r2];
5775 5794 break;
5776 5795 case DIF_OP_SUB:
5777 5796 regs[rd] = regs[r1] - regs[r2];
5778 5797 break;
5779 5798 case DIF_OP_ADD:
5780 5799 regs[rd] = regs[r1] + regs[r2];
5781 5800 break;
5782 5801 case DIF_OP_MUL:
5783 5802 regs[rd] = regs[r1] * regs[r2];
5784 5803 break;
5785 5804 case DIF_OP_SDIV:
5786 5805 if (regs[r2] == 0) {
5787 5806 regs[rd] = 0;
5788 5807 *flags |= CPU_DTRACE_DIVZERO;
5789 5808 } else {
5790 5809 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5791 5810 regs[rd] = (int64_t)regs[r1] /
5792 5811 (int64_t)regs[r2];
5793 5812 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5794 5813 }
5795 5814 break;
5796 5815
5797 5816 case DIF_OP_UDIV:
5798 5817 if (regs[r2] == 0) {
5799 5818 regs[rd] = 0;
5800 5819 *flags |= CPU_DTRACE_DIVZERO;
5801 5820 } else {
5802 5821 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5803 5822 regs[rd] = regs[r1] / regs[r2];
5804 5823 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5805 5824 }
5806 5825 break;
5807 5826
5808 5827 case DIF_OP_SREM:
5809 5828 if (regs[r2] == 0) {
5810 5829 regs[rd] = 0;
5811 5830 *flags |= CPU_DTRACE_DIVZERO;
5812 5831 } else {
5813 5832 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5814 5833 regs[rd] = (int64_t)regs[r1] %
5815 5834 (int64_t)regs[r2];
5816 5835 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5817 5836 }
5818 5837 break;
5819 5838
5820 5839 case DIF_OP_UREM:
5821 5840 if (regs[r2] == 0) {
5822 5841 regs[rd] = 0;
5823 5842 *flags |= CPU_DTRACE_DIVZERO;
5824 5843 } else {
5825 5844 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5826 5845 regs[rd] = regs[r1] % regs[r2];
5827 5846 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5828 5847 }
5829 5848 break;
5830 5849
5831 5850 case DIF_OP_NOT:
5832 5851 regs[rd] = ~regs[r1];
5833 5852 break;
5834 5853 case DIF_OP_MOV:
5835 5854 regs[rd] = regs[r1];
5836 5855 break;
5837 5856 case DIF_OP_CMP:
5838 5857 cc_r = regs[r1] - regs[r2];
5839 5858 cc_n = cc_r < 0;
5840 5859 cc_z = cc_r == 0;
5841 5860 cc_v = 0;
5842 5861 cc_c = regs[r1] < regs[r2];
5843 5862 break;
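		/*
		 * The cc_* variables emulate a CPU's negative/zero/overflow/
		 * carry condition codes; the conditional branches below test
		 * them in the usual way -- e.g., DIF_OP_BG is taken only
		 * when (cc_z | (cc_n ^ cc_v)) == 0, the standard signed
		 * greater-than test.
		 */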
5844 5863 case DIF_OP_TST:
5845 5864 cc_n = cc_v = cc_c = 0;
5846 5865 cc_z = regs[r1] == 0;
5847 5866 break;
5848 5867 case DIF_OP_BA:
5849 5868 pc = DIF_INSTR_LABEL(instr);
5850 5869 break;
5851 5870 case DIF_OP_BE:
5852 5871 if (cc_z)
5853 5872 pc = DIF_INSTR_LABEL(instr);
5854 5873 break;
5855 5874 case DIF_OP_BNE:
5856 5875 if (cc_z == 0)
5857 5876 pc = DIF_INSTR_LABEL(instr);
5858 5877 break;
5859 5878 case DIF_OP_BG:
5860 5879 if ((cc_z | (cc_n ^ cc_v)) == 0)
5861 5880 pc = DIF_INSTR_LABEL(instr);
5862 5881 break;
5863 5882 case DIF_OP_BGU:
5864 5883 if ((cc_c | cc_z) == 0)
5865 5884 pc = DIF_INSTR_LABEL(instr);
5866 5885 break;
5867 5886 case DIF_OP_BGE:
5868 5887 if ((cc_n ^ cc_v) == 0)
5869 5888 pc = DIF_INSTR_LABEL(instr);
5870 5889 break;
5871 5890 case DIF_OP_BGEU:
5872 5891 if (cc_c == 0)
5873 5892 pc = DIF_INSTR_LABEL(instr);
5874 5893 break;
5875 5894 case DIF_OP_BL:
5876 5895 if (cc_n ^ cc_v)
5877 5896 pc = DIF_INSTR_LABEL(instr);
5878 5897 break;
5879 5898 case DIF_OP_BLU:
5880 5899 if (cc_c)
5881 5900 pc = DIF_INSTR_LABEL(instr);
5882 5901 break;
5883 5902 case DIF_OP_BLE:
5884 5903 if (cc_z | (cc_n ^ cc_v))
5885 5904 pc = DIF_INSTR_LABEL(instr);
5886 5905 break;
5887 5906 case DIF_OP_BLEU:
5888 5907 if (cc_c | cc_z)
5889 5908 pc = DIF_INSTR_LABEL(instr);
5890 5909 break;
5891 5910 case DIF_OP_RLDSB:
5892 5911 if (!dtrace_canload(regs[r1], 1, mstate, vstate))
5893 5912 break;
5894 5913 /*FALLTHROUGH*/
5895 5914 case DIF_OP_LDSB:
5896 5915 regs[rd] = (int8_t)dtrace_load8(regs[r1]);
5897 5916 break;
5898 5917 case DIF_OP_RLDSH:
5899 5918 if (!dtrace_canload(regs[r1], 2, mstate, vstate))
5900 5919 break;
5901 5920 /*FALLTHROUGH*/
5902 5921 case DIF_OP_LDSH:
5903 5922 regs[rd] = (int16_t)dtrace_load16(regs[r1]);
5904 5923 break;
5905 5924 case DIF_OP_RLDSW:
5906 5925 if (!dtrace_canload(regs[r1], 4, mstate, vstate))
5907 5926 break;
5908 5927 /*FALLTHROUGH*/
5909 5928 case DIF_OP_LDSW:
5910 5929 regs[rd] = (int32_t)dtrace_load32(regs[r1]);
5911 5930 break;
5912 5931 case DIF_OP_RLDUB:
5913 5932 if (!dtrace_canload(regs[r1], 1, mstate, vstate))
5914 5933 break;
5915 5934 /*FALLTHROUGH*/
5916 5935 case DIF_OP_LDUB:
5917 5936 regs[rd] = dtrace_load8(regs[r1]);
5918 5937 break;
5919 5938 case DIF_OP_RLDUH:
5920 5939 if (!dtrace_canload(regs[r1], 2, mstate, vstate))
5921 5940 break;
5922 5941 /*FALLTHROUGH*/
5923 5942 case DIF_OP_LDUH:
5924 5943 regs[rd] = dtrace_load16(regs[r1]);
5925 5944 break;
5926 5945 case DIF_OP_RLDUW:
5927 5946 if (!dtrace_canload(regs[r1], 4, mstate, vstate))
5928 5947 break;
5929 5948 /*FALLTHROUGH*/
5930 5949 case DIF_OP_LDUW:
5931 5950 regs[rd] = dtrace_load32(regs[r1]);
5932 5951 break;
5933 5952 case DIF_OP_RLDX:
5934 5953 if (!dtrace_canload(regs[r1], 8, mstate, vstate))
5935 5954 break;
5936 5955 /*FALLTHROUGH*/
5937 5956 case DIF_OP_LDX:
5938 5957 regs[rd] = dtrace_load64(regs[r1]);
5939 5958 break;
5940 5959 case DIF_OP_ULDSB:
5941 5960 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5942 5961 regs[rd] = (int8_t)
5943 5962 dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5944 5963 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5945 5964 break;
5946 5965 case DIF_OP_ULDSH:
5947 5966 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5948 5967 regs[rd] = (int16_t)
5949 5968 dtrace_fuword16((void *)(uintptr_t)regs[r1]);
5950 5969 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5951 5970 break;
5952 5971 case DIF_OP_ULDSW:
5953 5972 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5954 5973 regs[rd] = (int32_t)
5955 5974 dtrace_fuword32((void *)(uintptr_t)regs[r1]);
5956 5975 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5957 5976 break;
5958 5977 case DIF_OP_ULDUB:
5959 5978 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5960 5979 regs[rd] =
5961 5980 dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5962 5981 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5963 5982 break;
5964 5983 case DIF_OP_ULDUH:
5965 5984 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5966 5985 regs[rd] =
5967 5986 dtrace_fuword16((void *)(uintptr_t)regs[r1]);
5968 5987 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5969 5988 break;
5970 5989 case DIF_OP_ULDUW:
5971 5990 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5972 5991 regs[rd] =
5973 5992 dtrace_fuword32((void *)(uintptr_t)regs[r1]);
5974 5993 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5975 5994 break;
5976 5995 case DIF_OP_ULDX:
5977 5996 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5978 5997 regs[rd] =
5979 5998 dtrace_fuword64((void *)(uintptr_t)regs[r1]);
5980 5999 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
5981 6000 break;
5982 6001 case DIF_OP_RET:
5983 6002 rval = regs[rd];
5984 6003 pc = textlen;
5985 6004 break;
5986 6005 case DIF_OP_NOP:
5987 6006 break;
5988 6007 case DIF_OP_SETX:
5989 6008 regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
5990 6009 break;
5991 6010 case DIF_OP_SETS:
5992 6011 regs[rd] = (uint64_t)(uintptr_t)
5993 6012 (strtab + DIF_INSTR_STRING(instr));
5994 6013 break;
5995 6014 case DIF_OP_SCMP: {
5996 6015 size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
5997 6016 uintptr_t s1 = regs[r1];
5998 6017 uintptr_t s2 = regs[r2];
5999 6018 size_t lim1, lim2;
6000 6019
6001 6020 if (s1 != NULL &&
6002 6021 !dtrace_strcanload(s1, sz, &lim1, mstate, vstate))
6003 6022 break;
6004 6023 if (s2 != NULL &&
6005 6024 !dtrace_strcanload(s2, sz, &lim2, mstate, vstate))
6006 6025 break;
6007 6026
6008 6027 cc_r = dtrace_strncmp((char *)s1, (char *)s2,
6009 6028 MIN(lim1, lim2));
6010 6029
6011 6030 cc_n = cc_r < 0;
6012 6031 cc_z = cc_r == 0;
6013 6032 cc_v = cc_c = 0;
6014 6033 break;
6015 6034 }
6016 6035 case DIF_OP_LDGA:
6017 6036 regs[rd] = dtrace_dif_variable(mstate, state,
6018 6037 r1, regs[r2]);
6019 6038 break;
6020 6039 case DIF_OP_LDGS:
6021 6040 id = DIF_INSTR_VAR(instr);
6022 6041
6023 6042 if (id >= DIF_VAR_OTHER_UBASE) {
6024 6043 uintptr_t a;
6025 6044
6026 6045 id -= DIF_VAR_OTHER_UBASE;
6027 6046 svar = vstate->dtvs_globals[id];
6028 6047 ASSERT(svar != NULL);
6029 6048 v = &svar->dtsv_var;
6030 6049
6031 6050 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
6032 6051 regs[rd] = svar->dtsv_data;
6033 6052 break;
6034 6053 }
6035 6054
6036 6055 a = (uintptr_t)svar->dtsv_data;
6037 6056
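				/*
				 * By-ref statics are laid out as an 8-byte
				 * flag word followed by the data itself; the
				 * DIF_OP_STGS case below stores UINT8_MAX
				 * into the flag byte to denote an assignment
				 * of NULL.
				 */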
6038 6057 if (*(uint8_t *)a == UINT8_MAX) {
6039 6058 /*
6040 6059 * If the 0th byte is set to UINT8_MAX
6041 6060 * then this is to be treated as a
6042 6061 * reference to a NULL variable.
6043 6062 */
6044 6063 regs[rd] = NULL;
6045 6064 } else {
6046 6065 regs[rd] = a + sizeof (uint64_t);
6047 6066 }
6048 6067
6049 6068 break;
6050 6069 }
6051 6070
6052 6071 regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
6053 6072 break;
6054 6073
6055 6074 case DIF_OP_STGA:
6056 6075 dtrace_dif_variable_write(mstate, state, r1, regs[r2],
6057 6076 regs[rd]);
6058 6077 break;
6059 6078
6060 6079 case DIF_OP_STGS:
6061 6080 id = DIF_INSTR_VAR(instr);
6062 6081
6063 6082 ASSERT(id >= DIF_VAR_OTHER_UBASE);
6064 6083 id -= DIF_VAR_OTHER_UBASE;
6065 6084
6066 6085 VERIFY(id < vstate->dtvs_nglobals);
6067 6086 svar = vstate->dtvs_globals[id];
6068 6087 ASSERT(svar != NULL);
6069 6088 v = &svar->dtsv_var;
6070 6089
6071 6090 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
6072 6091 uintptr_t a = (uintptr_t)svar->dtsv_data;
6073 6092 size_t lim;
6074 6093
6075 6094 ASSERT(a != NULL);
6076 6095 ASSERT(svar->dtsv_size != 0);
6077 6096
6078 6097 if (regs[rd] == NULL) {
6079 6098 *(uint8_t *)a = UINT8_MAX;
6080 6099 break;
6081 6100 } else {
6082 6101 *(uint8_t *)a = 0;
6083 6102 a += sizeof (uint64_t);
6084 6103 }
6085 6104 if (!dtrace_vcanload(
6086 6105 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
6087 6106 &lim, mstate, vstate))
6088 6107 break;
6089 6108
6090 6109 dtrace_vcopy((void *)(uintptr_t)regs[rd],
6091 6110 (void *)a, &v->dtdv_type, lim);
6092 6111 break;
6093 6112 }
6094 6113
6095 6114 svar->dtsv_data = regs[rd];
6096 6115 break;
6097 6116
6098 6117 case DIF_OP_LDTA:
6099 6118 /*
6100 6119 * There are no DTrace built-in thread-local arrays at
6101 6120 * present. This opcode is saved for future work.
6102 6121 */
6103 6122 *flags |= CPU_DTRACE_ILLOP;
6104 6123 regs[rd] = 0;
6105 6124 break;
6106 6125
6107 6126 case DIF_OP_LDLS:
6108 6127 id = DIF_INSTR_VAR(instr);
6109 6128
6110 6129 if (id < DIF_VAR_OTHER_UBASE) {
6111 6130 /*
6112 6131 * For now, this has no meaning.
6113 6132 */
6114 6133 regs[rd] = 0;
6115 6134 break;
6116 6135 }
6117 6136
6118 6137 id -= DIF_VAR_OTHER_UBASE;
6119 6138
6120 6139 ASSERT(id < vstate->dtvs_nlocals);
6121 6140 ASSERT(vstate->dtvs_locals != NULL);
6122 6141
6123 6142 svar = vstate->dtvs_locals[id];
6124 6143 ASSERT(svar != NULL);
6125 6144 v = &svar->dtsv_var;
6126 6145
6127 6146 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
6128 6147 uintptr_t a = (uintptr_t)svar->dtsv_data;
6129 6148 size_t sz = v->dtdv_type.dtdt_size;
6130 6149
6131 6150 sz += sizeof (uint64_t);
6132 6151 ASSERT(svar->dtsv_size == NCPU * sz);
6133 6152 a += CPU->cpu_id * sz;
6134 6153
6135 6154 if (*(uint8_t *)a == UINT8_MAX) {
6136 6155 /*
6137 6156 * If the 0th byte is set to UINT8_MAX
6138 6157 * then this is to be treated as a
6139 6158 * reference to a NULL variable.
6140 6159 */
6141 6160 regs[rd] = NULL;
6142 6161 } else {
6143 6162 regs[rd] = a + sizeof (uint64_t);
6144 6163 }
6145 6164
6146 6165 break;
6147 6166 }
6148 6167
6149 6168 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
6150 6169 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
6151 6170 regs[rd] = tmp[CPU->cpu_id];
6152 6171 break;
6153 6172
6154 6173 case DIF_OP_STLS:
6155 6174 id = DIF_INSTR_VAR(instr);
6156 6175
6157 6176 ASSERT(id >= DIF_VAR_OTHER_UBASE);
6158 6177 id -= DIF_VAR_OTHER_UBASE;
6159 6178 VERIFY(id < vstate->dtvs_nlocals);
6160 6179
6161 6180 ASSERT(vstate->dtvs_locals != NULL);
6162 6181 svar = vstate->dtvs_locals[id];
6163 6182 ASSERT(svar != NULL);
6164 6183 v = &svar->dtsv_var;
6165 6184
6166 6185 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
6167 6186 uintptr_t a = (uintptr_t)svar->dtsv_data;
6168 6187 size_t sz = v->dtdv_type.dtdt_size;
6169 6188 size_t lim;
6170 6189
6171 6190 sz += sizeof (uint64_t);
6172 6191 ASSERT(svar->dtsv_size == NCPU * sz);
6173 6192 a += CPU->cpu_id * sz;
6174 6193
6175 6194 if (regs[rd] == NULL) {
6176 6195 *(uint8_t *)a = UINT8_MAX;
6177 6196 break;
6178 6197 } else {
6179 6198 *(uint8_t *)a = 0;
6180 6199 a += sizeof (uint64_t);
6181 6200 }
6182 6201
6183 6202 if (!dtrace_vcanload(
6184 6203 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
6185 6204 &lim, mstate, vstate))
6186 6205 break;
6187 6206
6188 6207 dtrace_vcopy((void *)(uintptr_t)regs[rd],
6189 6208 (void *)a, &v->dtdv_type, lim);
6190 6209 break;
6191 6210 }
6192 6211
6193 6212 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
6194 6213 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
6195 6214 tmp[CPU->cpu_id] = regs[rd];
6196 6215 break;
6197 6216
6198 6217 case DIF_OP_LDTS: {
6199 6218 dtrace_dynvar_t *dvar;
6200 6219 dtrace_key_t *key;
6201 6220
6202 6221 id = DIF_INSTR_VAR(instr);
6203 6222 ASSERT(id >= DIF_VAR_OTHER_UBASE);
6204 6223 id -= DIF_VAR_OTHER_UBASE;
6205 6224 v = &vstate->dtvs_tlocals[id];
6206 6225
6207 6226 key = &tupregs[DIF_DTR_NREGS];
6208 6227 key[0].dttk_value = (uint64_t)id;
6209 6228 key[0].dttk_size = 0;
6210 6229 DTRACE_TLS_THRKEY(key[1].dttk_value);
6211 6230 key[1].dttk_size = 0;
6212 6231
6213 6232 dvar = dtrace_dynvar(dstate, 2, key,
6214 6233 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
6215 6234 mstate, vstate);
6216 6235
6217 6236 if (dvar == NULL) {
6218 6237 regs[rd] = 0;
6219 6238 break;
6220 6239 }
6221 6240
6222 6241 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
6223 6242 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
6224 6243 } else {
6225 6244 regs[rd] = *((uint64_t *)dvar->dtdv_data);
6226 6245 }
6227 6246
6228 6247 break;
6229 6248 }
6230 6249
6231 6250 case DIF_OP_STTS: {
6232 6251 dtrace_dynvar_t *dvar;
6233 6252 dtrace_key_t *key;
6234 6253
6235 6254 id = DIF_INSTR_VAR(instr);
6236 6255 ASSERT(id >= DIF_VAR_OTHER_UBASE);
6237 6256 id -= DIF_VAR_OTHER_UBASE;
6238 6257 VERIFY(id < vstate->dtvs_ntlocals);
6239 6258
6240 6259 key = &tupregs[DIF_DTR_NREGS];
6241 6260 key[0].dttk_value = (uint64_t)id;
6242 6261 key[0].dttk_size = 0;
6243 6262 DTRACE_TLS_THRKEY(key[1].dttk_value);
6244 6263 key[1].dttk_size = 0;
6245 6264 v = &vstate->dtvs_tlocals[id];
6246 6265
6247 6266 dvar = dtrace_dynvar(dstate, 2, key,
6248 6267 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
6249 6268 v->dtdv_type.dtdt_size : sizeof (uint64_t),
6250 6269 regs[rd] ? DTRACE_DYNVAR_ALLOC :
6251 6270 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
6252 6271
6253 6272 /*
6254 6273 * Given that we're storing to thread-local data,
6255 6274 * we need to flush our predicate cache.
6256 6275 */
6257 6276 curthread->t_predcache = NULL;
6258 6277
6259 6278 if (dvar == NULL)
6260 6279 break;
6261 6280
6262 6281 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
6263 6282 size_t lim;
6264 6283
6265 6284 if (!dtrace_vcanload(
6266 6285 (void *)(uintptr_t)regs[rd],
6267 6286 &v->dtdv_type, &lim, mstate, vstate))
6268 6287 break;
6269 6288
6270 6289 dtrace_vcopy((void *)(uintptr_t)regs[rd],
6271 6290 dvar->dtdv_data, &v->dtdv_type, lim);
6272 6291 } else {
6273 6292 *((uint64_t *)dvar->dtdv_data) = regs[rd];
6274 6293 }
6275 6294
6276 6295 break;
6277 6296 }
6278 6297
6279 6298 case DIF_OP_SRA:
6280 6299 regs[rd] = (int64_t)regs[r1] >> regs[r2];
6281 6300 break;
6282 6301
6283 6302 case DIF_OP_CALL:
6284 6303 dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
6285 6304 regs, tupregs, ttop, mstate, state);
6286 6305 break;
6287 6306
6288 6307 case DIF_OP_PUSHTR:
6289 6308 if (ttop == DIF_DTR_NREGS) {
6290 6309 *flags |= CPU_DTRACE_TUPOFLOW;
6291 6310 break;
6292 6311 }
6293 6312
6294 6313 if (r1 == DIF_TYPE_STRING) {
6295 6314 /*
6296 6315 * If this is a string type and the size is 0,
6297 6316 * we'll use the system-wide default string
6298 6317 * size. Note that we are _not_ looking at
6299 6318 * the value of the DTRACEOPT_STRSIZE option;
6300 6319 * had this been set, we would expect to have
6301 6320 * a non-zero size value in the "pushtr".
6302 6321 */
6303 6322 tupregs[ttop].dttk_size =
6304 6323 dtrace_strlen((char *)(uintptr_t)regs[rd],
6305 6324 regs[r2] ? regs[r2] :
6306 6325 dtrace_strsize_default) + 1;
6307 6326 } else {
6308 6327 if (regs[r2] > LONG_MAX) {
6309 6328 *flags |= CPU_DTRACE_ILLOP;
6310 6329 break;
6311 6330 }
6312 6331
6313 6332 tupregs[ttop].dttk_size = regs[r2];
6314 6333 }
6315 6334
6316 6335 tupregs[ttop++].dttk_value = regs[rd];
6317 6336 break;
6318 6337
6319 6338 case DIF_OP_PUSHTV:
6320 6339 if (ttop == DIF_DTR_NREGS) {
6321 6340 *flags |= CPU_DTRACE_TUPOFLOW;
6322 6341 break;
6323 6342 }
6324 6343
6325 6344 tupregs[ttop].dttk_value = regs[rd];
6326 6345 tupregs[ttop++].dttk_size = 0;
6327 6346 break;
6328 6347
6329 6348 case DIF_OP_POPTS:
6330 6349 if (ttop != 0)
6331 6350 ttop--;
6332 6351 break;
6333 6352
6334 6353 case DIF_OP_FLUSHTS:
6335 6354 ttop = 0;
6336 6355 break;
6337 6356
6338 6357 case DIF_OP_LDGAA:
6339 6358 case DIF_OP_LDTAA: {
6340 6359 dtrace_dynvar_t *dvar;
6341 6360 dtrace_key_t *key = tupregs;
6342 6361 uint_t nkeys = ttop;
6343 6362
6344 6363 id = DIF_INSTR_VAR(instr);
6345 6364 ASSERT(id >= DIF_VAR_OTHER_UBASE);
6346 6365 id -= DIF_VAR_OTHER_UBASE;
6347 6366
6348 6367 key[nkeys].dttk_value = (uint64_t)id;
6349 6368 key[nkeys++].dttk_size = 0;
6350 6369
6351 6370 if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
6352 6371 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
6353 6372 key[nkeys++].dttk_size = 0;
6354 6373 VERIFY(id < vstate->dtvs_ntlocals);
6355 6374 v = &vstate->dtvs_tlocals[id];
6356 6375 } else {
6357 6376 VERIFY(id < vstate->dtvs_nglobals);
6358 6377 v = &vstate->dtvs_globals[id]->dtsv_var;
6359 6378 }
6360 6379
6361 6380 dvar = dtrace_dynvar(dstate, nkeys, key,
6362 6381 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
6363 6382 v->dtdv_type.dtdt_size : sizeof (uint64_t),
6364 6383 DTRACE_DYNVAR_NOALLOC, mstate, vstate);
6365 6384
6366 6385 if (dvar == NULL) {
6367 6386 regs[rd] = 0;
6368 6387 break;
6369 6388 }
6370 6389
6371 6390 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
6372 6391 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
6373 6392 } else {
6374 6393 regs[rd] = *((uint64_t *)dvar->dtdv_data);
6375 6394 }
6376 6395
6377 6396 break;
6378 6397 }
6379 6398
6380 6399 case DIF_OP_STGAA:
6381 6400 case DIF_OP_STTAA: {
6382 6401 dtrace_dynvar_t *dvar;
6383 6402 dtrace_key_t *key = tupregs;
6384 6403 uint_t nkeys = ttop;
6385 6404
6386 6405 id = DIF_INSTR_VAR(instr);
6387 6406 ASSERT(id >= DIF_VAR_OTHER_UBASE);
6388 6407 id -= DIF_VAR_OTHER_UBASE;
6389 6408
6390 6409 key[nkeys].dttk_value = (uint64_t)id;
6391 6410 key[nkeys++].dttk_size = 0;
6392 6411
6393 6412 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
6394 6413 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
6395 6414 key[nkeys++].dttk_size = 0;
6396 6415 VERIFY(id < vstate->dtvs_ntlocals);
6397 6416 v = &vstate->dtvs_tlocals[id];
6398 6417 } else {
6399 6418 VERIFY(id < vstate->dtvs_nglobals);
6400 6419 v = &vstate->dtvs_globals[id]->dtsv_var;
6401 6420 }
6402 6421
6403 6422 dvar = dtrace_dynvar(dstate, nkeys, key,
6404 6423 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
6405 6424 v->dtdv_type.dtdt_size : sizeof (uint64_t),
6406 6425 regs[rd] ? DTRACE_DYNVAR_ALLOC :
6407 6426 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
6408 6427
6409 6428 if (dvar == NULL)
6410 6429 break;
6411 6430
6412 6431 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
6413 6432 size_t lim;
6414 6433
6415 6434 if (!dtrace_vcanload(
6416 6435 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
6417 6436 &lim, mstate, vstate))
6418 6437 break;
6419 6438
6420 6439 dtrace_vcopy((void *)(uintptr_t)regs[rd],
6421 6440 dvar->dtdv_data, &v->dtdv_type, lim);
6422 6441 } else {
6423 6442 *((uint64_t *)dvar->dtdv_data) = regs[rd];
6424 6443 }
6425 6444
6426 6445 break;
6427 6446 }
6428 6447
6429 6448 case DIF_OP_ALLOCS: {
6430 6449 uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
6431 6450 size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];
6432 6451
6433 6452 /*
6434 6453 * Rounding up the user allocation size could have
6435 6454 * overflowed large, bogus allocations (like -1ULL) to
6436 6455 * 0.
6437 6456 */
6438 6457 if (size < regs[r1] ||
6439 6458 !DTRACE_INSCRATCH(mstate, size)) {
6440 6459 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
6441 6460 regs[rd] = NULL;
6442 6461 break;
6443 6462 }
6444 6463
6445 6464 dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
6446 6465 mstate->dtms_scratch_ptr += size;
6447 6466 regs[rd] = ptr;
6448 6467 break;
6449 6468 }
6450 6469
6451 6470 case DIF_OP_COPYS:
6452 6471 if (!dtrace_canstore(regs[rd], regs[r2],
6453 6472 mstate, vstate)) {
6454 6473 *flags |= CPU_DTRACE_BADADDR;
6455 6474 *illval = regs[rd];
6456 6475 break;
6457 6476 }
6458 6477
6459 6478 if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
6460 6479 break;
6461 6480
6462 6481 dtrace_bcopy((void *)(uintptr_t)regs[r1],
6463 6482 (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
6464 6483 break;
6465 6484
6466 6485 case DIF_OP_STB:
6467 6486 if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
6468 6487 *flags |= CPU_DTRACE_BADADDR;
6469 6488 *illval = regs[rd];
6470 6489 break;
6471 6490 }
6472 6491 *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
6473 6492 break;
6474 6493
6475 6494 case DIF_OP_STH:
6476 6495 if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
6477 6496 *flags |= CPU_DTRACE_BADADDR;
6478 6497 *illval = regs[rd];
6479 6498 break;
6480 6499 }
6481 6500 if (regs[rd] & 1) {
6482 6501 *flags |= CPU_DTRACE_BADALIGN;
6483 6502 *illval = regs[rd];
6484 6503 break;
6485 6504 }
6486 6505 *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
6487 6506 break;
6488 6507
6489 6508 case DIF_OP_STW:
6490 6509 if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
6491 6510 *flags |= CPU_DTRACE_BADADDR;
6492 6511 *illval = regs[rd];
6493 6512 break;
6494 6513 }
6495 6514 if (regs[rd] & 3) {
6496 6515 *flags |= CPU_DTRACE_BADALIGN;
6497 6516 *illval = regs[rd];
6498 6517 break;
6499 6518 }
6500 6519 *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
6501 6520 break;
6502 6521
6503 6522 case DIF_OP_STX:
6504 6523 if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
6505 6524 *flags |= CPU_DTRACE_BADADDR;
6506 6525 *illval = regs[rd];
6507 6526 break;
6508 6527 }
6509 6528 if (regs[rd] & 7) {
6510 6529 *flags |= CPU_DTRACE_BADALIGN;
6511 6530 *illval = regs[rd];
6512 6531 break;
6513 6532 }
6514 6533 *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
6515 6534 break;
6516 6535 }
6517 6536 }
6518 6537
6519 6538 if (!(*flags & CPU_DTRACE_FAULT))
6520 6539 return (rval);
6521 6540
6522 6541 mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
6523 6542 mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;
6524 6543
6525 6544 return (0);
6526 6545 }
6527 6546
6528 6547 static void
6529 6548 dtrace_action_breakpoint(dtrace_ecb_t *ecb)
6530 6549 {
6531 6550 dtrace_probe_t *probe = ecb->dte_probe;
6532 6551 dtrace_provider_t *prov = probe->dtpr_provider;
6533 6552 char c[DTRACE_FULLNAMELEN + 80], *str;
6534 6553 char *msg = "dtrace: breakpoint action at probe ";
6535 6554 char *ecbmsg = " (ecb ";
6536 6555 uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
6537 6556 uintptr_t val = (uintptr_t)ecb;
6538 6557 int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;
6539 6558
6540 6559 if (dtrace_destructive_disallow)
6541 6560 return;
6542 6561
6543 6562 /*
6544 6563 * It's impossible to be taking action on the NULL probe.
6545 6564 */
6546 6565 ASSERT(probe != NULL);
6547 6566
6548 6567 /*
6549 6568 * This is a poor man's (destitute man's?) sprintf(): we want to
6550 6569 * print the provider name, module name, function name and name of
6551 6570 * the probe, along with the hex address of the ECB with the breakpoint
6552 6571 * action -- all of which we must place in the character buffer by
6553 6572 * hand.
6554 6573 */
6555 6574 while (*msg != '\0')
6556 6575 c[i++] = *msg++;
6557 6576
6558 6577 for (str = prov->dtpv_name; *str != '\0'; str++)
6559 6578 c[i++] = *str;
6560 6579 c[i++] = ':';
6561 6580
6562 6581 for (str = probe->dtpr_mod; *str != '\0'; str++)
6563 6582 c[i++] = *str;
6564 6583 c[i++] = ':';
6565 6584
6566 6585 for (str = probe->dtpr_func; *str != '\0'; str++)
6567 6586 c[i++] = *str;
6568 6587 c[i++] = ':';
6569 6588
6570 6589 for (str = probe->dtpr_name; *str != '\0'; str++)
6571 6590 c[i++] = *str;
6572 6591
6573 6592 while (*ecbmsg != '\0')
6574 6593 c[i++] = *ecbmsg++;
6575 6594
6576 6595 while (shift >= 0) {
6577 6596 mask = (uintptr_t)0xf << shift;
6578 6597
6579 6598 if (val >= ((uintptr_t)1 << shift))
6580 6599 c[i++] = "0123456789abcdef"[(val & mask) >> shift];
6581 6600 shift -= 4;
6582 6601 }
6583 6602
6584 6603 c[i++] = ')';
6585 6604 c[i] = '\0';
6586 6605
6587 6606 debug_enter(c);
6588 6607 }
6589 6608
6590 6609 static void
6591 6610 dtrace_action_panic(dtrace_ecb_t *ecb)
6592 6611 {
6593 6612 dtrace_probe_t *probe = ecb->dte_probe;
6594 6613
6595 6614 /*
6596 6615 * It's impossible to be taking action on the NULL probe.
6597 6616 */
6598 6617 ASSERT(probe != NULL);
6599 6618
6600 6619 if (dtrace_destructive_disallow)
6601 6620 return;
6602 6621
6603 6622 if (dtrace_panicked != NULL)
6604 6623 return;
6605 6624
6606 6625 if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
6607 6626 return;
6608 6627
6609 6628 /*
6610 6629 * We won the right to panic. (We want to be sure that only one
6611 6630 * thread calls panic() from dtrace_probe(), and that panic() is
6612 6631 * called exactly once.)
6613 6632 */
6614 6633 dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
6615 6634 probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
6616 6635 probe->dtpr_func, probe->dtpr_name, (void *)ecb);
6617 6636 }
6618 6637
6619 6638 static void
6620 6639 dtrace_action_raise(uint64_t sig)
6621 6640 {
6622 6641 if (dtrace_destructive_disallow)
6623 6642 return;
6624 6643
6625 6644 if (sig >= NSIG) {
6626 6645 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
6627 6646 return;
6628 6647 }
6629 6648
6630 6649 /*
6631 6650 * raise() has a queue depth of 1 -- we ignore all subsequent
6632 6651 * invocations of the raise() action.
6633 6652 */
6634 6653 if (curthread->t_dtrace_sig == 0)
6635 6654 curthread->t_dtrace_sig = (uint8_t)sig;
6636 6655
6637 6656 curthread->t_sig_check = 1;
6638 6657 aston(curthread);
6639 6658 }
6640 6659
6641 6660 static void
6642 6661 dtrace_action_stop(void)
6643 6662 {
6644 6663 if (dtrace_destructive_disallow)
6645 6664 return;
6646 6665
6647 6666 if (!curthread->t_dtrace_stop) {
6648 6667 curthread->t_dtrace_stop = 1;
6649 6668 curthread->t_sig_check = 1;
6650 6669 aston(curthread);
6651 6670 }
6652 6671 }
6653 6672
6654 6673 static void
6655 6674 dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
6656 6675 {
6657 6676 hrtime_t now;
6658 6677 volatile uint16_t *flags;
6659 6678 cpu_t *cpu = CPU;
6660 6679
6661 6680 if (dtrace_destructive_disallow)
6662 6681 return;
6663 6682
6664 6683 flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;
6665 6684
6666 6685 now = dtrace_gethrtime();
6667 6686
6668 6687 if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
6669 6688 /*
6670 6689 * We need to advance the mark to the current time.
6671 6690 */
6672 6691 cpu->cpu_dtrace_chillmark = now;
6673 6692 cpu->cpu_dtrace_chilled = 0;
6674 6693 }
6675 6694
6676 6695 /*
6677 6696 * Now check to see if the requested chill time would take us over
6678 6697 * the maximum amount of time allowed in the chill interval. (Or
6679 6698 * worse, if the calculation itself induces overflow.)
6680 6699 */
6681 6700 if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
6682 6701 cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
6683 6702 *flags |= CPU_DTRACE_ILLOP;
6684 6703 return;
6685 6704 }
6686 6705
6687 6706 while (dtrace_gethrtime() - now < val)
6688 6707 continue;
6689 6708
6690 6709 /*
6691 6710 * Normally, we assure that the value of the variable "timestamp" does
6692 6711 * not change within an ECB. The presence of chill() represents an
6693 6712 * exception to this rule, however.
6694 6713 */
6695 6714 mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
6696 6715 cpu->cpu_dtrace_chilled += val;
6697 6716 }
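
/*
 * Illustrative sketch (editorial; not part of the original source): the
 * bounds test above is the usual overflow-safe form of "chilled + val <=
 * dtrace_chill_max" -- if the sum wrapped, it compares less than the
 * original addend.  A hypothetical standalone equivalent:
 */
#if 0
static int
chill_would_exceed(hrtime_t chilled, hrtime_t val, hrtime_t max)
{
        return (chilled + val > max || chilled + val < chilled);
}
#endif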
6698 6717
6699 6718 static void
6700 6719 dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
6701 6720 uint64_t *buf, uint64_t arg)
6702 6721 {
6703 6722 int nframes = DTRACE_USTACK_NFRAMES(arg);
6704 6723 int strsize = DTRACE_USTACK_STRSIZE(arg);
6705 6724 uint64_t *pcs = &buf[1], *fps;
6706 6725 char *str = (char *)&pcs[nframes];
6707 6726 int size, offs = 0, i, j;
6708 6727 size_t rem;
6709 6728 uintptr_t old = mstate->dtms_scratch_ptr, saved;
6710 6729 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
6711 6730 char *sym;
6712 6731
6713 6732 /*
6714 6733 * Should be taking a faster path if string space has not been
6715 6734 * allocated.
6716 6735 */
6717 6736 ASSERT(strsize != 0);
6718 6737
6719 6738 /*
6720 6739 * We will first allocate some temporary space for the frame pointers.
6721 6740 */
6722 6741 fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
6723 6742 size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
6724 6743 (nframes * sizeof (uint64_t));
6725 6744
6726 6745 if (!DTRACE_INSCRATCH(mstate, size)) {
6727 6746 /*
6728 6747 * Not enough room for our frame pointers -- need to indicate
6729 6748 * that we ran out of scratch space.
6730 6749 */
6731 6750 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
6732 6751 return;
6733 6752 }
6734 6753
6735 6754 mstate->dtms_scratch_ptr += size;
6736 6755 saved = mstate->dtms_scratch_ptr;
6737 6756
6738 6757 /*
6739 6758 * Now get a stack with both program counters and frame pointers.
6740 6759 */
6741 6760 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6742 6761 dtrace_getufpstack(buf, fps, nframes + 1);
6743 6762 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6744 6763
6745 6764 /*
6746 6765 * If that faulted, we're cooked.
6747 6766 */
6748 6767 if (*flags & CPU_DTRACE_FAULT)
6749 6768 goto out;
6750 6769
6751 6770 /*
6752 6771 * Now we want to walk up the stack, calling the USTACK helper. For
6753 6772 * each iteration, we restore the scratch pointer.
6754 6773 */
6755 6774 for (i = 0; i < nframes; i++) {
6756 6775 mstate->dtms_scratch_ptr = saved;
6757 6776
6758 6777 if (offs >= strsize)
6759 6778 break;
6760 6779
6761 6780 sym = (char *)(uintptr_t)dtrace_helper(
6762 6781 DTRACE_HELPER_ACTION_USTACK,
6763 6782 mstate, state, pcs[i], fps[i]);
6764 6783
6765 6784 /*
6766 6785 * If we faulted while running the helper, we're going to
6767 6786 * clear the fault and null out the corresponding string.
6768 6787 */
6769 6788 if (*flags & CPU_DTRACE_FAULT) {
6770 6789 *flags &= ~CPU_DTRACE_FAULT;
6771 6790 str[offs++] = '\0';
6772 6791 continue;
6773 6792 }
6774 6793
6775 6794 if (sym == NULL) {
6776 6795 str[offs++] = '\0';
6777 6796 continue;
6778 6797 }
6779 6798
6780 6799 if (!dtrace_strcanload((uintptr_t)sym, strsize, &rem, mstate,
6781 6800 &(state->dts_vstate))) {
6782 6801 str[offs++] = '\0';
6783 6802 continue;
6784 6803 }
6785 6804
6786 6805 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6787 6806
6788 6807 /*
6789 6808 * Now copy in the string that the helper returned to us.
6790 6809 */
6791 6810 for (j = 0; offs + j < strsize && j < rem; j++) {
6792 6811 if ((str[offs + j] = sym[j]) == '\0')
6793 6812 break;
6794 6813 }
6795 6814
6796 6815 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6797 6816
6798 6817 offs += j + 1;
6799 6818 }
6800 6819
6801 6820 if (offs >= strsize) {
6802 6821 /*
6803 6822 * If we didn't have room for all of the strings, we don't
6804 6823 * abort processing -- this needn't be a fatal error -- but we
6805 6824 * still want to increment a counter (dts_stkstroverflows) to
6806 6825 * allow this condition to be warned about. (If this is from
6807 6826 * a jstack() action, it is easily tuned via jstackstrsize.)
6808 6827 */
6809 6828 dtrace_error(&state->dts_stkstroverflows);
6810 6829 }
6811 6830
6812 6831 while (offs < strsize)
6813 6832 str[offs++] = '\0';
6814 6833
6815 6834 out:
6816 6835 mstate->dtms_scratch_ptr = old;
6817 6836 }
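
/*
 * Illustrative sketch (editorial; not part of the original source) of the
 * record laid down above, assuming the illumos convention that
 * dtrace_getufpstack() stores the pid in the first slot:
 *
 *      buf[0]                          pid
 *      buf[1] .. buf[nframes]          user program counter values
 *      (char *)&buf[nframes + 1]       strsize bytes of helper strings
 */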
6818 6837
6819 6838 static void
6820 6839 dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size,
6821 6840 size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind)
6822 6841 {
6823 6842 volatile uint16_t *flags;
6824 6843 uint64_t val = *valp;
6825 6844 size_t valoffs = *valoffsp;
6826 6845
6827 6846 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
6828 6847 ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF);
6829 6848
6830 6849 /*
6831 6850 * If this is a string, we're going to only load until we find the zero
6832 6851 * byte -- after which we'll store zero bytes.
6833 6852 */
6834 6853 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
6835 6854 char c = '\0' + 1;
6836 6855 size_t s;
6837 6856
6838 6857 for (s = 0; s < size; s++) {
6839 6858 if (c != '\0' && dtkind == DIF_TF_BYREF) {
6840 6859 c = dtrace_load8(val++);
6841 6860 } else if (c != '\0' && dtkind == DIF_TF_BYUREF) {
6842 6861 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6843 6862 c = dtrace_fuword8((void *)(uintptr_t)val++);
6844 6863 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6845 6864 if (*flags & CPU_DTRACE_FAULT)
6846 6865 break;
6847 6866 }
6848 6867
6849 6868 DTRACE_STORE(uint8_t, tomax, valoffs++, c);
6850 6869
6851 6870 if (c == '\0' && intuple)
6852 6871 break;
6853 6872 }
6854 6873 } else {
6855 6874 uint8_t c;
6856 6875 while (valoffs < end) {
6857 6876 if (dtkind == DIF_TF_BYREF) {
6858 6877 c = dtrace_load8(val++);
6859 6878 } else if (dtkind == DIF_TF_BYUREF) {
6860 6879 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6861 6880 c = dtrace_fuword8((void *)(uintptr_t)val++);
6862 6881 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6863 6882 if (*flags & CPU_DTRACE_FAULT)
6864 6883 break;
6865 6884 }
6866 6885
6867 6886 DTRACE_STORE(uint8_t, tomax,
6868 6887 valoffs++, c);
6869 6888 }
6870 6889 }
6871 6890
6872 6891 *valp = val;
6873 6892 *valoffsp = valoffs;
6874 6893 }
6875 6894
6876 6895 /*
6877 6896 * If you're looking for the epicenter of DTrace, you just found it. This
6878 6897 * is the function called by the provider to fire a probe -- from which all
6879 6898 * subsequent probe-context DTrace activity emanates.
6880 6899 */
6881 6900 void
6882 6901 dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
6883 6902 uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
6884 6903 {
6885 6904 processorid_t cpuid;
6886 6905 dtrace_icookie_t cookie;
6887 6906 dtrace_probe_t *probe;
6888 6907 dtrace_mstate_t mstate;
6889 6908 dtrace_ecb_t *ecb;
6890 6909 dtrace_action_t *act;
6891 6910 intptr_t offs;
6892 6911 size_t size;
6893 6912 int vtime, onintr;
6894 6913 volatile uint16_t *flags;
6895 6914 hrtime_t now, end;
6896 6915
6897 6916 /*
6898 6917 * Kick out immediately if this CPU is still being born (in which case
6899 6918 * curthread will be set to -1) or the current thread can't allow
6900 6919 * probes in its current context.
6901 6920 */
6902 6921 if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE))
6903 6922 return;
6904 6923
6905 6924 cookie = dtrace_interrupt_disable();
6906 6925 probe = dtrace_probes[id - 1];
6907 6926 cpuid = CPU->cpu_id;
6908 6927 onintr = CPU_ON_INTR(CPU);
6909 6928
6910 6929 CPU->cpu_dtrace_probes++;
6911 6930
6912 6931 if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
6913 6932 probe->dtpr_predcache == curthread->t_predcache) {
6914 6933 /*
6915 6934 * We have hit in the predicate cache; we know that
6916 6935 * this predicate would evaluate to be false.
6917 6936 */
6918 6937 dtrace_interrupt_enable(cookie);
6919 6938 return;
6920 6939 }
6921 6940
6922 6941 if (panic_quiesce) {
6923 6942 /*
6924 6943 * We don't trace anything if we're panicking.
6925 6944 */
6926 6945 dtrace_interrupt_enable(cookie);
6927 6946 return;
6928 6947 }
6929 6948
6930 6949 now = mstate.dtms_timestamp = dtrace_gethrtime();
6931 6950 mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
6932 6951 vtime = dtrace_vtime_references != 0;
6933 6952
6934 6953 if (vtime && curthread->t_dtrace_start)
6935 6954 curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
6936 6955
6937 6956 mstate.dtms_difo = NULL;
6938 6957 mstate.dtms_probe = probe;
6939 6958 mstate.dtms_strtok = NULL;
6940 6959 mstate.dtms_arg[0] = arg0;
6941 6960 mstate.dtms_arg[1] = arg1;
6942 6961 mstate.dtms_arg[2] = arg2;
6943 6962 mstate.dtms_arg[3] = arg3;
6944 6963 mstate.dtms_arg[4] = arg4;
6945 6964
6946 6965 flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;
6947 6966
6948 6967 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
6949 6968 dtrace_predicate_t *pred = ecb->dte_predicate;
6950 6969 dtrace_state_t *state = ecb->dte_state;
6951 6970 dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
6952 6971 dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
6953 6972 dtrace_vstate_t *vstate = &state->dts_vstate;
6954 6973 dtrace_provider_t *prov = probe->dtpr_provider;
6955 6974 uint64_t tracememsize = 0;
6956 6975 int committed = 0;
6957 6976 caddr_t tomax;
6958 6977
6959 6978 /*
6960 6979 * A little subtlety with the following (seemingly innocuous)
6961 6980 * declaration of the automatic 'val': by looking at the
6962 6981 * code, you might think that it could be declared in the
6963 6982 * action processing loop, below. (That is, it's only used in
6964 6983 * the action processing loop.) However, it must be declared
6965 6984 * out of that scope because in the case of DIF expression
6966 6985 * arguments to aggregating actions, one iteration of the
6967 6986 * action loop will use the last iteration's value.
6968 6987 */
6969 6988 #ifdef lint
6970 6989 uint64_t val = 0;
6971 6990 #else
6972 6991 uint64_t val;
6973 6992 #endif
6974 6993
6975 6994 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
6976 6995 mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
6977 6996 mstate.dtms_getf = NULL;
6978 6997
6979 6998 *flags &= ~CPU_DTRACE_ERROR;
6980 6999
6981 7000 if (prov == dtrace_provider) {
6982 7001 /*
6983 7002 * If dtrace itself is the provider of this probe,
6984 7003 * we're only going to continue processing the ECB if
6985 7004 * arg0 (the dtrace_state_t) is equal to the ECB's
6986 7005 * creating state. (This prevents disjoint consumers
6987 7006 * from seeing one another's metaprobes.)
6988 7007 */
6989 7008 if (arg0 != (uint64_t)(uintptr_t)state)
6990 7009 continue;
6991 7010 }
6992 7011
6993 7012 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
6994 7013 /*
6995 7014 * We're not currently active. If our provider isn't
6996 7015 * the dtrace pseudo provider, we're not interested.
6997 7016 */
6998 7017 if (prov != dtrace_provider)
6999 7018 continue;
7000 7019
7001 7020 /*
7002 7021 * Now we must further check if we are in the BEGIN
7003 7022 * probe. If we are, we will only continue processing
7004 7023 * if we're still in WARMUP -- if one BEGIN enabling
7005 7024 * has invoked the exit() action, we don't want to
7006 7025 * evaluate subsequent BEGIN enablings.
7007 7026 */
7008 7027 if (probe->dtpr_id == dtrace_probeid_begin &&
7009 7028 state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
7010 7029 ASSERT(state->dts_activity ==
7011 7030 DTRACE_ACTIVITY_DRAINING);
7012 7031 continue;
7013 7032 }
7014 7033 }
7015 7034
7016 7035 if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb))
7017 7036 continue;
7018 7037
7019 7038 if (now - state->dts_alive > dtrace_deadman_timeout) {
7020 7039 /*
7021 7040 			 * We seem to be dead. Unless we (a) have kernel
7022 7041 			 * destructive permissions, (b) have explicitly enabled
7023 7042 			 * destructive actions, and (c) destructive actions have
7024 7043 * not been disabled, we're going to transition into
7025 7044 * the KILLED state, from which no further processing
7026 7045 * on this state will be performed.
7027 7046 */
7028 7047 if (!dtrace_priv_kernel_destructive(state) ||
7029 7048 !state->dts_cred.dcr_destructive ||
7030 7049 dtrace_destructive_disallow) {
7031 7050 void *activity = &state->dts_activity;
7032 7051 dtrace_activity_t current;
7033 7052
7034 7053 do {
7035 7054 current = state->dts_activity;
7036 7055 } while (dtrace_cas32(activity, current,
7037 7056 DTRACE_ACTIVITY_KILLED) != current);
7038 7057
7039 7058 continue;
7040 7059 }
7041 7060 }
7042 7061
7043 7062 if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
7044 7063 ecb->dte_alignment, state, &mstate)) < 0)
7045 7064 continue;
7046 7065
7047 7066 tomax = buf->dtb_tomax;
7048 7067 ASSERT(tomax != NULL);
7049 7068
7050 7069 if (ecb->dte_size != 0) {
7051 7070 dtrace_rechdr_t dtrh;
7052 7071 if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
7053 7072 mstate.dtms_timestamp = dtrace_gethrtime();
7054 7073 mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
7055 7074 }
7056 7075 ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t));
7057 7076 dtrh.dtrh_epid = ecb->dte_epid;
7058 7077 DTRACE_RECORD_STORE_TIMESTAMP(&dtrh,
7059 7078 mstate.dtms_timestamp);
7060 7079 *((dtrace_rechdr_t *)(tomax + offs)) = dtrh;
7061 7080 }
7062 7081
7063 7082 mstate.dtms_epid = ecb->dte_epid;
7064 7083 mstate.dtms_present |= DTRACE_MSTATE_EPID;
7065 7084
7066 7085 if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
7067 7086 mstate.dtms_access |= DTRACE_ACCESS_KERNEL;
7068 7087
7069 7088 if (pred != NULL) {
7070 7089 dtrace_difo_t *dp = pred->dtp_difo;
7071 7090 int rval;
7072 7091
7073 7092 rval = dtrace_dif_emulate(dp, &mstate, vstate, state);
7074 7093
7075 7094 if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
7076 7095 dtrace_cacheid_t cid = probe->dtpr_predcache;
7077 7096
7078 7097 if (cid != DTRACE_CACHEIDNONE && !onintr) {
7079 7098 /*
7080 7099 * Update the predicate cache...
7081 7100 */
7082 7101 ASSERT(cid == pred->dtp_cacheid);
7083 7102 curthread->t_predcache = cid;
7084 7103 }
7085 7104
7086 7105 continue;
7087 7106 }
7088 7107 }
7089 7108
7090 7109 for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
7091 7110 act != NULL; act = act->dta_next) {
7092 7111 size_t valoffs;
7093 7112 dtrace_difo_t *dp;
7094 7113 dtrace_recdesc_t *rec = &act->dta_rec;
7095 7114
7096 7115 size = rec->dtrd_size;
7097 7116 valoffs = offs + rec->dtrd_offset;
7098 7117
7099 7118 if (DTRACEACT_ISAGG(act->dta_kind)) {
7100 7119 uint64_t v = 0xbad;
7101 7120 dtrace_aggregation_t *agg;
7102 7121
7103 7122 agg = (dtrace_aggregation_t *)act;
7104 7123
7105 7124 if ((dp = act->dta_difo) != NULL)
7106 7125 v = dtrace_dif_emulate(dp,
7107 7126 &mstate, vstate, state);
7108 7127
7109 7128 if (*flags & CPU_DTRACE_ERROR)
7110 7129 continue;
7111 7130
7112 7131 /*
7113 7132 * Note that we always pass the expression
7114 7133 * value from the previous iteration of the
7115 7134 * action loop. This value will only be used
7116 7135 * if there is an expression argument to the
7117 7136 * aggregating action, denoted by the
7118 7137 * dtag_hasarg field.
7119 7138 */
7120 7139 dtrace_aggregate(agg, buf,
7121 7140 offs, aggbuf, v, val);
7122 7141 continue;
7123 7142 }
7124 7143
7125 7144 switch (act->dta_kind) {
7126 7145 case DTRACEACT_STOP:
7127 7146 if (dtrace_priv_proc_destructive(state,
7128 7147 &mstate))
7129 7148 dtrace_action_stop();
7130 7149 continue;
7131 7150
7132 7151 case DTRACEACT_BREAKPOINT:
7133 7152 if (dtrace_priv_kernel_destructive(state))
7134 7153 dtrace_action_breakpoint(ecb);
7135 7154 continue;
7136 7155
7137 7156 case DTRACEACT_PANIC:
7138 7157 if (dtrace_priv_kernel_destructive(state))
7139 7158 dtrace_action_panic(ecb);
7140 7159 continue;
7141 7160
7142 7161 case DTRACEACT_STACK:
7143 7162 if (!dtrace_priv_kernel(state))
7144 7163 continue;
7145 7164
7146 7165 dtrace_getpcstack((pc_t *)(tomax + valoffs),
7147 7166 size / sizeof (pc_t), probe->dtpr_aframes,
7148 7167 DTRACE_ANCHORED(probe) ? NULL :
7149 7168 (uint32_t *)arg0);
7150 7169
7151 7170 continue;
7152 7171
7153 7172 case DTRACEACT_JSTACK:
7154 7173 case DTRACEACT_USTACK:
7155 7174 if (!dtrace_priv_proc(state, &mstate))
7156 7175 continue;
7157 7176
7158 7177 /*
7159 7178 * See comment in DIF_VAR_PID.
7160 7179 */
7161 7180 if (DTRACE_ANCHORED(mstate.dtms_probe) &&
7162 7181 CPU_ON_INTR(CPU)) {
7163 7182 int depth = DTRACE_USTACK_NFRAMES(
7164 7183 rec->dtrd_arg) + 1;
7165 7184
7166 7185 dtrace_bzero((void *)(tomax + valoffs),
7167 7186 DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
7168 7187 + depth * sizeof (uint64_t));
7169 7188
7170 7189 continue;
7171 7190 }
7172 7191
7173 7192 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
7174 7193 curproc->p_dtrace_helpers != NULL) {
7175 7194 /*
7176 7195 * This is the slow path -- we have
7177 7196 * allocated string space, and we're
7178 7197 * getting the stack of a process that
7179 7198 * has helpers. Call into a separate
7180 7199 * routine to perform this processing.
7181 7200 */
7182 7201 dtrace_action_ustack(&mstate, state,
7183 7202 (uint64_t *)(tomax + valoffs),
7184 7203 rec->dtrd_arg);
7185 7204 continue;
7186 7205 }
7187 7206
7188 7207 /*
7189 7208 * Clear the string space, since there's no
7190 7209 * helper to do it for us.
7191 7210 */
7192 7211 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) {
7193 7212 int depth = DTRACE_USTACK_NFRAMES(
7194 7213 rec->dtrd_arg);
7195 7214 size_t strsize = DTRACE_USTACK_STRSIZE(
7196 7215 rec->dtrd_arg);
7197 7216 uint64_t *buf = (uint64_t *)(tomax +
7198 7217 valoffs);
7199 7218 void *strspace = &buf[depth + 1];
7200 7219
7201 7220 dtrace_bzero(strspace,
7202 7221 MIN(depth, strsize));
7203 7222 }
7204 7223
7205 7224 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
7206 7225 dtrace_getupcstack((uint64_t *)
7207 7226 (tomax + valoffs),
7208 7227 DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
7209 7228 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
7210 7229 continue;
7211 7230
7212 7231 default:
7213 7232 break;
7214 7233 }
7215 7234
7216 7235 dp = act->dta_difo;
7217 7236 ASSERT(dp != NULL);
7218 7237
7219 7238 val = dtrace_dif_emulate(dp, &mstate, vstate, state);
7220 7239
7221 7240 if (*flags & CPU_DTRACE_ERROR)
7222 7241 continue;
7223 7242
7224 7243 switch (act->dta_kind) {
7225 7244 case DTRACEACT_SPECULATE: {
7226 7245 dtrace_rechdr_t *dtrh;
7227 7246
7228 7247 ASSERT(buf == &state->dts_buffer[cpuid]);
7229 7248 buf = dtrace_speculation_buffer(state,
7230 7249 cpuid, val);
7231 7250
7232 7251 if (buf == NULL) {
7233 7252 *flags |= CPU_DTRACE_DROP;
7234 7253 continue;
7235 7254 }
7236 7255
7237 7256 offs = dtrace_buffer_reserve(buf,
7238 7257 ecb->dte_needed, ecb->dte_alignment,
7239 7258 state, NULL);
7240 7259
7241 7260 if (offs < 0) {
7242 7261 *flags |= CPU_DTRACE_DROP;
7243 7262 continue;
7244 7263 }
7245 7264
7246 7265 tomax = buf->dtb_tomax;
7247 7266 ASSERT(tomax != NULL);
7248 7267
7249 7268 if (ecb->dte_size == 0)
7250 7269 continue;
7251 7270
7252 7271 ASSERT3U(ecb->dte_size, >=,
7253 7272 sizeof (dtrace_rechdr_t));
7254 7273 dtrh = ((void *)(tomax + offs));
7255 7274 dtrh->dtrh_epid = ecb->dte_epid;
7256 7275 /*
7257 7276 * When the speculation is committed, all of
7258 7277 * the records in the speculative buffer will
7259 7278 * have their timestamps set to the commit
7260 7279 * time. Until then, it is set to a sentinel
7261 7280 				 * value, for debuggability.
7262 7281 */
7263 7282 DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX);
7264 7283 continue;
7265 7284 }
7266 7285
7267 7286 case DTRACEACT_CHILL:
7268 7287 if (dtrace_priv_kernel_destructive(state))
7269 7288 dtrace_action_chill(&mstate, val);
7270 7289 continue;
7271 7290
7272 7291 case DTRACEACT_RAISE:
7273 7292 if (dtrace_priv_proc_destructive(state,
7274 7293 &mstate))
7275 7294 dtrace_action_raise(val);
7276 7295 continue;
7277 7296
7278 7297 case DTRACEACT_COMMIT:
7279 7298 ASSERT(!committed);
7280 7299
7281 7300 /*
7282 7301 * We need to commit our buffer state.
7283 7302 */
7284 7303 if (ecb->dte_size)
7285 7304 buf->dtb_offset = offs + ecb->dte_size;
7286 7305 buf = &state->dts_buffer[cpuid];
7287 7306 dtrace_speculation_commit(state, cpuid, val);
7288 7307 committed = 1;
7289 7308 continue;
7290 7309
7291 7310 case DTRACEACT_DISCARD:
7292 7311 dtrace_speculation_discard(state, cpuid, val);
7293 7312 continue;
7294 7313
7295 7314 case DTRACEACT_DIFEXPR:
7296 7315 case DTRACEACT_LIBACT:
7297 7316 case DTRACEACT_PRINTF:
7298 7317 case DTRACEACT_PRINTA:
7299 7318 case DTRACEACT_SYSTEM:
7300 7319 case DTRACEACT_FREOPEN:
7301 7320 case DTRACEACT_TRACEMEM:
7302 7321 break;
7303 7322
7304 7323 case DTRACEACT_TRACEMEM_DYNSIZE:
7305 7324 tracememsize = val;
7306 7325 break;
7307 7326
7308 7327 case DTRACEACT_SYM:
7309 7328 case DTRACEACT_MOD:
7310 7329 if (!dtrace_priv_kernel(state))
7311 7330 continue;
7312 7331 break;
7313 7332
7314 7333 case DTRACEACT_USYM:
7315 7334 case DTRACEACT_UMOD:
7316 7335 case DTRACEACT_UADDR: {
7317 7336 struct pid *pid = curthread->t_procp->p_pidp;
7318 7337
7319 7338 if (!dtrace_priv_proc(state, &mstate))
7320 7339 continue;
7321 7340
7322 7341 DTRACE_STORE(uint64_t, tomax,
7323 7342 valoffs, (uint64_t)pid->pid_id);
7324 7343 DTRACE_STORE(uint64_t, tomax,
7325 7344 valoffs + sizeof (uint64_t), val);
7326 7345
7327 7346 continue;
7328 7347 }
7329 7348
7330 7349 case DTRACEACT_EXIT: {
7331 7350 /*
7332 7351 * For the exit action, we are going to attempt
7333 7352 * to atomically set our activity to be
7334 7353 * draining. If this fails (either because
7335 7354 * another CPU has beat us to the exit action,
7336 7355 * or because our current activity is something
7337 7356 * other than ACTIVE or WARMUP), we will
7338 7357 * continue. This assures that the exit action
7339 7358 * can be successfully recorded at most once
7340 7359 * when we're in the ACTIVE state. If we're
7341 7360 * encountering the exit() action while in
7342 7361 * COOLDOWN, however, we want to honor the new
7343 7362 * status code. (We know that we're the only
7344 7363 * thread in COOLDOWN, so there is no race.)
7345 7364 */
7346 7365 void *activity = &state->dts_activity;
7347 7366 dtrace_activity_t current = state->dts_activity;
7348 7367
7349 7368 if (current == DTRACE_ACTIVITY_COOLDOWN)
7350 7369 break;
7351 7370
7352 7371 if (current != DTRACE_ACTIVITY_WARMUP)
7353 7372 current = DTRACE_ACTIVITY_ACTIVE;
7354 7373
7355 7374 if (dtrace_cas32(activity, current,
7356 7375 DTRACE_ACTIVITY_DRAINING) != current) {
7357 7376 *flags |= CPU_DTRACE_DROP;
7358 7377 continue;
7359 7378 }
7360 7379
7361 7380 break;
7362 7381 }
7363 7382
7364 7383 default:
7365 7384 ASSERT(0);
7366 7385 }
7367 7386
7368 7387 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ||
7369 7388 dp->dtdo_rtype.dtdt_flags & DIF_TF_BYUREF) {
7370 7389 uintptr_t end = valoffs + size;
7371 7390
7372 7391 if (tracememsize != 0 &&
7373 7392 valoffs + tracememsize < end) {
7374 7393 end = valoffs + tracememsize;
7375 7394 tracememsize = 0;
7376 7395 }
7377 7396
7378 7397 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF &&
7379 7398 !dtrace_vcanload((void *)(uintptr_t)val,
7380 7399 &dp->dtdo_rtype, NULL, &mstate, vstate))
7381 7400 continue;
7382 7401
7383 7402 dtrace_store_by_ref(dp, tomax, size, &valoffs,
7384 7403 &val, end, act->dta_intuple,
7385 7404 dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ?
7386 7405 			    DIF_TF_BYREF : DIF_TF_BYUREF);
7387 7406 continue;
7388 7407 }
7389 7408
7390 7409 switch (size) {
7391 7410 case 0:
7392 7411 break;
7393 7412
7394 7413 case sizeof (uint8_t):
7395 7414 DTRACE_STORE(uint8_t, tomax, valoffs, val);
7396 7415 break;
7397 7416 case sizeof (uint16_t):
7398 7417 DTRACE_STORE(uint16_t, tomax, valoffs, val);
7399 7418 break;
7400 7419 case sizeof (uint32_t):
7401 7420 DTRACE_STORE(uint32_t, tomax, valoffs, val);
7402 7421 break;
7403 7422 case sizeof (uint64_t):
7404 7423 DTRACE_STORE(uint64_t, tomax, valoffs, val);
7405 7424 break;
7406 7425 default:
7407 7426 /*
7408 7427 * Any other size should have been returned by
7409 7428 * reference, not by value.
7410 7429 */
7411 7430 ASSERT(0);
7412 7431 break;
7413 7432 }
7414 7433 }
7415 7434
7416 7435 if (*flags & CPU_DTRACE_DROP)
7417 7436 continue;
7418 7437
7419 7438 if (*flags & CPU_DTRACE_FAULT) {
7420 7439 int ndx;
7421 7440 dtrace_action_t *err;
7422 7441
7423 7442 buf->dtb_errors++;
7424 7443
7425 7444 if (probe->dtpr_id == dtrace_probeid_error) {
7426 7445 /*
7427 7446 * There's nothing we can do -- we had an
7428 7447 * error on the error probe. We bump an
7429 7448 * error counter to at least indicate that
7430 7449 * this condition happened.
7431 7450 */
7432 7451 dtrace_error(&state->dts_dblerrors);
7433 7452 continue;
7434 7453 }
7435 7454
7436 7455 if (vtime) {
7437 7456 /*
7438 7457 * Before recursing on dtrace_probe(), we
7439 7458 * need to explicitly clear out our start
7440 7459 * time to prevent it from being accumulated
7441 7460 * into t_dtrace_vtime.
7442 7461 */
7443 7462 curthread->t_dtrace_start = 0;
7444 7463 }
7445 7464
7446 7465 /*
7447 7466 * Iterate over the actions to figure out which action
7448 7467 * we were processing when we experienced the error.
7449 7468 * Note that act points _past_ the faulting action; if
7450 7469 * act is ecb->dte_action, the fault was in the
7451 7470 * predicate, if it's ecb->dte_action->dta_next it's
7452 7471 * in action #1, and so on.
7453 7472 */
7454 7473 for (err = ecb->dte_action, ndx = 0;
7455 7474 err != act; err = err->dta_next, ndx++)
7456 7475 continue;
7457 7476
7458 7477 dtrace_probe_error(state, ecb->dte_epid, ndx,
7459 7478 (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
7460 7479 mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
7461 7480 cpu_core[cpuid].cpuc_dtrace_illval);
7462 7481
7463 7482 continue;
7464 7483 }
7465 7484
7466 7485 if (!committed)
7467 7486 buf->dtb_offset = offs + ecb->dte_size;
7468 7487 }
7469 7488
7470 7489 end = dtrace_gethrtime();
7471 7490 if (vtime)
7472 7491 curthread->t_dtrace_start = end;
7473 7492
7474 7493 CPU->cpu_dtrace_nsec += end - now;
7475 7494
7476 7495 dtrace_interrupt_enable(cookie);
7477 7496 }
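
/*
 * Illustrative sketch (editorial; not part of the original source): a
 * provider fires a probe from its instrumentation point by calling
 * dtrace_probe() with the ID it obtained from dtrace_probe_create() and
 * up to five arguments.  The function and variable names here are
 * hypothetical.
 */
#if 0
static void
my_provider_fire(dtrace_id_t my_probe_id, uintptr_t my_arg0, uintptr_t my_arg1)
{
        dtrace_probe(my_probe_id, my_arg0, my_arg1, 0, 0, 0);
}
#endif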
7478 7497
7479 7498 /*
7480 7499 * DTrace Probe Hashing Functions
7481 7500 *
7482 7501 * The functions in this section (and indeed, the functions in remaining
7483 7502 * sections) are not _called_ from probe context. (Any exceptions to this are
7484 7503 * marked with a "Note:".) Rather, they are called from elsewhere in the
7485 7504  * DTrace framework to look up probes in, add probes to, and remove probes from
7486 7505 * the DTrace probe hashes. (Each probe is hashed by each element of the
7487 7506 * probe tuple -- allowing for fast lookups, regardless of what was
7488 7507 * specified.)
7489 7508 */
7490 7509 static uint_t
7491 7510 dtrace_hash_str(char *p)
7492 7511 {
7493 7512 unsigned int g;
7494 7513 uint_t hval = 0;
7495 7514
7496 7515 while (*p) {
7497 7516 hval = (hval << 4) + *p++;
7498 7517 if ((g = (hval & 0xf0000000)) != 0)
7499 7518 hval ^= g >> 24;
7500 7519 hval &= ~g;
7501 7520 }
7502 7521 return (hval);
7503 7522 }
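
/*
 * Editorial note (not part of the original source): this is the classic
 * ELF-style string hash -- four bits per character are shifted in, and
 * any bits reaching the top nibble are folded back into the low-order
 * bits so that long strings cannot saturate the hash state.
 */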
7504 7523
7505 7524 static dtrace_hash_t *
7506 7525 dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
7507 7526 {
7508 7527 dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);
7509 7528
7510 7529 hash->dth_stroffs = stroffs;
7511 7530 hash->dth_nextoffs = nextoffs;
7512 7531 hash->dth_prevoffs = prevoffs;
7513 7532
7514 7533 hash->dth_size = 1;
7515 7534 hash->dth_mask = hash->dth_size - 1;
7516 7535
7517 7536 hash->dth_tab = kmem_zalloc(hash->dth_size *
7518 7537 sizeof (dtrace_hashbucket_t *), KM_SLEEP);
7519 7538
7520 7539 return (hash);
7521 7540 }
7522 7541
7523 7542 static void
7524 7543 dtrace_hash_destroy(dtrace_hash_t *hash)
7525 7544 {
7526 7545 #ifdef DEBUG
7527 7546 int i;
7528 7547
7529 7548 for (i = 0; i < hash->dth_size; i++)
7530 7549 ASSERT(hash->dth_tab[i] == NULL);
7531 7550 #endif
7532 7551
7533 7552 kmem_free(hash->dth_tab,
7534 7553 hash->dth_size * sizeof (dtrace_hashbucket_t *));
7535 7554 kmem_free(hash, sizeof (dtrace_hash_t));
7536 7555 }
7537 7556
7538 7557 static void
7539 7558 dtrace_hash_resize(dtrace_hash_t *hash)
7540 7559 {
7541 7560 int size = hash->dth_size, i, ndx;
7542 7561 int new_size = hash->dth_size << 1;
7543 7562 int new_mask = new_size - 1;
7544 7563 dtrace_hashbucket_t **new_tab, *bucket, *next;
7545 7564
7546 7565 ASSERT((new_size & new_mask) == 0);
7547 7566
7548 7567 new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);
7549 7568
7550 7569 for (i = 0; i < size; i++) {
7551 7570 for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
7552 7571 dtrace_probe_t *probe = bucket->dthb_chain;
7553 7572
7554 7573 ASSERT(probe != NULL);
7555 7574 ndx = DTRACE_HASHSTR(hash, probe) & new_mask;
7556 7575
7557 7576 next = bucket->dthb_next;
7558 7577 bucket->dthb_next = new_tab[ndx];
7559 7578 new_tab[ndx] = bucket;
7560 7579 }
7561 7580 }
7562 7581
7563 7582 kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
7564 7583 hash->dth_tab = new_tab;
7565 7584 hash->dth_size = new_size;
7566 7585 hash->dth_mask = new_mask;
7567 7586 }
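
/*
 * Illustrative sketch (editorial; not part of the original source): the
 * ASSERT above relies on the table size always being a power of two, so
 * that masking with dth_mask is equivalent to a modulo by dth_size.  The
 * standard idiom being asserted:
 */
#if 0
static int
is_power_of_two(int size)
{
        return (size > 0 && (size & (size - 1)) == 0);
}
#endif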
7568 7587
7569 7588 static void
7570 7589 dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
7571 7590 {
7572 7591 int hashval = DTRACE_HASHSTR(hash, new);
7573 7592 int ndx = hashval & hash->dth_mask;
7574 7593 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7575 7594 dtrace_probe_t **nextp, **prevp;
7576 7595
7577 7596 for (; bucket != NULL; bucket = bucket->dthb_next) {
7578 7597 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
7579 7598 goto add;
7580 7599 }
7581 7600
7582 7601 if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
7583 7602 dtrace_hash_resize(hash);
7584 7603 dtrace_hash_add(hash, new);
7585 7604 return;
7586 7605 }
7587 7606
7588 7607 bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
7589 7608 bucket->dthb_next = hash->dth_tab[ndx];
7590 7609 hash->dth_tab[ndx] = bucket;
7591 7610 hash->dth_nbuckets++;
7592 7611
7593 7612 add:
7594 7613 nextp = DTRACE_HASHNEXT(hash, new);
7595 7614 ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
7596 7615 *nextp = bucket->dthb_chain;
7597 7616
7598 7617 if (bucket->dthb_chain != NULL) {
7599 7618 prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
7600 7619 ASSERT(*prevp == NULL);
7601 7620 *prevp = new;
7602 7621 }
7603 7622
7604 7623 bucket->dthb_chain = new;
7605 7624 bucket->dthb_len++;
7606 7625 }
7607 7626
7608 7627 static dtrace_probe_t *
7609 7628 dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
7610 7629 {
7611 7630 int hashval = DTRACE_HASHSTR(hash, template);
7612 7631 int ndx = hashval & hash->dth_mask;
7613 7632 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7614 7633
7615 7634 for (; bucket != NULL; bucket = bucket->dthb_next) {
7616 7635 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
7617 7636 return (bucket->dthb_chain);
7618 7637 }
7619 7638
7620 7639 return (NULL);
7621 7640 }
7622 7641
7623 7642 static int
7624 7643 dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
7625 7644 {
7626 7645 int hashval = DTRACE_HASHSTR(hash, template);
7627 7646 int ndx = hashval & hash->dth_mask;
7628 7647 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7629 7648
7630 7649 for (; bucket != NULL; bucket = bucket->dthb_next) {
7631 7650 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
7632 7651 return (bucket->dthb_len);
7633 7652 }
7634 7653
7635 7654 	return (0);
7636 7655 }
7637 7656
7638 7657 static void
7639 7658 dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
7640 7659 {
7641 7660 int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
7642 7661 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7643 7662
7644 7663 dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
7645 7664 dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);
7646 7665
7647 7666 /*
7648 7667 * Find the bucket that we're removing this probe from.
7649 7668 */
7650 7669 for (; bucket != NULL; bucket = bucket->dthb_next) {
7651 7670 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
7652 7671 break;
7653 7672 }
7654 7673
7655 7674 ASSERT(bucket != NULL);
7656 7675
7657 7676 if (*prevp == NULL) {
7658 7677 if (*nextp == NULL) {
7659 7678 /*
7660 7679 * The removed probe was the only probe on this
7661 7680 * bucket; we need to remove the bucket.
7662 7681 */
7663 7682 dtrace_hashbucket_t *b = hash->dth_tab[ndx];
7664 7683
7665 7684 ASSERT(bucket->dthb_chain == probe);
7666 7685 ASSERT(b != NULL);
7667 7686
7668 7687 if (b == bucket) {
7669 7688 hash->dth_tab[ndx] = bucket->dthb_next;
7670 7689 } else {
7671 7690 while (b->dthb_next != bucket)
7672 7691 b = b->dthb_next;
7673 7692 b->dthb_next = bucket->dthb_next;
7674 7693 }
7675 7694
7676 7695 ASSERT(hash->dth_nbuckets > 0);
7677 7696 hash->dth_nbuckets--;
7678 7697 kmem_free(bucket, sizeof (dtrace_hashbucket_t));
7679 7698 return;
7680 7699 }
7681 7700
7682 7701 bucket->dthb_chain = *nextp;
7683 7702 } else {
7684 7703 *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
7685 7704 }
7686 7705
7687 7706 if (*nextp != NULL)
7688 7707 *(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
7689 7708 }
7690 7709
7691 7710 /*
7692 7711 * DTrace Utility Functions
7693 7712 *
7694 7713 * These are random utility functions that are _not_ called from probe context.
7695 7714 */
7696 7715 static int
7697 7716 dtrace_badattr(const dtrace_attribute_t *a)
7698 7717 {
7699 7718 return (a->dtat_name > DTRACE_STABILITY_MAX ||
7700 7719 a->dtat_data > DTRACE_STABILITY_MAX ||
7701 7720 a->dtat_class > DTRACE_CLASS_MAX);
7702 7721 }
7703 7722
7704 7723 /*
7705 7724  * Return a duplicate of the specified string. If the string is NULL,
7706 7725 * this function returns a zero-length string.
7707 7726 */
7708 7727 static char *
7709 7728 dtrace_strdup(const char *str)
7710 7729 {
7711 7730 char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);
7712 7731
7713 7732 if (str != NULL)
7714 7733 (void) strcpy(new, str);
7715 7734
7716 7735 return (new);
7717 7736 }
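
/*
 * Illustrative sketch (editorial; not part of the original source):
 * because a NULL argument yields an allocated empty string rather than
 * NULL, callers may use the result unconditionally:
 */
#if 0
        char *copy = dtrace_strdup(NULL);

        ASSERT(copy != NULL && copy[0] == '\0');
        kmem_free(copy, strlen(copy) + 1);
#endif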
7718 7737
7719 7738 #define DTRACE_ISALPHA(c) \
7720 7739 (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
7721 7740
7722 7741 static int
7723 7742 dtrace_badname(const char *s)
7724 7743 {
7725 7744 char c;
7726 7745
7727 7746 if (s == NULL || (c = *s++) == '\0')
7728 7747 return (0);
7729 7748
7730 7749 if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
7731 7750 return (1);
7732 7751
7733 7752 while ((c = *s++) != '\0') {
7734 7753 if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
7735 7754 c != '-' && c != '_' && c != '.' && c != '`')
7736 7755 return (1);
7737 7756 }
7738 7757
7739 7758 return (0);
7740 7759 }
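
/*
 * Illustrative sketch (editorial; not part of the original source): a
 * valid name begins with a letter, '-', '_' or '.', and may additionally
 * contain digits and '`' in subsequent positions:
 */
#if 0
        ASSERT(dtrace_badname("fbt") == 0);
        ASSERT(dtrace_badname("genunix`dounmount") == 0);
        ASSERT(dtrace_badname("3probe") == 1);          /* leading digit */
        ASSERT(dtrace_badname("bad name") == 1);        /* embedded space */
#endif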
7741 7760
7742 7761 static void
7743 7762 dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
7744 7763 {
7745 7764 uint32_t priv;
7746 7765
7747 7766 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
7748 7767 /*
7749 7768 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
7750 7769 */
7751 7770 priv = DTRACE_PRIV_ALL;
7752 7771 } else {
7753 7772 *uidp = crgetuid(cr);
7754 7773 *zoneidp = crgetzoneid(cr);
7755 7774
7756 7775 priv = 0;
7757 7776 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
7758 7777 priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
7759 7778 else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
7760 7779 priv |= DTRACE_PRIV_USER;
7761 7780 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
7762 7781 priv |= DTRACE_PRIV_PROC;
7763 7782 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
7764 7783 priv |= DTRACE_PRIV_OWNER;
7765 7784 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
7766 7785 priv |= DTRACE_PRIV_ZONEOWNER;
7767 7786 }
7768 7787
7769 7788 *privp = priv;
7770 7789 }
7771 7790
7772 7791 #ifdef DTRACE_ERRDEBUG
7773 7792 static void
7774 7793 dtrace_errdebug(const char *str)
7775 7794 {
7776 7795 int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
7777 7796 int occupied = 0;
7778 7797
7779 7798 mutex_enter(&dtrace_errlock);
7780 7799 dtrace_errlast = str;
7781 7800 dtrace_errthread = curthread;
7782 7801
7783 7802 while (occupied++ < DTRACE_ERRHASHSZ) {
7784 7803 if (dtrace_errhash[hval].dter_msg == str) {
7785 7804 dtrace_errhash[hval].dter_count++;
7786 7805 goto out;
7787 7806 }
7788 7807
7789 7808 if (dtrace_errhash[hval].dter_msg != NULL) {
7790 7809 hval = (hval + 1) % DTRACE_ERRHASHSZ;
7791 7810 continue;
7792 7811 }
7793 7812
7794 7813 dtrace_errhash[hval].dter_msg = str;
7795 7814 dtrace_errhash[hval].dter_count = 1;
7796 7815 goto out;
7797 7816 }
7798 7817
7799 7818 panic("dtrace: undersized error hash");
7800 7819 out:
7801 7820 mutex_exit(&dtrace_errlock);
7802 7821 }
7803 7822 #endif
7804 7823
7805 7824 /*
7806 7825 * DTrace Matching Functions
7807 7826 *
7808 7827 * These functions are used to match groups of probes, given some elements of
7809 7828 * a probe tuple, or some globbed expressions for elements of a probe tuple.
7810 7829 */
7811 7830 static int
7812 7831 dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
7813 7832 zoneid_t zoneid)
7814 7833 {
7815 7834 if (priv != DTRACE_PRIV_ALL) {
7816 7835 uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
7817 7836 uint32_t match = priv & ppriv;
7818 7837
7819 7838 /*
7820 7839 * No PRIV_DTRACE_* privileges...
7821 7840 */
7822 7841 if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
7823 7842 DTRACE_PRIV_KERNEL)) == 0)
7824 7843 return (0);
7825 7844
7826 7845 /*
7827 7846 * No matching bits, but there were bits to match...
7828 7847 */
7829 7848 if (match == 0 && ppriv != 0)
7830 7849 return (0);
7831 7850
7832 7851 /*
7833 7852 * Need to have permissions to the process, but don't...
7834 7853 */
7835 7854 if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
7836 7855 uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
7837 7856 return (0);
7838 7857 }
7839 7858
7840 7859 /*
7841 7860 * Need to be in the same zone unless we possess the
7842 7861 * privilege to examine all zones.
7843 7862 */
7844 7863 if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
7845 7864 zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
7846 7865 return (0);
7847 7866 }
7848 7867 }
7849 7868
7850 7869 return (1);
7851 7870 }
7852 7871
7853 7872 /*
7854 7873 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
7855 7874 * consists of input pattern strings and an ops-vector to evaluate them.
7856 7875 * This function returns >0 for match, 0 for no match, and <0 for error.
7857 7876 */
7858 7877 static int
7859 7878 dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
7860 7879 uint32_t priv, uid_t uid, zoneid_t zoneid)
7861 7880 {
7862 7881 dtrace_provider_t *pvp = prp->dtpr_provider;
7863 7882 int rv;
7864 7883
7865 7884 if (pvp->dtpv_defunct)
7866 7885 return (0);
7867 7886
7868 7887 if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
7869 7888 return (rv);
7870 7889
7871 7890 if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
7872 7891 return (rv);
7873 7892
7874 7893 if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
7875 7894 return (rv);
7876 7895
7877 7896 if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
7878 7897 return (rv);
7879 7898
7880 7899 if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
7881 7900 return (0);
7882 7901
7883 7902 return (rv);
7884 7903 }
7885 7904
7886 7905 /*
7887 7906 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
7888 7907 * interface for matching a glob pattern 'p' to an input string 's'. Unlike
7889 7908 * libc's version, the kernel version only applies to 8-bit ASCII strings.
7890 7909 * In addition, all of the recursion cases except for '*' matching have been
7891 7910 * unwound. For '*', we still implement recursive evaluation, but a depth
7892 7911 * counter is maintained and matching is aborted if we recurse too deep.
7893 7912  * The function returns 0 for no match, >0 for a match, and <0 on recursion error.
7894 7913 */
7895 7914 static int
7896 7915 dtrace_match_glob(const char *s, const char *p, int depth)
7897 7916 {
7898 7917 const char *olds;
7899 7918 char s1, c;
7900 7919 int gs;
7901 7920
7902 7921 if (depth > DTRACE_PROBEKEY_MAXDEPTH)
7903 7922 return (-1);
7904 7923
7905 7924 if (s == NULL)
7906 7925 s = ""; /* treat NULL as empty string */
7907 7926
7908 7927 top:
7909 7928 olds = s;
7910 7929 s1 = *s++;
7911 7930
7912 7931 if (p == NULL)
7913 7932 return (0);
7914 7933
7915 7934 if ((c = *p++) == '\0')
7916 7935 return (s1 == '\0');
7917 7936
7918 7937 switch (c) {
7919 7938 case '[': {
7920 7939 int ok = 0, notflag = 0;
7921 7940 char lc = '\0';
7922 7941
7923 7942 if (s1 == '\0')
7924 7943 return (0);
7925 7944
7926 7945 if (*p == '!') {
7927 7946 notflag = 1;
7928 7947 p++;
7929 7948 }
7930 7949
7931 7950 if ((c = *p++) == '\0')
7932 7951 return (0);
7933 7952
7934 7953 do {
7935 7954 if (c == '-' && lc != '\0' && *p != ']') {
7936 7955 if ((c = *p++) == '\0')
7937 7956 return (0);
7938 7957 if (c == '\\' && (c = *p++) == '\0')
7939 7958 return (0);
7940 7959
7941 7960 if (notflag) {
7942 7961 if (s1 < lc || s1 > c)
7943 7962 ok++;
7944 7963 else
7945 7964 return (0);
7946 7965 } else if (lc <= s1 && s1 <= c)
7947 7966 ok++;
7948 7967
7949 7968 } else if (c == '\\' && (c = *p++) == '\0')
7950 7969 return (0);
7951 7970
7952 7971 lc = c; /* save left-hand 'c' for next iteration */
7953 7972
7954 7973 if (notflag) {
7955 7974 if (s1 != c)
7956 7975 ok++;
7957 7976 else
7958 7977 return (0);
7959 7978 } else if (s1 == c)
7960 7979 ok++;
7961 7980
7962 7981 if ((c = *p++) == '\0')
7963 7982 return (0);
7964 7983
7965 7984 } while (c != ']');
7966 7985
7967 7986 if (ok)
7968 7987 goto top;
7969 7988
7970 7989 return (0);
7971 7990 }
7972 7991
7973 7992 case '\\':
7974 7993 if ((c = *p++) == '\0')
7975 7994 return (0);
7976 7995 /*FALLTHRU*/
7977 7996
7978 7997 default:
7979 7998 if (c != s1)
7980 7999 return (0);
7981 8000 /*FALLTHRU*/
7982 8001
7983 8002 case '?':
7984 8003 if (s1 != '\0')
7985 8004 goto top;
7986 8005 return (0);
7987 8006
7988 8007 case '*':
7989 8008 while (*p == '*')
7990 8009 p++; /* consecutive *'s are identical to a single one */
7991 8010
7992 8011 if (*p == '\0')
7993 8012 return (1);
7994 8013
7995 8014 for (s = olds; *s != '\0'; s++) {
7996 8015 if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
7997 8016 return (gs);
7998 8017 }
7999 8018
8000 8019 return (0);
8001 8020 }
8002 8021 }
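
/*
 * Illustrative sketch (editorial; not part of the original source) of the
 * matching semantics; note that a NULL input string is treated as "":
 */
#if 0
        ASSERT(dtrace_match_glob("dounmount", "do*mount", 0) > 0);
        ASSERT(dtrace_match_glob("read", "w*", 0) == 0);
        ASSERT(dtrace_match_glob("a", "[a-z]", 0) > 0);
        ASSERT(dtrace_match_glob(NULL, "*", 0) > 0);
#endif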
8003 8022
8004 8023 /*ARGSUSED*/
8005 8024 static int
8006 8025 dtrace_match_string(const char *s, const char *p, int depth)
8007 8026 {
8008 8027 return (s != NULL && strcmp(s, p) == 0);
8009 8028 }
8010 8029
8011 8030 /*ARGSUSED*/
8012 8031 static int
8013 8032 dtrace_match_nul(const char *s, const char *p, int depth)
8014 8033 {
8015 8034 return (1); /* always match the empty pattern */
8016 8035 }
8017 8036
8018 8037 /*ARGSUSED*/
8019 8038 static int
8020 8039 dtrace_match_nonzero(const char *s, const char *p, int depth)
8021 8040 {
8022 8041 return (s != NULL && s[0] != '\0');
8023 8042 }
8024 8043
8025 8044 static int
8026 8045 dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
8027 8046 zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
8028 8047 {
8029 8048 dtrace_probe_t template, *probe;
8030 8049 dtrace_hash_t *hash = NULL;
8031 8050 int len, rc, best = INT_MAX, nmatched = 0;
8032 8051 dtrace_id_t i;
8033 8052
8034 8053 ASSERT(MUTEX_HELD(&dtrace_lock));
8035 8054
8036 8055 /*
8037 8056 * If the probe ID is specified in the key, just lookup by ID and
8038 8057 * invoke the match callback once if a matching probe is found.
8039 8058 */
8040 8059 if (pkp->dtpk_id != DTRACE_IDNONE) {
8041 8060 if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
8042 8061 dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
8043 8062 if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
8044 8063 return (DTRACE_MATCH_FAIL);
8045 8064 nmatched++;
8046 8065 }
8047 8066 return (nmatched);
8048 8067 }
8049 8068
8050 8069 template.dtpr_mod = (char *)pkp->dtpk_mod;
8051 8070 template.dtpr_func = (char *)pkp->dtpk_func;
8052 8071 template.dtpr_name = (char *)pkp->dtpk_name;
8053 8072
8054 8073 /*
8055 8074 * We want to find the most distinct of the module name, function
8056 8075 * name, and name. So for each one that is not a glob pattern or
8057 8076 * empty string, we perform a lookup in the corresponding hash and
8058 8077 * use the hash table with the fewest collisions to do our search.
8059 8078 */
8060 8079 if (pkp->dtpk_mmatch == &dtrace_match_string &&
8061 8080 (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
8062 8081 best = len;
8063 8082 hash = dtrace_bymod;
8064 8083 }
8065 8084
8066 8085 if (pkp->dtpk_fmatch == &dtrace_match_string &&
8067 8086 (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
8068 8087 best = len;
8069 8088 hash = dtrace_byfunc;
8070 8089 }
8071 8090
8072 8091 if (pkp->dtpk_nmatch == &dtrace_match_string &&
8073 8092 (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
8074 8093 best = len;
8075 8094 hash = dtrace_byname;
8076 8095 }
8077 8096
8078 8097 /*
8079 8098 * If we did not select a hash table, iterate over every probe and
8080 8099 * invoke our callback for each one that matches our input probe key.
8081 8100 */
8082 8101 if (hash == NULL) {
8083 8102 for (i = 0; i < dtrace_nprobes; i++) {
8084 8103 if ((probe = dtrace_probes[i]) == NULL ||
8085 8104 dtrace_match_probe(probe, pkp, priv, uid,
8086 8105 zoneid) <= 0)
8087 8106 continue;
8088 8107
8089 8108 nmatched++;
8090 8109
8091 8110 if ((rc = (*matched)(probe, arg)) !=
8092 8111 DTRACE_MATCH_NEXT) {
8093 8112 if (rc == DTRACE_MATCH_FAIL)
8094 8113 return (DTRACE_MATCH_FAIL);
8095 8114 break;
8096 8115 }
8097 8116 }
8098 8117
8099 8118 return (nmatched);
8100 8119 }
8101 8120
8102 8121 /*
8103 8122 * If we selected a hash table, iterate over each probe of the same key
8104 8123 * name and invoke the callback for every probe that matches the other
8105 8124 * attributes of our input probe key.
8106 8125 */
8107 8126 for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
8108 8127 probe = *(DTRACE_HASHNEXT(hash, probe))) {
8109 8128
8110 8129 if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
8111 8130 continue;
8112 8131
8113 8132 nmatched++;
8114 8133
8115 8134 if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
8116 8135 if (rc == DTRACE_MATCH_FAIL)
8117 8136 return (DTRACE_MATCH_FAIL);
8118 8137 break;
8119 8138 }
8120 8139 }
8121 8140
8122 8141 return (nmatched);
8123 8142 }
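
/*
 * Illustrative example (editorial; not part of the original source): for
 * a fully-specified description such as fbt:genunix:dounmount:entry, all
 * three hashable elements are exact strings.  Many probes share the
 * module "genunix" and the name "entry", but few share the function
 * "dounmount", so the function hash will typically have the shortest
 * chain and is the one searched.
 */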
8124 8143
8125 8144 /*
8126 8145 * Return the function pointer dtrace_probecmp() should use to compare the
8127 8146 * specified pattern with a string. For NULL or empty patterns, we select
8128 8147 * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob().
8129 8148 * For non-empty non-glob strings, we use dtrace_match_string().
8130 8149 */
8131 8150 static dtrace_probekey_f *
8132 8151 dtrace_probekey_func(const char *p)
8133 8152 {
8134 8153 char c;
8135 8154
8136 8155 if (p == NULL || *p == '\0')
8137 8156 return (&dtrace_match_nul);
8138 8157
8139 8158 while ((c = *p++) != '\0') {
8140 8159 if (c == '[' || c == '?' || c == '*' || c == '\\')
8141 8160 return (&dtrace_match_glob);
8142 8161 }
8143 8162
8144 8163 return (&dtrace_match_string);
8145 8164 }
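
/*
 * Illustrative sketch (editorial; not part of the original source) of the
 * selection rules:
 */
#if 0
        ASSERT(dtrace_probekey_func(NULL) == &dtrace_match_nul);
        ASSERT(dtrace_probekey_func("") == &dtrace_match_nul);
        ASSERT(dtrace_probekey_func("sysca*") == &dtrace_match_glob);
        ASSERT(dtrace_probekey_func("syscall") == &dtrace_match_string);
#endif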
8146 8165
8147 8166 /*
8148 8167 * Build a probe comparison key for use with dtrace_match_probe() from the
8149 8168 * given probe description. By convention, a null key only matches anchored
8150 8169 * probes: if each field is the empty string, reset dtpk_fmatch to
8151 8170 * dtrace_match_nonzero().
8152 8171 */
8153 8172 static void
8154 8173 dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
8155 8174 {
8156 8175 pkp->dtpk_prov = pdp->dtpd_provider;
8157 8176 pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);
8158 8177
8159 8178 pkp->dtpk_mod = pdp->dtpd_mod;
8160 8179 pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);
8161 8180
8162 8181 pkp->dtpk_func = pdp->dtpd_func;
8163 8182 pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);
8164 8183
8165 8184 pkp->dtpk_name = pdp->dtpd_name;
8166 8185 pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);
8167 8186
8168 8187 pkp->dtpk_id = pdp->dtpd_id;
8169 8188
8170 8189 if (pkp->dtpk_id == DTRACE_IDNONE &&
8171 8190 pkp->dtpk_pmatch == &dtrace_match_nul &&
8172 8191 pkp->dtpk_mmatch == &dtrace_match_nul &&
8173 8192 pkp->dtpk_fmatch == &dtrace_match_nul &&
8174 8193 pkp->dtpk_nmatch == &dtrace_match_nul)
8175 8194 pkp->dtpk_fmatch = &dtrace_match_nonzero;
8176 8195 }
8177 8196
8178 8197 /*
8179 8198 * DTrace Provider-to-Framework API Functions
8180 8199 *
8181 8200 * These functions implement much of the Provider-to-Framework API, as
8182 8201 * described in <sys/dtrace.h>. The parts of the API not in this section are
8183 8202 * the functions in the API for probe management (found below), and
8184 8203 * dtrace_probe() itself (found above).
8185 8204 */
8186 8205
8187 8206 /*
8188 8207 * Register the calling provider with the DTrace framework. This should
8189 8208 * generally be called by DTrace providers in their attach(9E) entry point.
8190 8209 */
8191 8210 int
8192 8211 dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
8193 8212 cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
8194 8213 {
8195 8214 dtrace_provider_t *provider;
8196 8215
8197 8216 if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
8198 8217 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
8199 8218 "arguments", name ? name : "<NULL>");
8200 8219 return (EINVAL);
8201 8220 }
8202 8221
8203 8222 if (name[0] == '\0' || dtrace_badname(name)) {
8204 8223 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
8205 8224 "provider name", name);
8206 8225 return (EINVAL);
8207 8226 }
8208 8227
8209 8228 if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
8210 8229 pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
8211 8230 pops->dtps_destroy == NULL ||
8212 8231 ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
8213 8232 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
8214 8233 "provider ops", name);
8215 8234 return (EINVAL);
8216 8235 }
8217 8236
8218 8237 if (dtrace_badattr(&pap->dtpa_provider) ||
8219 8238 dtrace_badattr(&pap->dtpa_mod) ||
8220 8239 dtrace_badattr(&pap->dtpa_func) ||
8221 8240 dtrace_badattr(&pap->dtpa_name) ||
8222 8241 dtrace_badattr(&pap->dtpa_args)) {
8223 8242 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
8224 8243 "provider attributes", name);
8225 8244 return (EINVAL);
8226 8245 }
8227 8246
8228 8247 if (priv & ~DTRACE_PRIV_ALL) {
8229 8248 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
8230 8249 "privilege attributes", name);
8231 8250 return (EINVAL);
8232 8251 }
8233 8252
8234 8253 if ((priv & DTRACE_PRIV_KERNEL) &&
8235 8254 (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
8236 8255 pops->dtps_mode == NULL) {
8237 8256 cmn_err(CE_WARN, "failed to register provider '%s': need "
8238 8257 "dtps_mode() op for given privilege attributes", name);
8239 8258 return (EINVAL);
8240 8259 }
8241 8260
8242 8261 provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
8243 8262 provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
8244 8263 (void) strcpy(provider->dtpv_name, name);
8245 8264
8246 8265 provider->dtpv_attr = *pap;
8247 8266 provider->dtpv_priv.dtpp_flags = priv;
8248 8267 if (cr != NULL) {
8249 8268 provider->dtpv_priv.dtpp_uid = crgetuid(cr);
8250 8269 provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
8251 8270 }
8252 8271 provider->dtpv_pops = *pops;
8253 8272
8254 8273 if (pops->dtps_provide == NULL) {
8255 8274 ASSERT(pops->dtps_provide_module != NULL);
8256 8275 provider->dtpv_pops.dtps_provide =
8257 8276 (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
8258 8277 }
8259 8278
8260 8279 if (pops->dtps_provide_module == NULL) {
8261 8280 ASSERT(pops->dtps_provide != NULL);
8262 8281 provider->dtpv_pops.dtps_provide_module =
8263 8282 (void (*)(void *, struct modctl *))dtrace_nullop;
8264 8283 }
8265 8284
8266 8285 if (pops->dtps_suspend == NULL) {
8267 8286 ASSERT(pops->dtps_resume == NULL);
8268 8287 provider->dtpv_pops.dtps_suspend =
8269 8288 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
8270 8289 provider->dtpv_pops.dtps_resume =
8271 8290 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
8272 8291 }
8273 8292
8274 8293 provider->dtpv_arg = arg;
8275 8294 *idp = (dtrace_provider_id_t)provider;
8276 8295
8277 8296 if (pops == &dtrace_provider_ops) {
8278 8297 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
8279 8298 ASSERT(MUTEX_HELD(&dtrace_lock));
8280 8299 ASSERT(dtrace_anon.dta_enabling == NULL);
8281 8300
8282 8301 /*
8283 8302 * We make sure that the DTrace provider is at the head of
8284 8303 * the provider chain.
8285 8304 */
8286 8305 provider->dtpv_next = dtrace_provider;
8287 8306 dtrace_provider = provider;
8288 8307 return (0);
8289 8308 }
8290 8309
8291 8310 mutex_enter(&dtrace_provider_lock);
8292 8311 mutex_enter(&dtrace_lock);
8293 8312
8294 8313 /*
8295 8314 * If there is at least one provider registered, we'll add this
8296 8315 * provider after the first provider.
8297 8316 */
8298 8317 if (dtrace_provider != NULL) {
8299 8318 provider->dtpv_next = dtrace_provider->dtpv_next;
8300 8319 dtrace_provider->dtpv_next = provider;
8301 8320 } else {
8302 8321 dtrace_provider = provider;
8303 8322 }
8304 8323
8305 8324 if (dtrace_retained != NULL) {
8306 8325 dtrace_enabling_provide(provider);
8307 8326
8308 8327 /*
8309 8328 * Now we need to call dtrace_enabling_matchall() -- which
8310 8329 * will acquire cpu_lock and dtrace_lock. We therefore need
8311 8330 * to drop all of our locks before calling into it...
8312 8331 */
8313 8332 mutex_exit(&dtrace_lock);
8314 8333 mutex_exit(&dtrace_provider_lock);
8315 8334 dtrace_enabling_matchall();
8316 8335
8317 8336 return (0);
8318 8337 }
8319 8338
8320 8339 mutex_exit(&dtrace_lock);
8321 8340 mutex_exit(&dtrace_provider_lock);
8322 8341
8323 8342 return (0);
8324 8343 }
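
/*
 * Illustrative sketch (editorial; not part of the original source): a
 * minimal provider registers from attach(9E) roughly as follows.  The
 * names my_pattr, my_pops, my_id and my_attach are hypothetical; my_pops
 * must supply dtps_provide (or dtps_provide_module), dtps_enable,
 * dtps_disable and dtps_destroy to pass the checks above.
 */
#if 0
static dtrace_pattr_t my_pattr;         /* hypothetical attributes */
static dtrace_pops_t my_pops;           /* hypothetical ops vector */
static dtrace_provider_id_t my_id;

static int
my_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        if (dtrace_register("myprov", &my_pattr, DTRACE_PRIV_KERNEL,
            NULL, &my_pops, NULL, &my_id) != 0)
                return (DDI_FAILURE);

        return (DDI_SUCCESS);
}
#endif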
8325 8344
8326 8345 /*
8327 8346 * Unregister the specified provider from the DTrace framework. This should
8328 8347 * generally be called by DTrace providers in their detach(9E) entry point.
8329 8348 */
8330 8349 int
8331 8350 dtrace_unregister(dtrace_provider_id_t id)
8332 8351 {
8333 8352 dtrace_provider_t *old = (dtrace_provider_t *)id;
8334 8353 dtrace_provider_t *prev = NULL;
8335 8354 int i, self = 0, noreap = 0;
8336 8355 dtrace_probe_t *probe, *first = NULL;
8337 8356
8338 8357 if (old->dtpv_pops.dtps_enable ==
8339 8358 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
8340 8359 /*
8341 8360 * If DTrace itself is the provider, we're called with locks
8342 8361 * already held.
8343 8362 */
8344 8363 ASSERT(old == dtrace_provider);
8345 8364 ASSERT(dtrace_devi != NULL);
8346 8365 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
8347 8366 ASSERT(MUTEX_HELD(&dtrace_lock));
8348 8367 self = 1;
8349 8368
8350 8369 if (dtrace_provider->dtpv_next != NULL) {
8351 8370 /*
8352 8371 * There's another provider here; return failure.
8353 8372 */
8354 8373 return (EBUSY);
8355 8374 }
8356 8375 } else {
8357 8376 mutex_enter(&dtrace_provider_lock);
8358 8377 mutex_enter(&mod_lock);
8359 8378 mutex_enter(&dtrace_lock);
8360 8379 }
8361 8380
8362 8381 /*
8363 8382 * If anyone has /dev/dtrace open, or if there are anonymous enabled
8364 8383 * probes, we refuse to let providers slither away, unless this
8365 8384 * provider has already been explicitly invalidated.
8366 8385 */
8367 8386 if (!old->dtpv_defunct &&
8368 8387 (dtrace_opens || (dtrace_anon.dta_state != NULL &&
8369 8388 dtrace_anon.dta_state->dts_necbs > 0))) {
8370 8389 if (!self) {
8371 8390 mutex_exit(&dtrace_lock);
8372 8391 mutex_exit(&mod_lock);
8373 8392 mutex_exit(&dtrace_provider_lock);
8374 8393 }
8375 8394 return (EBUSY);
8376 8395 }
8377 8396
8378 8397 /*
8379 8398 * Attempt to destroy the probes associated with this provider.
8380 8399 */
8381 8400 for (i = 0; i < dtrace_nprobes; i++) {
8382 8401 if ((probe = dtrace_probes[i]) == NULL)
8383 8402 continue;
8384 8403
8385 8404 if (probe->dtpr_provider != old)
8386 8405 continue;
8387 8406
8388 8407 if (probe->dtpr_ecb == NULL)
8389 8408 continue;
8390 8409
8391 8410 /*
8392 8411 * If we are trying to unregister a defunct provider, and the
8393 8412 * provider was made defunct within the interval dictated by
8394 8413 * dtrace_unregister_defunct_reap, we'll (asynchronously)
8395 8414 * attempt to reap our enablings. To denote that the provider
8396 8415 * should reattempt to unregister itself at some point in the
8397 8416 * future, we will return a differentiable error code (EAGAIN
8398 8417 * instead of EBUSY) in this case.
8399 8418 */
8400 8419 if (dtrace_gethrtime() - old->dtpv_defunct >
8401 8420 dtrace_unregister_defunct_reap)
8402 8421 noreap = 1;
8403 8422
8404 8423 if (!self) {
8405 8424 mutex_exit(&dtrace_lock);
8406 8425 mutex_exit(&mod_lock);
8407 8426 mutex_exit(&dtrace_provider_lock);
8408 8427 }
8409 8428
8410 8429 if (noreap)
8411 8430 return (EBUSY);
8412 8431
8413 8432 (void) taskq_dispatch(dtrace_taskq,
8414 8433 (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);
8415 8434
8416 8435 return (EAGAIN);
8417 8436 }
8418 8437
8419 8438 /*
8420 8439 * All of the probes for this provider are disabled; we can safely
8421 8440 * remove all of them from their hash chains and from the probe array.
8422 8441 */
8423 8442 for (i = 0; i < dtrace_nprobes; i++) {
8424 8443 if ((probe = dtrace_probes[i]) == NULL)
8425 8444 continue;
8426 8445
8427 8446 if (probe->dtpr_provider != old)
8428 8447 continue;
8429 8448
8430 8449 dtrace_probes[i] = NULL;
8431 8450
8432 8451 dtrace_hash_remove(dtrace_bymod, probe);
8433 8452 dtrace_hash_remove(dtrace_byfunc, probe);
8434 8453 dtrace_hash_remove(dtrace_byname, probe);
8435 8454
8436 8455 if (first == NULL) {
8437 8456 first = probe;
8438 8457 probe->dtpr_nextmod = NULL;
8439 8458 } else {
8440 8459 probe->dtpr_nextmod = first;
8441 8460 first = probe;
8442 8461 }
8443 8462 }
8444 8463
8445 8464 /*
8446 8465 * The provider's probes have been removed from the hash chains and
8447 8466 * from the probe array. Now issue a dtrace_sync() to be sure that
8448 8467 * everyone has cleared out from any probe array processing.
8449 8468 */
8450 8469 dtrace_sync();
8451 8470
8452 8471 for (probe = first; probe != NULL; probe = first) {
8453 8472 first = probe->dtpr_nextmod;
8454 8473
8455 8474 old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
8456 8475 probe->dtpr_arg);
8457 8476 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
8458 8477 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
8459 8478 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
8460 8479 vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
8461 8480 kmem_free(probe, sizeof (dtrace_probe_t));
8462 8481 }
8463 8482
8464 8483 if ((prev = dtrace_provider) == old) {
8465 8484 ASSERT(self || dtrace_devi == NULL);
8466 8485 ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
8467 8486 dtrace_provider = old->dtpv_next;
8468 8487 } else {
8469 8488 while (prev != NULL && prev->dtpv_next != old)
8470 8489 prev = prev->dtpv_next;
8471 8490
8472 8491 if (prev == NULL) {
8473 8492 panic("attempt to unregister non-existent "
8474 8493 "dtrace provider %p\n", (void *)id);
8475 8494 }
8476 8495
8477 8496 prev->dtpv_next = old->dtpv_next;
8478 8497 }
8479 8498
8480 8499 if (!self) {
8481 8500 mutex_exit(&dtrace_lock);
8482 8501 mutex_exit(&mod_lock);
8483 8502 mutex_exit(&dtrace_provider_lock);
8484 8503 }
8485 8504
8486 8505 kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
8487 8506 kmem_free(old, sizeof (dtrace_provider_t));
8488 8507
8489 8508 return (0);
8490 8509 }
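
/*
 * By way of illustration (a sketch only; the provider "foo" and its
 * saved registration id "foo_id" are hypothetical), a provider's
 * detach(9E) entry point would drive the interface above as follows,
 * failing the detach on either EBUSY (live enablings) or EAGAIN (a
 * reap has been dispatched and the detach may simply be retried):
 *
 *	static int
 *	foo_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
 *	{
 *		if (cmd != DDI_DETACH)
 *			return (DDI_FAILURE);
 *
 *		if (dtrace_unregister(foo_id) != 0)
 *			return (DDI_FAILURE);
 *
 *		return (DDI_SUCCESS);
 *	}
 */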
8491 8510
8492 8511 /*
8493 8512 * Invalidate the specified provider. All subsequent probe lookups for the
8494 8513 * specified provider will fail, but its probes will not be removed.
8495 8514 */
8496 8515 void
8497 8516 dtrace_invalidate(dtrace_provider_id_t id)
8498 8517 {
8499 8518 dtrace_provider_t *pvp = (dtrace_provider_t *)id;
8500 8519
8501 8520 ASSERT(pvp->dtpv_pops.dtps_enable !=
8502 8521 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
8503 8522
8504 8523 mutex_enter(&dtrace_provider_lock);
8505 8524 mutex_enter(&dtrace_lock);
8506 8525
8507 8526 pvp->dtpv_defunct = dtrace_gethrtime();
8508 8527
8509 8528 mutex_exit(&dtrace_lock);
8510 8529 mutex_exit(&dtrace_provider_lock);
8511 8530 }
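
/*
 * A sketch of the intended sequence (again with a hypothetical
 * "foo_id"): a provider that cannot yet be unregistered may first be
 * invalidated, so that no new enablings can match its probes, after
 * which unregistration can be retried:
 *
 *	dtrace_invalidate(foo_id);
 *	if (dtrace_unregister(foo_id) != 0)
 *		foo_schedule_unregister_retry();	(hypothetical)
 */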
8512 8531
8513 8532 /*
8514 8533 * Indicate whether or not DTrace has attached.
8515 8534 */
8516 8535 int
8517 8536 dtrace_attached(void)
8518 8537 {
8519 8538 /*
8520 8539 * dtrace_provider will be non-NULL iff the DTrace driver has
8521 8540 * attached. (It's non-NULL because DTrace is always itself a
8522 8541 * provider.)
8523 8542 */
8524 8543 return (dtrace_provider != NULL);
8525 8544 }
8526 8545
8527 8546 /*
8528 8547 * Remove all the unenabled probes for the given provider. This function is
8529 8548 * not unlike dtrace_unregister(), except that it doesn't remove the provider
8530 8549 * -- just as many of its associated probes as it can.
8531 8550 */
8532 8551 int
8533 8552 dtrace_condense(dtrace_provider_id_t id)
8534 8553 {
8535 8554 dtrace_provider_t *prov = (dtrace_provider_t *)id;
8536 8555 int i;
8537 8556 dtrace_probe_t *probe;
8538 8557
8539 8558 /*
8540 8559 * Make sure this isn't the dtrace provider itself.
8541 8560 */
8542 8561 ASSERT(prov->dtpv_pops.dtps_enable !=
8543 8562 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
8544 8563
8545 8564 mutex_enter(&dtrace_provider_lock);
8546 8565 mutex_enter(&dtrace_lock);
8547 8566
8548 8567 /*
8549 8568 * Attempt to destroy the probes associated with this provider.
8550 8569 */
8551 8570 for (i = 0; i < dtrace_nprobes; i++) {
8552 8571 if ((probe = dtrace_probes[i]) == NULL)
8553 8572 continue;
8554 8573
8555 8574 if (probe->dtpr_provider != prov)
8556 8575 continue;
8557 8576
8558 8577 if (probe->dtpr_ecb != NULL)
8559 8578 continue;
8560 8579
8561 8580 dtrace_probes[i] = NULL;
8562 8581
8563 8582 dtrace_hash_remove(dtrace_bymod, probe);
8564 8583 dtrace_hash_remove(dtrace_byfunc, probe);
8565 8584 dtrace_hash_remove(dtrace_byname, probe);
8566 8585
8567 8586 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
8568 8587 probe->dtpr_arg);
8569 8588 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
8570 8589 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
8571 8590 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
8572 8591 kmem_free(probe, sizeof (dtrace_probe_t));
8573 8592 vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
8574 8593 }
8575 8594
8576 8595 mutex_exit(&dtrace_lock);
8577 8596 mutex_exit(&dtrace_provider_lock);
8578 8597
8579 8598 return (0);
8580 8599 }
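
/*
 * Illustrative use (hypothetical names): a pid-style provider whose
 * underlying process has exited can discard any probes that were
 * never enabled without giving up its registration:
 *
 *	(void) dtrace_condense(foo_id);
 *	foo_mark_condensed();			(hypothetical bookkeeping)
 */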
8581 8600
8582 8601 /*
8583 8602 * DTrace Probe Management Functions
8584 8603 *
8585 8604 * The functions in this section perform the DTrace probe management,
8586 8605 * including functions to create probes, look-up probes, and call into the
8587 8606 * providers to request that probes be provided. Some of these functions are
8588 8607 * in the Provider-to-Framework API; these functions can be identified by the
8589 8608 * fact that they are not declared "static".
8590 8609 */
8591 8610
8592 8611 /*
8593 8612 * Create a probe with the specified module name, function name, and name.
8594 8613 */
8595 8614 dtrace_id_t
8596 8615 dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
8597 8616 const char *func, const char *name, int aframes, void *arg)
8598 8617 {
8599 8618 dtrace_probe_t *probe, **probes;
8600 8619 dtrace_provider_t *provider = (dtrace_provider_t *)prov;
8601 8620 dtrace_id_t id;
8602 8621
8603 8622 if (provider == dtrace_provider) {
8604 8623 ASSERT(MUTEX_HELD(&dtrace_lock));
8605 8624 } else {
8606 8625 mutex_enter(&dtrace_lock);
8607 8626 }
8608 8627
8609 8628 id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
8610 8629 VM_BESTFIT | VM_SLEEP);
8611 8630 probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);
8612 8631
8613 8632 probe->dtpr_id = id;
8614 8633 probe->dtpr_gen = dtrace_probegen++;
8615 8634 probe->dtpr_mod = dtrace_strdup(mod);
8616 8635 probe->dtpr_func = dtrace_strdup(func);
8617 8636 probe->dtpr_name = dtrace_strdup(name);
8618 8637 probe->dtpr_arg = arg;
8619 8638 probe->dtpr_aframes = aframes;
8620 8639 probe->dtpr_provider = provider;
8621 8640
8622 8641 dtrace_hash_add(dtrace_bymod, probe);
8623 8642 dtrace_hash_add(dtrace_byfunc, probe);
8624 8643 dtrace_hash_add(dtrace_byname, probe);
8625 8644
8626 8645 if (id - 1 >= dtrace_nprobes) {
8627 8646 size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
8628 8647 size_t nsize = osize << 1;
8629 8648
8630 8649 if (nsize == 0) {
8631 8650 ASSERT(osize == 0);
8632 8651 ASSERT(dtrace_probes == NULL);
8633 8652 nsize = sizeof (dtrace_probe_t *);
8634 8653 }
8635 8654
8636 8655 probes = kmem_zalloc(nsize, KM_SLEEP);
8637 8656
8638 8657 if (dtrace_probes == NULL) {
8639 8658 ASSERT(osize == 0);
8640 8659 dtrace_probes = probes;
8641 8660 dtrace_nprobes = 1;
8642 8661 } else {
8643 8662 dtrace_probe_t **oprobes = dtrace_probes;
8644 8663
8645 8664 bcopy(oprobes, probes, osize);
8646 8665 dtrace_membar_producer();
8647 8666 dtrace_probes = probes;
8648 8667
8649 8668 dtrace_sync();
8650 8669
8651 8670 /*
8652 8671 * All CPUs are now seeing the new probes array; we can
8653 8672 * safely free the old array.
8654 8673 */
8655 8674 kmem_free(oprobes, osize);
8656 8675 dtrace_nprobes <<= 1;
8657 8676 }
8658 8677
8659 8678 ASSERT(id - 1 < dtrace_nprobes);
8660 8679 }
8661 8680
8662 8681 ASSERT(dtrace_probes[id - 1] == NULL);
8663 8682 dtrace_probes[id - 1] = probe;
8664 8683
8665 8684 if (provider != dtrace_provider)
8666 8685 mutex_exit(&dtrace_lock);
8667 8686
8668 8687 return (id);
8669 8688 }
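
/*
 * A sketch of the canonical caller (hypothetical names throughout): a
 * provider's dtps_provide() entry point first checks whether the probe
 * already exists and creates it only if it does not; the id returned
 * here is the one later passed to dtrace_probe():
 *
 *	static void
 *	foo_provide(void *arg, const dtrace_probedesc_t *desc)
 *	{
 *		if (dtrace_probe_lookup(foo_id, "foomod",
 *		    "foofunc", "entry") != DTRACE_IDNONE)
 *			return;
 *
 *		foo_entry_id = dtrace_probe_create(foo_id, "foomod",
 *		    "foofunc", "entry", 0, NULL);
 *	}
 */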
8670 8689
8671 8690 static dtrace_probe_t *
8672 8691 dtrace_probe_lookup_id(dtrace_id_t id)
8673 8692 {
8674 8693 ASSERT(MUTEX_HELD(&dtrace_lock));
8675 8694
8676 8695 if (id == 0 || id > dtrace_nprobes)
8677 8696 return (NULL);
8678 8697
8679 8698 return (dtrace_probes[id - 1]);
8680 8699 }
8681 8700
8682 8701 static int
8683 8702 dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
8684 8703 {
8685 8704 *((dtrace_id_t *)arg) = probe->dtpr_id;
8686 8705
8687 8706 return (DTRACE_MATCH_DONE);
8688 8707 }
8689 8708
8690 8709 /*
8691 8710 * Look up a probe based on provider and one or more of module name, function
8692 8711 * name and probe name.
8693 8712 */
8694 8713 dtrace_id_t
8695 8714 dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
8696 8715 const char *func, const char *name)
8697 8716 {
8698 8717 dtrace_probekey_t pkey;
8699 8718 dtrace_id_t id;
8700 8719 int match;
8701 8720
8702 8721 pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
8703 8722 pkey.dtpk_pmatch = &dtrace_match_string;
8704 8723 pkey.dtpk_mod = mod;
8705 8724 pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
8706 8725 pkey.dtpk_func = func;
8707 8726 pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
8708 8727 pkey.dtpk_name = name;
8709 8728 pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
8710 8729 pkey.dtpk_id = DTRACE_IDNONE;
8711 8730
8712 8731 mutex_enter(&dtrace_lock);
8713 8732 match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
8714 8733 dtrace_probe_lookup_match, &id);
8715 8734 mutex_exit(&dtrace_lock);
8716 8735
8717 8736 ASSERT(match == 1 || match == 0);
8718 8737 return (match ? id : 0);
8719 8738 }
8720 8739
8721 8740 /*
8722 8741 * Returns the probe argument associated with the specified probe.
8723 8742 */
8724 8743 void *
8725 8744 dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
8726 8745 {
8727 8746 dtrace_probe_t *probe;
8728 8747 void *rval = NULL;
8729 8748
8730 8749 mutex_enter(&dtrace_lock);
8731 8750
8732 8751 if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
8733 8752 probe->dtpr_provider == (dtrace_provider_t *)id)
8734 8753 rval = probe->dtpr_arg;
8735 8754
8736 8755 mutex_exit(&dtrace_lock);
8737 8756
8738 8757 return (rval);
8739 8758 }
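
/*
 * Taken together, the two functions above allow a provider to recover
 * both the id and the argument of a probe it created earlier, e.g.
 * (hypothetical names):
 *
 *	dtrace_id_t id;
 *
 *	if ((id = dtrace_probe_lookup(foo_id, "foomod",
 *	    "foofunc", "entry")) != DTRACE_IDNONE)
 *		foo_arg = dtrace_probe_arg(foo_id, id);
 */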
8740 8759
8741 8760 /*
8742 8761 * Copy a probe into a probe description.
8743 8762 */
8744 8763 static void
8745 8764 dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
8746 8765 {
8747 8766 bzero(pdp, sizeof (dtrace_probedesc_t));
8748 8767 pdp->dtpd_id = prp->dtpr_id;
8749 8768
8750 8769 (void) strncpy(pdp->dtpd_provider,
8751 8770 prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1);
8752 8771
8753 8772 (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1);
8754 8773 (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1);
8755 8774 (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1);
8756 8775 }
8757 8776
8758 8777 /*
8759 8778 * Called to indicate that a probe -- or probes -- should be provided by a
8760 8779 * specified provider. If the specified description is NULL, the provider will
8761 8780 * be told to provide all of its probes. (This is done whenever a new
8762 8781 * consumer comes along, or whenever a retained enabling is to be matched.) If
8763 8782 * the specified description is non-NULL, the provider is given the
8764 8783 * opportunity to dynamically provide the specified probe, allowing providers
8765 8784 * to support the creation of probes on-the-fly. (So-called _autocreated_
8766 8785 * probes.) If the provider is NULL, the operations will be applied to all
8767 8786 * providers; if the provider is non-NULL the operations will only be applied
8768 8787 * to the specified provider. The dtrace_provider_lock must be held, and the
8769 8788 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
8770 8789 * will need to grab the dtrace_lock when it reenters the framework through
8771 8790 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
8772 8791 */
8773 8792 static void
8774 8793 dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
8775 8794 {
8776 8795 struct modctl *ctl;
8777 8796 int all = 0;
8778 8797
8779 8798 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
8780 8799
8781 8800 if (prv == NULL) {
8782 8801 all = 1;
8783 8802 prv = dtrace_provider;
8784 8803 }
8785 8804
8786 8805 do {
8787 8806 /*
8788 8807 * First, call the blanket provide operation.
8789 8808 */
8790 8809 prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);
8791 8810
8792 8811 /*
8793 8812 * Now call the per-module provide operation. We will grab
8794 8813 * mod_lock to prevent the list from being modified. Note
8795 8814 * that this also prevents the mod_busy bits from changing.
8796 8815 * (mod_busy can only be changed with mod_lock held.)
8797 8816 */
8798 8817 mutex_enter(&mod_lock);
8799 8818
8800 8819 ctl = &modules;
8801 8820 do {
8802 8821 if (ctl->mod_busy || ctl->mod_mp == NULL)
8803 8822 continue;
8804 8823
8805 8824 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
8806 8825
8807 8826 } while ((ctl = ctl->mod_next) != &modules);
8808 8827
8809 8828 mutex_exit(&mod_lock);
8810 8829 } while (all && (prv = prv->dtpv_next) != NULL);
8811 8830 }
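
/*
 * A caller honoring the locking contract spelled out above looks like
 * the following sketch (providing all probes from all providers):
 *
 *	ASSERT(MUTEX_NOT_HELD(&dtrace_lock));
 *	mutex_enter(&dtrace_provider_lock);
 *	dtrace_probe_provide(NULL, NULL);
 *	mutex_exit(&dtrace_provider_lock);
 */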
8812 8831
8813 8832 /*
8814 8833 * Iterate over each probe, and call the Framework-to-Provider API function
8815 8834 * denoted by offs.
8816 8835 */
8817 8836 static void
8818 8837 dtrace_probe_foreach(uintptr_t offs)
8819 8838 {
8820 8839 dtrace_provider_t *prov;
8821 8840 void (*func)(void *, dtrace_id_t, void *);
8822 8841 dtrace_probe_t *probe;
8823 8842 dtrace_icookie_t cookie;
8824 8843 int i;
8825 8844
8826 8845 /*
8827 8846 * We disable interrupts to walk through the probe array. This is
8828 8847 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
8829 8848 * won't see stale data.
8830 8849 */
8831 8850 cookie = dtrace_interrupt_disable();
8832 8851
8833 8852 for (i = 0; i < dtrace_nprobes; i++) {
8834 8853 if ((probe = dtrace_probes[i]) == NULL)
8835 8854 continue;
8836 8855
8837 8856 if (probe->dtpr_ecb == NULL) {
8838 8857 /*
8839 8858 * This probe isn't enabled -- don't call the function.
8840 8859 */
8841 8860 continue;
8842 8861 }
8843 8862
8844 8863 prov = probe->dtpr_provider;
8845 8864 func = *((void(**)(void *, dtrace_id_t, void *))
8846 8865 ((uintptr_t)&prov->dtpv_pops + offs));
8847 8866
8848 8867 func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
8849 8868 }
8850 8869
8851 8870 dtrace_interrupt_enable(cookie);
8852 8871 }
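
/*
 * The offs argument is the byte offset of the desired entry point
 * within the dtrace_pops_t; dtrace_suspend(), for example, is
 * effectively:
 *
 *	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
 */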
8853 8872
8854 8873 static int
8855 8874 dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
8856 8875 {
8857 8876 dtrace_probekey_t pkey;
8858 8877 uint32_t priv;
8859 8878 uid_t uid;
8860 8879 zoneid_t zoneid;
8861 8880
8862 8881 ASSERT(MUTEX_HELD(&dtrace_lock));
8863 8882 dtrace_ecb_create_cache = NULL;
8864 8883
8865 8884 if (desc == NULL) {
8866 8885 /*
8867 8886 * If we're passed a NULL description, we're being asked to
8868 8887 * create an ECB with a NULL probe.
8869 8888 */
8870 8889 (void) dtrace_ecb_create_enable(NULL, enab);
8871 8890 return (0);
8872 8891 }
8873 8892
8874 8893 dtrace_probekey(desc, &pkey);
8875 8894 dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
8876 8895 &priv, &uid, &zoneid);
8877 8896
8878 8897 return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
8879 8898 enab));
8880 8899 }
8881 8900
8882 8901 /*
8883 8902 * DTrace Helper Provider Functions
8884 8903 */
8885 8904 static void
8886 8905 dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
8887 8906 {
8888 8907 attr->dtat_name = DOF_ATTR_NAME(dofattr);
8889 8908 attr->dtat_data = DOF_ATTR_DATA(dofattr);
8890 8909 attr->dtat_class = DOF_ATTR_CLASS(dofattr);
8891 8910 }
8892 8911
8893 8912 static void
8894 8913 dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
8895 8914 const dof_provider_t *dofprov, char *strtab)
8896 8915 {
8897 8916 hprov->dthpv_provname = strtab + dofprov->dofpv_name;
8898 8917 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
8899 8918 dofprov->dofpv_provattr);
8900 8919 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
8901 8920 dofprov->dofpv_modattr);
8902 8921 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
8903 8922 dofprov->dofpv_funcattr);
8904 8923 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
8905 8924 dofprov->dofpv_nameattr);
8906 8925 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
8907 8926 dofprov->dofpv_argsattr);
8908 8927 }
8909 8928
8910 8929 static void
8911 8930 dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
8912 8931 {
8913 8932 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8914 8933 dof_hdr_t *dof = (dof_hdr_t *)daddr;
8915 8934 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
8916 8935 dof_provider_t *provider;
8917 8936 dof_probe_t *probe;
8918 8937 uint32_t *off, *enoff;
8919 8938 uint8_t *arg;
8920 8939 char *strtab;
8921 8940 uint_t i, nprobes;
8922 8941 dtrace_helper_provdesc_t dhpv;
8923 8942 dtrace_helper_probedesc_t dhpb;
8924 8943 dtrace_meta_t *meta = dtrace_meta_pid;
8925 8944 dtrace_mops_t *mops = &meta->dtm_mops;
8926 8945 void *parg;
8927 8946
8928 8947 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
8929 8948 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8930 8949 provider->dofpv_strtab * dof->dofh_secsize);
8931 8950 prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8932 8951 provider->dofpv_probes * dof->dofh_secsize);
8933 8952 arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8934 8953 provider->dofpv_prargs * dof->dofh_secsize);
8935 8954 off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8936 8955 provider->dofpv_proffs * dof->dofh_secsize);
8937 8956
8938 8957 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
8939 8958 off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
8940 8959 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
8941 8960 enoff = NULL;
8942 8961
8943 8962 /*
8944 8963 * See dtrace_helper_provider_validate().
8945 8964 */
8946 8965 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
8947 8966 provider->dofpv_prenoffs != DOF_SECT_NONE) {
8948 8967 enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8949 8968 provider->dofpv_prenoffs * dof->dofh_secsize);
8950 8969 enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
8951 8970 }
8952 8971
8953 8972 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
8954 8973
8955 8974 /*
8956 8975 * Create the provider.
8957 8976 */
8958 8977 dtrace_dofprov2hprov(&dhpv, provider, strtab);
8959 8978
8960 8979 if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
8961 8980 return;
8962 8981
8963 8982 meta->dtm_count++;
8964 8983
8965 8984 /*
8966 8985 * Create the probes.
8967 8986 */
8968 8987 for (i = 0; i < nprobes; i++) {
8969 8988 probe = (dof_probe_t *)(uintptr_t)(daddr +
8970 8989 prb_sec->dofs_offset + i * prb_sec->dofs_entsize);
8971 8990
8972 8991 dhpb.dthpb_mod = dhp->dofhp_mod;
8973 8992 dhpb.dthpb_func = strtab + probe->dofpr_func;
8974 8993 dhpb.dthpb_name = strtab + probe->dofpr_name;
8975 8994 dhpb.dthpb_base = probe->dofpr_addr;
8976 8995 dhpb.dthpb_offs = off + probe->dofpr_offidx;
8977 8996 dhpb.dthpb_noffs = probe->dofpr_noffs;
8978 8997 if (enoff != NULL) {
8979 8998 dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
8980 8999 dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
8981 9000 } else {
8982 9001 dhpb.dthpb_enoffs = NULL;
8983 9002 dhpb.dthpb_nenoffs = 0;
8984 9003 }
8985 9004 dhpb.dthpb_args = arg + probe->dofpr_argidx;
8986 9005 dhpb.dthpb_nargc = probe->dofpr_nargc;
8987 9006 dhpb.dthpb_xargc = probe->dofpr_xargc;
8988 9007 dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
8989 9008 dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;
8990 9009
8991 9010 mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
8992 9011 }
8993 9012 }
8994 9013
8995 9014 static void
8996 9015 dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
8997 9016 {
8998 9017 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8999 9018 dof_hdr_t *dof = (dof_hdr_t *)daddr;
9000 9019 int i;
9001 9020
9002 9021 ASSERT(MUTEX_HELD(&dtrace_meta_lock));
9003 9022
9004 9023 for (i = 0; i < dof->dofh_secnum; i++) {
9005 9024 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
9006 9025 dof->dofh_secoff + i * dof->dofh_secsize);
9007 9026
9008 9027 if (sec->dofs_type != DOF_SECT_PROVIDER)
9009 9028 continue;
9010 9029
9011 9030 dtrace_helper_provide_one(dhp, sec, pid);
9012 9031 }
9013 9032
9014 9033 /*
9015 9034 * We may have just created probes, so we must now rematch against
9016 9035 * any retained enablings. Note that this call will acquire both
9017 9036 * cpu_lock and dtrace_lock; the fact that we are holding
9018 9037 * dtrace_meta_lock now is what defines the ordering with respect to
9019 9038 * these three locks.
9020 9039 */
9021 9040 dtrace_enabling_matchall();
9022 9041 }
9023 9042
9024 9043 static void
9025 9044 dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
9026 9045 {
9027 9046 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
9028 9047 dof_hdr_t *dof = (dof_hdr_t *)daddr;
9029 9048 dof_sec_t *str_sec;
9030 9049 dof_provider_t *provider;
9031 9050 char *strtab;
9032 9051 dtrace_helper_provdesc_t dhpv;
9033 9052 dtrace_meta_t *meta = dtrace_meta_pid;
9034 9053 dtrace_mops_t *mops = &meta->dtm_mops;
9035 9054
9036 9055 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
9037 9056 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
9038 9057 provider->dofpv_strtab * dof->dofh_secsize);
9039 9058
9040 9059 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
9041 9060
9042 9061 /*
9043 9062 * Create the provider.
9044 9063 */
9045 9064 dtrace_dofprov2hprov(&dhpv, provider, strtab);
9046 9065
9047 9066 mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);
9048 9067
9049 9068 meta->dtm_count--;
9050 9069 }
9051 9070
9052 9071 static void
9053 9072 dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
9054 9073 {
9055 9074 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
9056 9075 dof_hdr_t *dof = (dof_hdr_t *)daddr;
9057 9076 int i;
9058 9077
9059 9078 ASSERT(MUTEX_HELD(&dtrace_meta_lock));
9060 9079
9061 9080 for (i = 0; i < dof->dofh_secnum; i++) {
9062 9081 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
9063 9082 dof->dofh_secoff + i * dof->dofh_secsize);
9064 9083
9065 9084 if (sec->dofs_type != DOF_SECT_PROVIDER)
9066 9085 continue;
9067 9086
9068 9087 dtrace_helper_provider_remove_one(dhp, sec, pid);
9069 9088 }
9070 9089 }
9071 9090
9072 9091 /*
9073 9092 * DTrace Meta Provider-to-Framework API Functions
9074 9093 *
9075 9094 * These functions implement the Meta Provider-to-Framework API, as described
9076 9095 * in <sys/dtrace.h>.
9077 9096 */
9078 9097 int
9079 9098 dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
9080 9099 dtrace_meta_provider_id_t *idp)
9081 9100 {
9082 9101 dtrace_meta_t *meta;
9083 9102 dtrace_helpers_t *help, *next;
9084 9103 int i;
9085 9104
9086 9105 *idp = DTRACE_METAPROVNONE;
9087 9106
9088 9107 /*
9089 9108 * We strictly don't need the name, but we hold onto it for
9090 9109 * debuggability. All hail error queues!
9091 9110 */
9092 9111 if (name == NULL) {
9093 9112 cmn_err(CE_WARN, "failed to register meta-provider: "
9094 9113 "invalid name");
9095 9114 return (EINVAL);
9096 9115 }
9097 9116
9098 9117 if (mops == NULL ||
9099 9118 mops->dtms_create_probe == NULL ||
9100 9119 mops->dtms_provide_pid == NULL ||
9101 9120 mops->dtms_remove_pid == NULL) {
9102 9121 cmn_err(CE_WARN, "failed to register meta-provider %s: "
9103 9122 "invalid ops", name);
9104 9123 return (EINVAL);
9105 9124 }
9106 9125
9107 9126 meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
9108 9127 meta->dtm_mops = *mops;
9109 9128 meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
9110 9129 (void) strcpy(meta->dtm_name, name);
9111 9130 meta->dtm_arg = arg;
9112 9131
9113 9132 mutex_enter(&dtrace_meta_lock);
9114 9133 mutex_enter(&dtrace_lock);
9115 9134
9116 9135 if (dtrace_meta_pid != NULL) {
9117 9136 mutex_exit(&dtrace_lock);
9118 9137 mutex_exit(&dtrace_meta_lock);
9119 9138 cmn_err(CE_WARN, "failed to register meta-provider %s: "
9120 9139 "user-land meta-provider exists", name);
9121 9140 kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
9122 9141 kmem_free(meta, sizeof (dtrace_meta_t));
9123 9142 return (EINVAL);
9124 9143 }
9125 9144
9126 9145 dtrace_meta_pid = meta;
9127 9146 *idp = (dtrace_meta_provider_id_t)meta;
9128 9147
9129 9148 /*
9130 9149 * If there are providers and probes ready to go, pass them
9131 9150 * off to the new meta provider now.
9132 9151 */
9133 9152
9134 9153 help = dtrace_deferred_pid;
9135 9154 dtrace_deferred_pid = NULL;
9136 9155
9137 9156 mutex_exit(&dtrace_lock);
9138 9157
9139 9158 while (help != NULL) {
9140 9159 for (i = 0; i < help->dthps_nprovs; i++) {
9141 9160 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
9142 9161 help->dthps_pid);
9143 9162 }
9144 9163
9145 9164 next = help->dthps_next;
9146 9165 help->dthps_next = NULL;
9147 9166 help->dthps_prev = NULL;
9148 9167 help->dthps_deferred = 0;
9149 9168 help = next;
9150 9169 }
9151 9170
9152 9171 mutex_exit(&dtrace_meta_lock);
9153 9172
9154 9173 return (0);
9155 9174 }
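
/*
 * Registration from the meta-provider's side is sketched below
 * (hypothetical names; in practice fasttrap is the lone meta-provider
 * consumer of this interface):
 *
 *	static dtrace_mops_t foo_mops = {
 *		foo_meta_create_probe,		(dtms_create_probe)
 *		foo_meta_provide_pid,		(dtms_provide_pid)
 *		foo_meta_remove_pid		(dtms_remove_pid)
 *	};
 *
 *	static dtrace_meta_provider_id_t foo_meta_id;
 *
 *	if (dtrace_meta_register("foo", &foo_mops, NULL,
 *	    &foo_meta_id) != 0)
 *		return (DDI_FAILURE);
 */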
9156 9175
9157 9176 int
9158 9177 dtrace_meta_unregister(dtrace_meta_provider_id_t id)
9159 9178 {
9160 9179 dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;
9161 9180
9162 9181 mutex_enter(&dtrace_meta_lock);
9163 9182 mutex_enter(&dtrace_lock);
9164 9183
9165 9184 if (old == dtrace_meta_pid) {
9166 9185 pp = &dtrace_meta_pid;
9167 9186 } else {
9168 9187 panic("attempt to unregister non-existent "
9169 9188 "dtrace meta-provider %p\n", (void *)old);
9170 9189 }
9171 9190
9172 9191 if (old->dtm_count != 0) {
9173 9192 mutex_exit(&dtrace_lock);
9174 9193 mutex_exit(&dtrace_meta_lock);
9175 9194 return (EBUSY);
9176 9195 }
9177 9196
9178 9197 *pp = NULL;
9179 9198
9180 9199 mutex_exit(&dtrace_lock);
9181 9200 mutex_exit(&dtrace_meta_lock);
9182 9201
9183 9202 kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
9184 9203 kmem_free(old, sizeof (dtrace_meta_t));
9185 9204
9186 9205 return (0);
9187 9206 }
9188 9207
9189 9208
9190 9209 /*
9191 9210 * DTrace DIF Object Functions
9192 9211 */
9193 9212 static int
9194 9213 dtrace_difo_err(uint_t pc, const char *format, ...)
9195 9214 {
9196 9215 if (dtrace_err_verbose) {
9197 9216 va_list alist;
9198 9217
9199 9218 (void) uprintf("dtrace DIF object error: [%u]: ", pc);
9200 9219 va_start(alist, format);
9201 9220 (void) vuprintf(format, alist);
9202 9221 va_end(alist);
9203 9222 }
9204 9223
9205 9224 #ifdef DTRACE_ERRDEBUG
9206 9225 dtrace_errdebug(format);
9207 9226 #endif
9208 9227 return (1);
9209 9228 }
9210 9229
9211 9230 /*
9212 9231 * Validate a DTrace DIF object by checking the IR instructions. The following
9213 9232 * rules are currently enforced by dtrace_difo_validate():
9214 9233 *
9215 9234 * 1. Each instruction must have a valid opcode
9216 9235 * 2. Each register, string, variable, or subroutine reference must be valid
9217 9236 * 3. No instruction can modify register %r0 (must be zero)
9218 9237 * 4. All instruction reserved bits must be set to zero
9219 9238 * 5. The last instruction must be a "ret" instruction
9220 9239 * 6. All branch targets must reference a valid instruction _after_ the branch
9221 9240 */
9222 9241 static int
9223 9242 dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
9224 9243 cred_t *cr)
9225 9244 {
9226 9245 int err = 0, i;
9227 9246 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
9228 9247 int kcheckload;
9229 9248 uint_t pc;
9230 9249 int maxglobal = -1, maxlocal = -1, maxtlocal = -1;
9231 9250
9232 9251 kcheckload = cr == NULL ||
9233 9252 (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0;
9234 9253
9235 9254 dp->dtdo_destructive = 0;
9236 9255
9237 9256 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
9238 9257 dif_instr_t instr = dp->dtdo_buf[pc];
9239 9258
9240 9259 uint_t r1 = DIF_INSTR_R1(instr);
9241 9260 uint_t r2 = DIF_INSTR_R2(instr);
9242 9261 uint_t rd = DIF_INSTR_RD(instr);
9243 9262 uint_t rs = DIF_INSTR_RS(instr);
9244 9263 uint_t label = DIF_INSTR_LABEL(instr);
9245 9264 uint_t v = DIF_INSTR_VAR(instr);
9246 9265 uint_t subr = DIF_INSTR_SUBR(instr);
9247 9266 uint_t type = DIF_INSTR_TYPE(instr);
9248 9267 uint_t op = DIF_INSTR_OP(instr);
9249 9268
9250 9269 switch (op) {
9251 9270 case DIF_OP_OR:
9252 9271 case DIF_OP_XOR:
9253 9272 case DIF_OP_AND:
9254 9273 case DIF_OP_SLL:
9255 9274 case DIF_OP_SRL:
9256 9275 case DIF_OP_SRA:
9257 9276 case DIF_OP_SUB:
9258 9277 case DIF_OP_ADD:
9259 9278 case DIF_OP_MUL:
9260 9279 case DIF_OP_SDIV:
9261 9280 case DIF_OP_UDIV:
9262 9281 case DIF_OP_SREM:
9263 9282 case DIF_OP_UREM:
9264 9283 case DIF_OP_COPYS:
9265 9284 if (r1 >= nregs)
9266 9285 err += efunc(pc, "invalid register %u\n", r1);
9267 9286 if (r2 >= nregs)
9268 9287 err += efunc(pc, "invalid register %u\n", r2);
9269 9288 if (rd >= nregs)
9270 9289 err += efunc(pc, "invalid register %u\n", rd);
9271 9290 if (rd == 0)
9272 9291 err += efunc(pc, "cannot write to %r0\n");
9273 9292 break;
9274 9293 case DIF_OP_NOT:
9275 9294 case DIF_OP_MOV:
9276 9295 case DIF_OP_ALLOCS:
9277 9296 if (r1 >= nregs)
9278 9297 err += efunc(pc, "invalid register %u\n", r1);
9279 9298 if (r2 != 0)
9280 9299 err += efunc(pc, "non-zero reserved bits\n");
9281 9300 if (rd >= nregs)
9282 9301 err += efunc(pc, "invalid register %u\n", rd);
9283 9302 if (rd == 0)
9284 9303 err += efunc(pc, "cannot write to %r0\n");
9285 9304 break;
9286 9305 case DIF_OP_LDSB:
9287 9306 case DIF_OP_LDSH:
9288 9307 case DIF_OP_LDSW:
9289 9308 case DIF_OP_LDUB:
9290 9309 case DIF_OP_LDUH:
9291 9310 case DIF_OP_LDUW:
9292 9311 case DIF_OP_LDX:
9293 9312 if (r1 >= nregs)
9294 9313 err += efunc(pc, "invalid register %u\n", r1);
9295 9314 if (r2 != 0)
9296 9315 err += efunc(pc, "non-zero reserved bits\n");
9297 9316 if (rd >= nregs)
9298 9317 err += efunc(pc, "invalid register %u\n", rd);
9299 9318 if (rd == 0)
9300 9319 err += efunc(pc, "cannot write to %r0\n");
9301 9320 if (kcheckload)
9302 9321 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
9303 9322 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
9304 9323 break;
9305 9324 case DIF_OP_RLDSB:
9306 9325 case DIF_OP_RLDSH:
9307 9326 case DIF_OP_RLDSW:
9308 9327 case DIF_OP_RLDUB:
9309 9328 case DIF_OP_RLDUH:
9310 9329 case DIF_OP_RLDUW:
9311 9330 case DIF_OP_RLDX:
9312 9331 if (r1 >= nregs)
9313 9332 err += efunc(pc, "invalid register %u\n", r1);
9314 9333 if (r2 != 0)
9315 9334 err += efunc(pc, "non-zero reserved bits\n");
9316 9335 if (rd >= nregs)
9317 9336 err += efunc(pc, "invalid register %u\n", rd);
9318 9337 if (rd == 0)
9319 9338 err += efunc(pc, "cannot write to %r0\n");
9320 9339 break;
9321 9340 case DIF_OP_ULDSB:
9322 9341 case DIF_OP_ULDSH:
9323 9342 case DIF_OP_ULDSW:
9324 9343 case DIF_OP_ULDUB:
9325 9344 case DIF_OP_ULDUH:
9326 9345 case DIF_OP_ULDUW:
9327 9346 case DIF_OP_ULDX:
9328 9347 if (r1 >= nregs)
9329 9348 err += efunc(pc, "invalid register %u\n", r1);
9330 9349 if (r2 != 0)
9331 9350 err += efunc(pc, "non-zero reserved bits\n");
9332 9351 if (rd >= nregs)
9333 9352 err += efunc(pc, "invalid register %u\n", rd);
9334 9353 if (rd == 0)
9335 9354 err += efunc(pc, "cannot write to %r0\n");
9336 9355 break;
9337 9356 case DIF_OP_STB:
9338 9357 case DIF_OP_STH:
9339 9358 case DIF_OP_STW:
9340 9359 case DIF_OP_STX:
9341 9360 if (r1 >= nregs)
9342 9361 err += efunc(pc, "invalid register %u\n", r1);
9343 9362 if (r2 != 0)
9344 9363 err += efunc(pc, "non-zero reserved bits\n");
9345 9364 if (rd >= nregs)
9346 9365 err += efunc(pc, "invalid register %u\n", rd);
9347 9366 if (rd == 0)
9348 9367 err += efunc(pc, "cannot write to 0 address\n");
9349 9368 break;
9350 9369 case DIF_OP_CMP:
9351 9370 case DIF_OP_SCMP:
9352 9371 if (r1 >= nregs)
9353 9372 err += efunc(pc, "invalid register %u\n", r1);
9354 9373 if (r2 >= nregs)
9355 9374 err += efunc(pc, "invalid register %u\n", r2);
9356 9375 if (rd != 0)
9357 9376 err += efunc(pc, "non-zero reserved bits\n");
9358 9377 break;
9359 9378 case DIF_OP_TST:
9360 9379 if (r1 >= nregs)
9361 9380 err += efunc(pc, "invalid register %u\n", r1);
9362 9381 if (r2 != 0 || rd != 0)
9363 9382 err += efunc(pc, "non-zero reserved bits\n");
9364 9383 break;
9365 9384 case DIF_OP_BA:
9366 9385 case DIF_OP_BE:
9367 9386 case DIF_OP_BNE:
9368 9387 case DIF_OP_BG:
9369 9388 case DIF_OP_BGU:
9370 9389 case DIF_OP_BGE:
9371 9390 case DIF_OP_BGEU:
9372 9391 case DIF_OP_BL:
9373 9392 case DIF_OP_BLU:
9374 9393 case DIF_OP_BLE:
9375 9394 case DIF_OP_BLEU:
9376 9395 if (label >= dp->dtdo_len) {
9377 9396 err += efunc(pc, "invalid branch target %u\n",
9378 9397 label);
9379 9398 }
9380 9399 if (label <= pc) {
9381 9400 err += efunc(pc, "backward branch to %u\n",
9382 9401 label);
9383 9402 }
9384 9403 break;
9385 9404 case DIF_OP_RET:
9386 9405 if (r1 != 0 || r2 != 0)
9387 9406 err += efunc(pc, "non-zero reserved bits\n");
9388 9407 if (rd >= nregs)
9389 9408 err += efunc(pc, "invalid register %u\n", rd);
9390 9409 break;
9391 9410 case DIF_OP_NOP:
9392 9411 case DIF_OP_POPTS:
9393 9412 case DIF_OP_FLUSHTS:
9394 9413 if (r1 != 0 || r2 != 0 || rd != 0)
9395 9414 err += efunc(pc, "non-zero reserved bits\n");
9396 9415 break;
9397 9416 case DIF_OP_SETX:
9398 9417 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
9399 9418 err += efunc(pc, "invalid integer ref %u\n",
9400 9419 DIF_INSTR_INTEGER(instr));
9401 9420 }
9402 9421 if (rd >= nregs)
9403 9422 err += efunc(pc, "invalid register %u\n", rd);
9404 9423 if (rd == 0)
9405 9424 err += efunc(pc, "cannot write to %r0\n");
9406 9425 break;
9407 9426 case DIF_OP_SETS:
9408 9427 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
9409 9428 err += efunc(pc, "invalid string ref %u\n",
9410 9429 DIF_INSTR_STRING(instr));
9411 9430 }
9412 9431 if (rd >= nregs)
9413 9432 err += efunc(pc, "invalid register %u\n", rd);
9414 9433 if (rd == 0)
9415 9434 err += efunc(pc, "cannot write to %r0\n");
9416 9435 break;
9417 9436 case DIF_OP_LDGA:
9418 9437 case DIF_OP_LDTA:
9419 9438 if (r1 > DIF_VAR_ARRAY_MAX)
9420 9439 err += efunc(pc, "invalid array %u\n", r1);
9421 9440 if (r2 >= nregs)
9422 9441 err += efunc(pc, "invalid register %u\n", r2);
9423 9442 if (rd >= nregs)
9424 9443 err += efunc(pc, "invalid register %u\n", rd);
9425 9444 if (rd == 0)
9426 9445 err += efunc(pc, "cannot write to %r0\n");
9427 9446 break;
9428 9447 case DIF_OP_STGA:
9429 9448 if (r1 > DIF_VAR_ARRAY_MAX)
9430 9449 err += efunc(pc, "invalid array %u\n", r1);
9431 9450 if (r2 >= nregs)
9432 9451 err += efunc(pc, "invalid register %u\n", r2);
9433 9452 if (rd >= nregs)
9434 9453 err += efunc(pc, "invalid register %u\n", rd);
9435 9454 dp->dtdo_destructive = 1;
9436 9455 break;
9437 9456 case DIF_OP_LDGS:
9438 9457 case DIF_OP_LDTS:
9439 9458 case DIF_OP_LDLS:
9440 9459 case DIF_OP_LDGAA:
9441 9460 case DIF_OP_LDTAA:
9442 9461 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
9443 9462 err += efunc(pc, "invalid variable %u\n", v);
9444 9463 if (rd >= nregs)
9445 9464 err += efunc(pc, "invalid register %u\n", rd);
9446 9465 if (rd == 0)
9447 9466 err += efunc(pc, "cannot write to %r0\n");
9448 9467 break;
9449 9468 case DIF_OP_STGS:
9450 9469 case DIF_OP_STTS:
9451 9470 case DIF_OP_STLS:
9452 9471 case DIF_OP_STGAA:
9453 9472 case DIF_OP_STTAA:
9454 9473 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
9455 9474 err += efunc(pc, "invalid variable %u\n", v);
9456 9475 if (rs >= nregs)
9457 9476 err += efunc(pc, "invalid register %u\n", rs);
9458 9477 break;
9459 9478 case DIF_OP_CALL:
9460 9479 if (subr > DIF_SUBR_MAX)
9461 9480 err += efunc(pc, "invalid subr %u\n", subr);
9462 9481 if (rd >= nregs)
9463 9482 err += efunc(pc, "invalid register %u\n", rd);
9464 9483 if (rd == 0)
9465 9484 err += efunc(pc, "cannot write to %r0\n");
9466 9485
9467 9486 if (subr == DIF_SUBR_COPYOUT ||
9468 9487 subr == DIF_SUBR_COPYOUTSTR) {
9469 9488 dp->dtdo_destructive = 1;
9470 9489 }
9471 9490
9472 9491 if (subr == DIF_SUBR_GETF) {
9473 9492 /*
9474 9493 * If we have a getf() we need to record that
9475 9494 * in our state. Note that our state can be
9476 9495 * NULL if this is a helper -- but in that
9477 9496 * case, the call to getf() is itself illegal,
9478 9497 * and will be caught (slightly later) when
9479 9498 * the helper is validated.
9480 9499 */
9481 9500 if (vstate->dtvs_state != NULL)
9482 9501 vstate->dtvs_state->dts_getf++;
9483 9502 }
9484 9503
9485 9504 break;
9486 9505 case DIF_OP_PUSHTR:
9487 9506 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
9488 9507 err += efunc(pc, "invalid ref type %u\n", type);
9489 9508 if (r2 >= nregs)
9490 9509 err += efunc(pc, "invalid register %u\n", r2);
9491 9510 if (rs >= nregs)
9492 9511 err += efunc(pc, "invalid register %u\n", rs);
9493 9512 break;
9494 9513 case DIF_OP_PUSHTV:
9495 9514 if (type != DIF_TYPE_CTF)
9496 9515 err += efunc(pc, "invalid val type %u\n", type);
9497 9516 if (r2 >= nregs)
9498 9517 err += efunc(pc, "invalid register %u\n", r2);
9499 9518 if (rs >= nregs)
9500 9519 err += efunc(pc, "invalid register %u\n", rs);
9501 9520 break;
9502 9521 default:
9503 9522 err += efunc(pc, "invalid opcode %u\n",
9504 9523 DIF_INSTR_OP(instr));
9505 9524 }
9506 9525 }
9507 9526
9508 9527 if (dp->dtdo_len != 0 &&
9509 9528 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
9510 9529 err += efunc(dp->dtdo_len - 1,
9511 9530 "expected 'ret' as last DIF instruction\n");
9512 9531 }
9513 9532
9514 9533 if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) {
9515 9534 /*
9516 9535 * If we're not returning by reference, the size must be either
9517 9536 * 0 or the size of one of the base types.
9518 9537 */
9519 9538 switch (dp->dtdo_rtype.dtdt_size) {
9520 9539 case 0:
9521 9540 case sizeof (uint8_t):
9522 9541 case sizeof (uint16_t):
9523 9542 case sizeof (uint32_t):
9524 9543 case sizeof (uint64_t):
9525 9544 break;
9526 9545
9527 9546 default:
9528 9547 err += efunc(dp->dtdo_len - 1, "bad return size\n");
9529 9548 }
9530 9549 }
9531 9550
9532 9551 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
9533 9552 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
9534 9553 dtrace_diftype_t *vt, *et;
9535 9554 uint_t id, ndx;
9536 9555
9537 9556 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
9538 9557 v->dtdv_scope != DIFV_SCOPE_THREAD &&
9539 9558 v->dtdv_scope != DIFV_SCOPE_LOCAL) {
9540 9559 err += efunc(i, "unrecognized variable scope %d\n",
9541 9560 v->dtdv_scope);
9542 9561 break;
9543 9562 }
9544 9563
9545 9564 if (v->dtdv_kind != DIFV_KIND_ARRAY &&
9546 9565 v->dtdv_kind != DIFV_KIND_SCALAR) {
9547 9566 err += efunc(i, "unrecognized variable type %d\n",
9548 9567 v->dtdv_kind);
9549 9568 break;
9550 9569 }
9551 9570
9552 9571 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
9553 9572 err += efunc(i, "%d exceeds variable id limit\n", id);
9554 9573 break;
9555 9574 }
9556 9575
9557 9576 if (id < DIF_VAR_OTHER_UBASE)
9558 9577 continue;
9559 9578
9560 9579 /*
9561 9580 * For user-defined variables, we need to check that this
9562 9581 * definition is identical to any previous definition that we
9563 9582 * encountered.
9564 9583 */
9565 9584 ndx = id - DIF_VAR_OTHER_UBASE;
9566 9585
9567 9586 switch (v->dtdv_scope) {
9568 9587 case DIFV_SCOPE_GLOBAL:
9569 9588 if (maxglobal == -1 || ndx > maxglobal)
9570 9589 maxglobal = ndx;
9571 9590
9572 9591 if (ndx < vstate->dtvs_nglobals) {
9573 9592 dtrace_statvar_t *svar;
9574 9593
9575 9594 if ((svar = vstate->dtvs_globals[ndx]) != NULL)
9576 9595 existing = &svar->dtsv_var;
9577 9596 }
9578 9597
9579 9598 break;
9580 9599
9581 9600 case DIFV_SCOPE_THREAD:
9582 9601 if (maxtlocal == -1 || ndx > maxtlocal)
9583 9602 maxtlocal = ndx;
9584 9603
9585 9604 if (ndx < vstate->dtvs_ntlocals)
9586 9605 existing = &vstate->dtvs_tlocals[ndx];
9587 9606 break;
9588 9607
9589 9608 case DIFV_SCOPE_LOCAL:
9590 9609 if (maxlocal == -1 || ndx > maxlocal)
9591 9610 maxlocal = ndx;
9592 9611
9593 9612 if (ndx < vstate->dtvs_nlocals) {
9594 9613 dtrace_statvar_t *svar;
9595 9614
9596 9615 if ((svar = vstate->dtvs_locals[ndx]) != NULL)
9597 9616 existing = &svar->dtsv_var;
9598 9617 }
9599 9618
9600 9619 break;
9601 9620 }
9602 9621
9603 9622 vt = &v->dtdv_type;
9604 9623
9605 9624 if (vt->dtdt_flags & DIF_TF_BYREF) {
9606 9625 if (vt->dtdt_size == 0) {
9607 9626 err += efunc(i, "zero-sized variable\n");
9608 9627 break;
9609 9628 }
9610 9629
9611 9630 if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL ||
9612 9631 v->dtdv_scope == DIFV_SCOPE_LOCAL) &&
9613 9632 vt->dtdt_size > dtrace_statvar_maxsize) {
9614 9633 err += efunc(i, "oversized by-ref static\n");
9615 9634 break;
9616 9635 }
9617 9636 }
9618 9637
9619 9638 if (existing == NULL || existing->dtdv_id == 0)
9620 9639 continue;
9621 9640
9622 9641 ASSERT(existing->dtdv_id == v->dtdv_id);
9623 9642 ASSERT(existing->dtdv_scope == v->dtdv_scope);
9624 9643
9625 9644 if (existing->dtdv_kind != v->dtdv_kind)
9626 9645 err += efunc(i, "%d changed variable kind\n", id);
9627 9646
9628 9647 et = &existing->dtdv_type;
9629 9648
9630 9649 if (vt->dtdt_flags != et->dtdt_flags) {
9631 9650 err += efunc(i, "%d changed variable type flags\n", id);
9632 9651 break;
9633 9652 }
9634 9653
9635 9654 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
9636 9655 err += efunc(i, "%d changed variable type size\n", id);
9637 9656 break;
9638 9657 }
9639 9658 }
9640 9659
9641 9660 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
9642 9661 dif_instr_t instr = dp->dtdo_buf[pc];
9643 9662
9644 9663 uint_t v = DIF_INSTR_VAR(instr);
9645 9664 uint_t op = DIF_INSTR_OP(instr);
9646 9665
9647 9666 switch (op) {
9648 9667 case DIF_OP_LDGS:
9649 9668 case DIF_OP_LDGAA:
9650 9669 case DIF_OP_STGS:
9651 9670 case DIF_OP_STGAA:
9652 9671 if (v > DIF_VAR_OTHER_UBASE + maxglobal)
9653 9672 err += efunc(pc, "invalid variable %u\n", v);
9654 9673 break;
9655 9674 case DIF_OP_LDTS:
9656 9675 case DIF_OP_LDTAA:
9657 9676 case DIF_OP_STTS:
9658 9677 case DIF_OP_STTAA:
9659 9678 if (v > DIF_VAR_OTHER_UBASE + maxtlocal)
9660 9679 err += efunc(pc, "invalid variable %u\n", v);
9661 9680 break;
9662 9681 case DIF_OP_LDLS:
9663 9682 case DIF_OP_STLS:
9664 9683 if (v > DIF_VAR_OTHER_UBASE + maxlocal)
9665 9684 err += efunc(pc, "invalid variable %u\n", v);
9666 9685 break;
9667 9686 default:
9668 9687 break;
9669 9688 }
9670 9689 }
9671 9690
9672 9691 return (err);
9673 9692 }
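
/*
 * For concreteness, the smallest DIFO that satisfies the rules above
 * is the pair of instructions a D compiler emits for a constant
 * expression (a sketch; raw encodings elided):
 *
 *	setx	DIF_INTEGER[0], %r1	! %r1 = inttab[0]
 *	ret	%r1			! return %r1
 *
 * It ends in "ret", never writes %r0, leaves all reserved bits zero,
 * and contains no branches.
 */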
9674 9693
9675 9694 /*
9676 9695 * Validate a DTrace DIF object that is to be used as a helper. Helpers
9677 9696 * are much more constrained than normal DIFOs. Specifically, they may
9678 9697 * not:
9679 9698 *
9680 9699 * 1. Make calls to subroutines other than copyin(), copyinstr() or
9681 9700 * miscellaneous string routines
9682 9701 * 2. Access DTrace variables other than the args[] array, and the
9683 9702 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
9684 9703 * 3. Have thread-local variables.
9685 9704 * 4. Have dynamic variables.
9686 9705 */
9687 9706 static int
9688 9707 dtrace_difo_validate_helper(dtrace_difo_t *dp)
9689 9708 {
9690 9709 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
9691 9710 int err = 0;
9692 9711 uint_t pc;
9693 9712
9694 9713 for (pc = 0; pc < dp->dtdo_len; pc++) {
9695 9714 dif_instr_t instr = dp->dtdo_buf[pc];
9696 9715
9697 9716 uint_t v = DIF_INSTR_VAR(instr);
9698 9717 uint_t subr = DIF_INSTR_SUBR(instr);
9699 9718 uint_t op = DIF_INSTR_OP(instr);
9700 9719
9701 9720 switch (op) {
9702 9721 case DIF_OP_OR:
9703 9722 case DIF_OP_XOR:
9704 9723 case DIF_OP_AND:
9705 9724 case DIF_OP_SLL:
9706 9725 case DIF_OP_SRL:
9707 9726 case DIF_OP_SRA:
9708 9727 case DIF_OP_SUB:
9709 9728 case DIF_OP_ADD:
9710 9729 case DIF_OP_MUL:
9711 9730 case DIF_OP_SDIV:
9712 9731 case DIF_OP_UDIV:
9713 9732 case DIF_OP_SREM:
9714 9733 case DIF_OP_UREM:
9715 9734 case DIF_OP_COPYS:
9716 9735 case DIF_OP_NOT:
9717 9736 case DIF_OP_MOV:
9718 9737 case DIF_OP_RLDSB:
9719 9738 case DIF_OP_RLDSH:
9720 9739 case DIF_OP_RLDSW:
9721 9740 case DIF_OP_RLDUB:
9722 9741 case DIF_OP_RLDUH:
9723 9742 case DIF_OP_RLDUW:
9724 9743 case DIF_OP_RLDX:
9725 9744 case DIF_OP_ULDSB:
9726 9745 case DIF_OP_ULDSH:
9727 9746 case DIF_OP_ULDSW:
9728 9747 case DIF_OP_ULDUB:
9729 9748 case DIF_OP_ULDUH:
9730 9749 case DIF_OP_ULDUW:
9731 9750 case DIF_OP_ULDX:
9732 9751 case DIF_OP_STB:
9733 9752 case DIF_OP_STH:
9734 9753 case DIF_OP_STW:
9735 9754 case DIF_OP_STX:
9736 9755 case DIF_OP_ALLOCS:
9737 9756 case DIF_OP_CMP:
9738 9757 case DIF_OP_SCMP:
9739 9758 case DIF_OP_TST:
9740 9759 case DIF_OP_BA:
9741 9760 case DIF_OP_BE:
9742 9761 case DIF_OP_BNE:
9743 9762 case DIF_OP_BG:
9744 9763 case DIF_OP_BGU:
9745 9764 case DIF_OP_BGE:
9746 9765 case DIF_OP_BGEU:
9747 9766 case DIF_OP_BL:
9748 9767 case DIF_OP_BLU:
9749 9768 case DIF_OP_BLE:
9750 9769 case DIF_OP_BLEU:
9751 9770 case DIF_OP_RET:
9752 9771 case DIF_OP_NOP:
9753 9772 case DIF_OP_POPTS:
9754 9773 case DIF_OP_FLUSHTS:
9755 9774 case DIF_OP_SETX:
9756 9775 case DIF_OP_SETS:
9757 9776 case DIF_OP_LDGA:
9758 9777 case DIF_OP_LDLS:
9759 9778 case DIF_OP_STGS:
9760 9779 case DIF_OP_STLS:
9761 9780 case DIF_OP_PUSHTR:
9762 9781 case DIF_OP_PUSHTV:
9763 9782 break;
9764 9783
9765 9784 case DIF_OP_LDGS:
9766 9785 if (v >= DIF_VAR_OTHER_UBASE)
9767 9786 break;
9768 9787
9769 9788 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
9770 9789 break;
9771 9790
9772 9791 if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
9773 9792 v == DIF_VAR_PPID || v == DIF_VAR_TID ||
9774 9793 v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
9775 9794 v == DIF_VAR_UID || v == DIF_VAR_GID)
9776 9795 break;
9777 9796
9778 9797 err += efunc(pc, "illegal variable %u\n", v);
9779 9798 break;
9780 9799
9781 9800 case DIF_OP_LDTA:
9782 9801 if (v < DIF_VAR_OTHER_UBASE) {
9783 9802 err += efunc(pc, "illegal variable load\n");
9784 9803 break;
9785 9804 }
9786 9805 /* FALLTHROUGH */
9787 9806 case DIF_OP_LDTS:
9788 9807 case DIF_OP_LDGAA:
9789 9808 case DIF_OP_LDTAA:
9790 9809 err += efunc(pc, "illegal dynamic variable load\n");
9791 9810 break;
9792 9811
9793 9812 case DIF_OP_STGA:
9794 9813 if (v < DIF_VAR_OTHER_UBASE) {
9795 9814 err += efunc(pc, "illegal variable store\n");
9796 9815 break;
9797 9816 }
9798 9817 /* FALLTHROUGH */
9799 9818 case DIF_OP_STTS:
9800 9819 case DIF_OP_STGAA:
9801 9820 case DIF_OP_STTAA:
9802 9821 err += efunc(pc, "illegal dynamic variable store\n");
9803 9822 break;
9804 9823
9805 9824 case DIF_OP_CALL:
9806 9825 if (subr == DIF_SUBR_ALLOCA ||
9807 9826 subr == DIF_SUBR_BCOPY ||
9808 9827 subr == DIF_SUBR_COPYIN ||
9809 9828 subr == DIF_SUBR_COPYINTO ||
9810 9829 subr == DIF_SUBR_COPYINSTR ||
9811 9830 subr == DIF_SUBR_INDEX ||
9812 9831 subr == DIF_SUBR_INET_NTOA ||
9813 9832 subr == DIF_SUBR_INET_NTOA6 ||
9814 9833 subr == DIF_SUBR_INET_NTOP ||
9815 9834 subr == DIF_SUBR_JSON ||
9816 9835 subr == DIF_SUBR_LLTOSTR ||
9817 9836 subr == DIF_SUBR_STRTOLL ||
9818 9837 subr == DIF_SUBR_RINDEX ||
9819 9838 subr == DIF_SUBR_STRCHR ||
9820 9839 subr == DIF_SUBR_STRJOIN ||
9821 9840 subr == DIF_SUBR_STRRCHR ||
9822 9841 subr == DIF_SUBR_STRSTR ||
9823 9842 subr == DIF_SUBR_HTONS ||
9824 9843 subr == DIF_SUBR_HTONL ||
9825 9844 subr == DIF_SUBR_HTONLL ||
9826 9845 subr == DIF_SUBR_NTOHS ||
9827 9846 subr == DIF_SUBR_NTOHL ||
9828 9847 subr == DIF_SUBR_NTOHLL)
9829 9848 break;
9830 9849
9831 9850 err += efunc(pc, "invalid subr %u\n", subr);
9832 9851 break;
9833 9852
9834 9853 default:
9835 9854 err += efunc(pc, "invalid opcode %u\n",
9836 9855 DIF_INSTR_OP(instr));
9837 9856 }
9838 9857 }
9839 9858
9840 9859 return (err);
9841 9860 }
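
/*
 * These constraints still admit the clauses found in a typical
 * ustack() helper, which confine themselves to args[], copyinstr()
 * and clause-local variables, e.g. (sketch):
 *
 *	dtrace:helper:ustack:
 *	{
 *		this->frame = copyinstr(arg0);
 *	}
 *
 * whereas anything requiring thread-local ("self->") or associative
 * array state is rejected above.
 */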
9842 9861
9843 9862 /*
9844 9863 * Returns 1 if the expression in the DIF object can be cached on a per-thread
9845 9864 * basis; 0 if not.
9846 9865 */
9847 9866 static int
9848 9867 dtrace_difo_cacheable(dtrace_difo_t *dp)
9849 9868 {
9850 9869 int i;
9851 9870
9852 9871 if (dp == NULL)
9853 9872 return (0);
9854 9873
9855 9874 for (i = 0; i < dp->dtdo_varlen; i++) {
9856 9875 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9857 9876
9858 9877 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
9859 9878 continue;
9860 9879
9861 9880 switch (v->dtdv_id) {
9862 9881 case DIF_VAR_CURTHREAD:
9863 9882 case DIF_VAR_PID:
9864 9883 case DIF_VAR_TID:
9865 9884 case DIF_VAR_EXECNAME:
9866 9885 case DIF_VAR_ZONENAME:
9867 9886 break;
9868 9887
9869 9888 default:
9870 9889 return (0);
9871 9890 }
9872 9891 }
9873 9892
9874 9893 /*
9875 9894 * This DIF object may be cacheable. Now we need to look for any
9876 9895 * array loading instructions, any memory loading instructions, or
9877 9896 * any stores to thread-local variables.
9878 9897 */
9879 9898 for (i = 0; i < dp->dtdo_len; i++) {
9880 9899 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);
9881 9900
9882 9901 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
9883 9902 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
9884 9903 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
9885 9904 op == DIF_OP_LDGA || op == DIF_OP_STTS)
9886 9905 return (0);
9887 9906 }
9888 9907
9889 9908 return (1);
9890 9909 }
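
/*
 * Concretely, the DIFO for the D predicate /execname == "nfsd"/ only
 * reads the execname variable and may therefore be cached per-thread,
 * while a predicate such as /args[0] != NULL/ requires an array load
 * (ldga) and may not be.
 */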
9891 9910
9892 9911 static void
9893 9912 dtrace_difo_hold(dtrace_difo_t *dp)
9894 9913 {
9895 9914 int i;
9896 9915
9897 9916 ASSERT(MUTEX_HELD(&dtrace_lock));
9898 9917
9899 9918 dp->dtdo_refcnt++;
9900 9919 ASSERT(dp->dtdo_refcnt != 0);
9901 9920
9902 9921 /*
9903 9922 * We need to check this DIF object for references to the variable
9904 9923 * DIF_VAR_VTIMESTAMP.
9905 9924 */
9906 9925 for (i = 0; i < dp->dtdo_varlen; i++) {
9907 9926 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9908 9927
9909 9928 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
9910 9929 continue;
9911 9930
9912 9931 if (dtrace_vtime_references++ == 0)
9913 9932 dtrace_vtime_enable();
9914 9933 }
9915 9934 }
9916 9935
9917 9936 /*
9918 9937 * This routine calculates the dynamic variable chunksize for a given DIF
9919 9938 * object. The calculation is not fool-proof, and can probably be tricked by
9920 9939 * malicious DIF -- but it works for all compiler-generated DIF. Because this
9921 9940 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
9922 9941 * if a dynamic variable size exceeds the chunksize.
9923 9942 */
9924 9943 static void
9925 9944 dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9926 9945 {
9927 9946 uint64_t sval;
9928 9947 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
9929 9948 const dif_instr_t *text = dp->dtdo_buf;
9930 9949 uint_t pc, srd = 0;
9931 9950 uint_t ttop = 0;
9932 9951 size_t size, ksize;
9933 9952 uint_t id, i;
9934 9953
9935 9954 for (pc = 0; pc < dp->dtdo_len; pc++) {
9936 9955 dif_instr_t instr = text[pc];
9937 9956 uint_t op = DIF_INSTR_OP(instr);
9938 9957 uint_t rd = DIF_INSTR_RD(instr);
9939 9958 uint_t r1 = DIF_INSTR_R1(instr);
9940 9959 uint_t nkeys = 0;
9941 9960 uchar_t scope;
9942 9961
9943 9962 dtrace_key_t *key = tupregs;
9944 9963
9945 9964 switch (op) {
9946 9965 case DIF_OP_SETX:
9947 9966 sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
9948 9967 srd = rd;
9949 9968 continue;
9950 9969
9951 9970 case DIF_OP_STTS:
9952 9971 key = &tupregs[DIF_DTR_NREGS];
9953 9972 key[0].dttk_size = 0;
9954 9973 key[1].dttk_size = 0;
9955 9974 nkeys = 2;
9956 9975 scope = DIFV_SCOPE_THREAD;
9957 9976 break;
9958 9977
9959 9978 case DIF_OP_STGAA:
9960 9979 case DIF_OP_STTAA:
9961 9980 nkeys = ttop;
9962 9981
9963 9982 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
9964 9983 key[nkeys++].dttk_size = 0;
9965 9984
9966 9985 key[nkeys++].dttk_size = 0;
9967 9986
9968 9987 if (op == DIF_OP_STTAA) {
9969 9988 scope = DIFV_SCOPE_THREAD;
9970 9989 } else {
9971 9990 scope = DIFV_SCOPE_GLOBAL;
9972 9991 }
9973 9992
9974 9993 break;
9975 9994
9976 9995 case DIF_OP_PUSHTR:
9977 9996 if (ttop == DIF_DTR_NREGS)
9978 9997 return;
9979 9998
9980 9999 if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
9981 10000 /*
9982 10001 * If the register for the size of the "pushtr"
9983 10002 * is %r0 (or the value is 0) and the type is
9984 10003 * a string, we'll use the system-wide default
9985 10004 * string size.
9986 10005 */
9987 10006 tupregs[ttop++].dttk_size =
9988 10007 dtrace_strsize_default;
9989 10008 } else {
9990 10009 if (srd == 0)
9991 10010 return;
9992 10011
9993 10012 if (sval > LONG_MAX)
9994 10013 return;
9995 10014
9996 10015 tupregs[ttop++].dttk_size = sval;
9997 10016 }
9998 10017
9999 10018 break;
10000 10019
10001 10020 case DIF_OP_PUSHTV:
10002 10021 if (ttop == DIF_DTR_NREGS)
10003 10022 return;
10004 10023
10005 10024 tupregs[ttop++].dttk_size = 0;
10006 10025 break;
10007 10026
10008 10027 case DIF_OP_FLUSHTS:
10009 10028 ttop = 0;
10010 10029 break;
10011 10030
10012 10031 case DIF_OP_POPTS:
10013 10032 if (ttop != 0)
10014 10033 ttop--;
10015 10034 break;
10016 10035 }
10017 10036
10018 10037 sval = 0;
10019 10038 srd = 0;
10020 10039
10021 10040 if (nkeys == 0)
10022 10041 continue;
10023 10042
10024 10043 /*
10025 10044 * We have a dynamic variable allocation; calculate its size.
10026 10045 */
10027 10046 for (ksize = 0, i = 0; i < nkeys; i++)
10028 10047 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
10029 10048
10030 10049 size = sizeof (dtrace_dynvar_t);
10031 10050 size += sizeof (dtrace_key_t) * (nkeys - 1);
10032 10051 size += ksize;
10033 10052
10034 10053 /*
10035 10054 * Now we need to determine the size of the stored data.
10036 10055 */
10037 10056 id = DIF_INSTR_VAR(instr);
10038 10057
10039 10058 for (i = 0; i < dp->dtdo_varlen; i++) {
10040 10059 dtrace_difv_t *v = &dp->dtdo_vartab[i];
10041 10060
10042 10061 if (v->dtdv_id == id && v->dtdv_scope == scope) {
10043 10062 size += v->dtdv_type.dtdt_size;
10044 10063 break;
10045 10064 }
10046 10065 }
10047 10066
10048 10067 if (i == dp->dtdo_varlen)
10049 10068 return;
10050 10069
10051 10070 /*
10052 10071 * We have the size. If this is larger than the chunk size
10053 10072 * for our dynamic variable state, reset the chunk size.
10054 10073 */
10055 10074 size = P2ROUNDUP(size, sizeof (uint64_t));
10056 10075
10057 10076 /*
10058 10077 * Before setting the chunk size, check that we're not going
10059 10078 * to set it to a negative value...
10060 10079 */
10061 10080 if (size > LONG_MAX)
10062 10081 return;
10063 10082
10064 10083 /*
10065 10084 * ...and make certain that we didn't badly overflow.
10066 10085 */
10067 10086 if (size < ksize || size < sizeof (dtrace_dynvar_t))
10068 10087 return;
10069 10088
10070 10089 if (size > vstate->dtvs_dynvars.dtds_chunksize)
10071 10090 vstate->dtvs_dynvars.dtds_chunksize = size;
10072 10091 }
10073 10092 }
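
/*
 * Worked example (a sketch; exact type sizes vary): for the D
 * statement a[pid, "foo"] = 0, the tuple comprises one value key
 * (dttk_size 0) and one string key of dtrace_strsize_default (256
 * bytes by default), and the "stgaa" contributes one more zero-sized
 * key, so nkeys is 3.  The chunk size computed above is thus
 *
 *	sizeof (dtrace_dynvar_t) + 2 * sizeof (dtrace_key_t)
 *	    + P2ROUNDUP(256, sizeof (uint64_t)) + sizeof (uint64_t)
 *
 * rounded up to a multiple of sizeof (uint64_t).
 */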
10074 10093
10075 10094 static void
10076 10095 dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
10077 10096 {
10078 10097 int i, oldsvars, osz, nsz, otlocals, ntlocals;
10079 10098 uint_t id;
10080 10099
10081 10100 ASSERT(MUTEX_HELD(&dtrace_lock));
10082 10101 ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);
10083 10102
10084 10103 for (i = 0; i < dp->dtdo_varlen; i++) {
10085 10104 dtrace_difv_t *v = &dp->dtdo_vartab[i];
10086 10105 dtrace_statvar_t *svar, ***svarp;
10087 10106 size_t dsize = 0;
10088 10107 uint8_t scope = v->dtdv_scope;
10089 10108 int *np;
10090 10109
10091 10110 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
10092 10111 continue;
10093 10112
10094 10113 id -= DIF_VAR_OTHER_UBASE;
10095 10114
10096 10115 switch (scope) {
10097 10116 case DIFV_SCOPE_THREAD:
10098 10117 while (id >= (otlocals = vstate->dtvs_ntlocals)) {
10099 10118 dtrace_difv_t *tlocals;
10100 10119
10101 10120 if ((ntlocals = (otlocals << 1)) == 0)
10102 10121 ntlocals = 1;
10103 10122
10104 10123 osz = otlocals * sizeof (dtrace_difv_t);
10105 10124 nsz = ntlocals * sizeof (dtrace_difv_t);
10106 10125
10107 10126 tlocals = kmem_zalloc(nsz, KM_SLEEP);
10108 10127
10109 10128 if (osz != 0) {
10110 10129 bcopy(vstate->dtvs_tlocals,
10111 10130 tlocals, osz);
10112 10131 kmem_free(vstate->dtvs_tlocals, osz);
10113 10132 }
10114 10133
10115 10134 vstate->dtvs_tlocals = tlocals;
10116 10135 vstate->dtvs_ntlocals = ntlocals;
10117 10136 }
10118 10137
10119 10138 vstate->dtvs_tlocals[id] = *v;
10120 10139 continue;
10121 10140
10122 10141 case DIFV_SCOPE_LOCAL:
10123 10142 np = &vstate->dtvs_nlocals;
10124 10143 svarp = &vstate->dtvs_locals;
10125 10144
10126 10145 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
10127 10146 dsize = NCPU * (v->dtdv_type.dtdt_size +
10128 10147 sizeof (uint64_t));
10129 10148 else
10130 10149 dsize = NCPU * sizeof (uint64_t);
10131 10150
10132 10151 break;
10133 10152
10134 10153 case DIFV_SCOPE_GLOBAL:
10135 10154 np = &vstate->dtvs_nglobals;
10136 10155 svarp = &vstate->dtvs_globals;
10137 10156
10138 10157 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
10139 10158 dsize = v->dtdv_type.dtdt_size +
10140 10159 sizeof (uint64_t);
10141 10160
10142 10161 break;
10143 10162
10144 10163 default:
10145 10164 ASSERT(0);
10146 10165 }
10147 10166
10148 10167 while (id >= (oldsvars = *np)) {
10149 10168 dtrace_statvar_t **statics;
10150 10169 int newsvars, oldsize, newsize;
10151 10170
10152 10171 if ((newsvars = (oldsvars << 1)) == 0)
10153 10172 newsvars = 1;
10154 10173
10155 10174 oldsize = oldsvars * sizeof (dtrace_statvar_t *);
10156 10175 newsize = newsvars * sizeof (dtrace_statvar_t *);
10157 10176
10158 10177 statics = kmem_zalloc(newsize, KM_SLEEP);
10159 10178
10160 10179 if (oldsize != 0) {
10161 10180 bcopy(*svarp, statics, oldsize);
10162 10181 kmem_free(*svarp, oldsize);
10163 10182 }
10164 10183
10165 10184 *svarp = statics;
10166 10185 *np = newsvars;
10167 10186 }
10168 10187
10169 10188 if ((svar = (*svarp)[id]) == NULL) {
10170 10189 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
10171 10190 svar->dtsv_var = *v;
10172 10191
10173 10192 if ((svar->dtsv_size = dsize) != 0) {
10174 10193 svar->dtsv_data = (uint64_t)(uintptr_t)
10175 10194 kmem_zalloc(dsize, KM_SLEEP);
10176 10195 }
10177 10196
10178 10197 (*svarp)[id] = svar;
10179 10198 }
10180 10199
10181 10200 svar->dtsv_refcnt++;
10182 10201 }
10183 10202
10184 10203 dtrace_difo_chunksize(dp, vstate);
10185 10204 dtrace_difo_hold(dp);
10186 10205 }
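/*
 * Editorial note (not from the original source): dtvs_tlocals and the
 * per-scope statics arrays above grow by doubling from a single slot.
 * Referencing variable id 5, for example, drives the array through
 * sizes 1, 2, 4 and 8; each step copies the old contents and frees the
 * old allocation, so the array length is always a power of two.
 */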
10187 10206
10188 10207 static dtrace_difo_t *
10189 10208 dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
10190 10209 {
10191 10210 dtrace_difo_t *new;
10192 10211 size_t sz;
10193 10212
10194 10213 ASSERT(dp->dtdo_buf != NULL);
10195 10214 ASSERT(dp->dtdo_refcnt != 0);
10196 10215
10197 10216 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
10198 10217
10199 10218 ASSERT(dp->dtdo_buf != NULL);
10200 10219 sz = dp->dtdo_len * sizeof (dif_instr_t);
10201 10220 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
10202 10221 bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
10203 10222 new->dtdo_len = dp->dtdo_len;
10204 10223
10205 10224 if (dp->dtdo_strtab != NULL) {
10206 10225 ASSERT(dp->dtdo_strlen != 0);
10207 10226 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
10208 10227 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
10209 10228 new->dtdo_strlen = dp->dtdo_strlen;
10210 10229 }
10211 10230
10212 10231 if (dp->dtdo_inttab != NULL) {
10213 10232 ASSERT(dp->dtdo_intlen != 0);
10214 10233 sz = dp->dtdo_intlen * sizeof (uint64_t);
10215 10234 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
10216 10235 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
10217 10236 new->dtdo_intlen = dp->dtdo_intlen;
10218 10237 }
10219 10238
10220 10239 if (dp->dtdo_vartab != NULL) {
10221 10240 ASSERT(dp->dtdo_varlen != 0);
10222 10241 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
10223 10242 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
10224 10243 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
10225 10244 new->dtdo_varlen = dp->dtdo_varlen;
10226 10245 }
10227 10246
10228 10247 dtrace_difo_init(new, vstate);
10229 10248 return (new);
10230 10249 }
10231 10250
10232 10251 static void
10233 10252 dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
10234 10253 {
10235 10254 int i;
10236 10255
10237 10256 ASSERT(dp->dtdo_refcnt == 0);
10238 10257
10239 10258 for (i = 0; i < dp->dtdo_varlen; i++) {
10240 10259 dtrace_difv_t *v = &dp->dtdo_vartab[i];
10241 10260 dtrace_statvar_t *svar, **svarp;
10242 10261 uint_t id;
10243 10262 uint8_t scope = v->dtdv_scope;
10244 10263 int *np;
10245 10264
10246 10265 switch (scope) {
10247 10266 case DIFV_SCOPE_THREAD:
10248 10267 continue;
10249 10268
10250 10269 case DIFV_SCOPE_LOCAL:
10251 10270 np = &vstate->dtvs_nlocals;
10252 10271 svarp = vstate->dtvs_locals;
10253 10272 break;
10254 10273
10255 10274 case DIFV_SCOPE_GLOBAL:
10256 10275 np = &vstate->dtvs_nglobals;
10257 10276 svarp = vstate->dtvs_globals;
10258 10277 break;
10259 10278
10260 10279 default:
10261 10280 ASSERT(0);
10262 10281 }
10263 10282
10264 10283 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
10265 10284 continue;
10266 10285
10267 10286 id -= DIF_VAR_OTHER_UBASE;
10268 10287 ASSERT(id < *np);
10269 10288
10270 10289 svar = svarp[id];
10271 10290 ASSERT(svar != NULL);
10272 10291 ASSERT(svar->dtsv_refcnt > 0);
10273 10292
10274 10293 if (--svar->dtsv_refcnt > 0)
10275 10294 continue;
10276 10295
10277 10296 if (svar->dtsv_size != 0) {
10278 10297 ASSERT(svar->dtsv_data != NULL);
10279 10298 kmem_free((void *)(uintptr_t)svar->dtsv_data,
10280 10299 svar->dtsv_size);
10281 10300 }
10282 10301
10283 10302 kmem_free(svar, sizeof (dtrace_statvar_t));
10284 10303 svarp[id] = NULL;
10285 10304 }
10286 10305
10287 10306 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
10288 10307 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
10289 10308 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
10290 10309 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
10291 10310
10292 10311 kmem_free(dp, sizeof (dtrace_difo_t));
10293 10312 }
10294 10313
10295 10314 static void
10296 10315 dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
10297 10316 {
10298 10317 int i;
10299 10318
10300 10319 ASSERT(MUTEX_HELD(&dtrace_lock));
10301 10320 ASSERT(dp->dtdo_refcnt != 0);
10302 10321
10303 10322 for (i = 0; i < dp->dtdo_varlen; i++) {
10304 10323 dtrace_difv_t *v = &dp->dtdo_vartab[i];
10305 10324
10306 10325 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
10307 10326 continue;
10308 10327
10309 10328 ASSERT(dtrace_vtime_references > 0);
10310 10329 if (--dtrace_vtime_references == 0)
10311 10330 dtrace_vtime_disable();
10312 10331 }
10313 10332
10314 10333 if (--dp->dtdo_refcnt == 0)
10315 10334 dtrace_difo_destroy(dp, vstate);
10316 10335 }
10317 10336
10318 10337 /*
10319 10338 * DTrace Format Functions
10320 10339 */
10321 10340 static uint16_t
10322 10341 dtrace_format_add(dtrace_state_t *state, char *str)
10323 10342 {
10324 10343 char *fmt, **new;
10325 10344 uint16_t ndx, len = strlen(str) + 1;
10326 10345
10327 10346 fmt = kmem_zalloc(len, KM_SLEEP);
10328 10347 bcopy(str, fmt, len);
10329 10348
10330 10349 for (ndx = 0; ndx < state->dts_nformats; ndx++) {
10331 10350 if (state->dts_formats[ndx] == NULL) {
10332 10351 state->dts_formats[ndx] = fmt;
10333 10352 return (ndx + 1);
10334 10353 }
10335 10354 }
10336 10355
10337 10356 if (state->dts_nformats == USHRT_MAX) {
10338 10357 /*
10339 10358 * This is only likely if a denial-of-service attack is being
10340 10359 * attempted. As such, it's okay to fail silently here.
10341 10360 */
10342 10361 kmem_free(fmt, len);
10343 10362 return (0);
10344 10363 }
10345 10364
10346 10365 /*
10347 10366 * For simplicity, we always resize the formats array to be exactly the
10348 10367 * number of formats.
10349 10368 */
10350 10369 ndx = state->dts_nformats++;
10351 10370 new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);
10352 10371
10353 10372 if (state->dts_formats != NULL) {
10354 10373 ASSERT(ndx != 0);
10355 10374 bcopy(state->dts_formats, new, ndx * sizeof (char *));
10356 10375 kmem_free(state->dts_formats, ndx * sizeof (char *));
10357 10376 }
10358 10377
10359 10378 state->dts_formats = new;
10360 10379 state->dts_formats[ndx] = fmt;
10361 10380
10362 10381 return (ndx + 1);
10363 10382 }
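/*
 * Editorial note (not from the original source): format handles are
 * 1-based -- dtrace_format_add() returns ndx + 1, with 0 meaning "no
 * format" (or allocation failure) -- which is why dtrace_format_remove()
 * and its callers index dts_formats with (format - 1).
 */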
10364 10383
10365 10384 static void
10366 10385 dtrace_format_remove(dtrace_state_t *state, uint16_t format)
10367 10386 {
10368 10387 char *fmt;
10369 10388
10370 10389 ASSERT(state->dts_formats != NULL);
10371 10390 ASSERT(format <= state->dts_nformats);
10372 10391 ASSERT(state->dts_formats[format - 1] != NULL);
10373 10392
10374 10393 fmt = state->dts_formats[format - 1];
10375 10394 kmem_free(fmt, strlen(fmt) + 1);
10376 10395 state->dts_formats[format - 1] = NULL;
10377 10396 }
10378 10397
10379 10398 static void
10380 10399 dtrace_format_destroy(dtrace_state_t *state)
10381 10400 {
10382 10401 int i;
10383 10402
10384 10403 if (state->dts_nformats == 0) {
10385 10404 ASSERT(state->dts_formats == NULL);
10386 10405 return;
10387 10406 }
10388 10407
10389 10408 ASSERT(state->dts_formats != NULL);
10390 10409
10391 10410 for (i = 0; i < state->dts_nformats; i++) {
10392 10411 char *fmt = state->dts_formats[i];
10393 10412
10394 10413 if (fmt == NULL)
10395 10414 continue;
10396 10415
10397 10416 kmem_free(fmt, strlen(fmt) + 1);
10398 10417 }
10399 10418
10400 10419 kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
10401 10420 state->dts_nformats = 0;
10402 10421 state->dts_formats = NULL;
10403 10422 }
10404 10423
10405 10424 /*
10406 10425 * DTrace Predicate Functions
10407 10426 */
10408 10427 static dtrace_predicate_t *
10409 10428 dtrace_predicate_create(dtrace_difo_t *dp)
10410 10429 {
10411 10430 dtrace_predicate_t *pred;
10412 10431
10413 10432 ASSERT(MUTEX_HELD(&dtrace_lock));
10414 10433 ASSERT(dp->dtdo_refcnt != 0);
10415 10434
10416 10435 pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
10417 10436 pred->dtp_difo = dp;
10418 10437 pred->dtp_refcnt = 1;
10419 10438
10420 10439 if (!dtrace_difo_cacheable(dp))
10421 10440 return (pred);
10422 10441
10423 10442 if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
10424 10443 /*
10425 10444 * This is only theoretically possible -- we have had 2^32
10426 10445 * cacheable predicates on this machine. We cannot allow any
10427 10446 * more predicates to become cacheable: as unlikely as it is,
10428 10447 * there may be a thread caching a (now stale) predicate cache
10429 10448 * ID. (N.B.: the temptation is being successfully resisted to
10430 10449 * have this cmn_err() "Holy shit -- we executed this code!")
10431 10450 */
10432 10451 return (pred);
10433 10452 }
10434 10453
10435 10454 pred->dtp_cacheid = dtrace_predcache_id++;
10436 10455
10437 10456 return (pred);
10438 10457 }
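/*
 * Editorial note (not from the original source, and hedged): cacheable
 * predicates receive an ID from the global dtrace_predcache_id counter.
 * When a probe has exactly one ECB, dtrace_ecb_enable() and
 * dtrace_ecb_disable() below publish that ID in dtpr_predcache so that
 * probe-context code can cheaply skip threads whose cached predicate
 * result is still current.
 */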
10439 10458
10440 10459 static void
10441 10460 dtrace_predicate_hold(dtrace_predicate_t *pred)
10442 10461 {
10443 10462 ASSERT(MUTEX_HELD(&dtrace_lock));
10444 10463 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
10445 10464 ASSERT(pred->dtp_refcnt > 0);
10446 10465
10447 10466 pred->dtp_refcnt++;
10448 10467 }
10449 10468
10450 10469 static void
10451 10470 dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
10452 10471 {
10453 10472 dtrace_difo_t *dp = pred->dtp_difo;
10454 10473
10455 10474 ASSERT(MUTEX_HELD(&dtrace_lock));
10456 10475 ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
10457 10476 ASSERT(pred->dtp_refcnt > 0);
10458 10477
10459 10478 if (--pred->dtp_refcnt == 0) {
10460 10479 dtrace_difo_release(pred->dtp_difo, vstate);
10461 10480 kmem_free(pred, sizeof (dtrace_predicate_t));
10462 10481 }
10463 10482 }
10464 10483
10465 10484 /*
10466 10485 * DTrace Action Description Functions
10467 10486 */
10468 10487 static dtrace_actdesc_t *
10469 10488 dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
10470 10489 uint64_t uarg, uint64_t arg)
10471 10490 {
10472 10491 dtrace_actdesc_t *act;
10473 10492
10474 10493 ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
10475 10494 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));
10476 10495
10477 10496 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
10478 10497 act->dtad_kind = kind;
10479 10498 act->dtad_ntuple = ntuple;
10480 10499 act->dtad_uarg = uarg;
10481 10500 act->dtad_arg = arg;
10482 10501 act->dtad_refcnt = 1;
10483 10502
10484 10503 return (act);
10485 10504 }
10486 10505
10487 10506 static void
10488 10507 dtrace_actdesc_hold(dtrace_actdesc_t *act)
10489 10508 {
10490 10509 ASSERT(act->dtad_refcnt >= 1);
10491 10510 act->dtad_refcnt++;
10492 10511 }
10493 10512
10494 10513 static void
10495 10514 dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
10496 10515 {
10497 10516 dtrace_actkind_t kind = act->dtad_kind;
10498 10517 dtrace_difo_t *dp;
10499 10518
10500 10519 ASSERT(act->dtad_refcnt >= 1);
10501 10520
10502 10521 if (--act->dtad_refcnt != 0)
10503 10522 return;
10504 10523
10505 10524 if ((dp = act->dtad_difo) != NULL)
10506 10525 dtrace_difo_release(dp, vstate);
10507 10526
10508 10527 if (DTRACEACT_ISPRINTFLIKE(kind)) {
10509 10528 char *str = (char *)(uintptr_t)act->dtad_arg;
10510 10529
10511 10530 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
10512 10531 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));
10513 10532
10514 10533 if (str != NULL)
10515 10534 kmem_free(str, strlen(str) + 1);
10516 10535 }
10517 10536
10518 10537 kmem_free(act, sizeof (dtrace_actdesc_t));
10519 10538 }
10520 10539
10521 10540 /*
10522 10541 * DTrace ECB Functions
10523 10542 */
10524 10543 static dtrace_ecb_t *
10525 10544 dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
10526 10545 {
10527 10546 dtrace_ecb_t *ecb;
10528 10547 dtrace_epid_t epid;
10529 10548
10530 10549 ASSERT(MUTEX_HELD(&dtrace_lock));
10531 10550
10532 10551 ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
10533 10552 ecb->dte_predicate = NULL;
10534 10553 ecb->dte_probe = probe;
10535 10554
10536 10555 /*
10537 10556 * The default size is the size of the default action: recording
10538 10557 * the header.
10539 10558 */
10540 10559 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t);
10541 10560 ecb->dte_alignment = sizeof (dtrace_epid_t);
10542 10561
10543 10562 epid = state->dts_epid++;
10544 10563
10545 10564 if (epid - 1 >= state->dts_necbs) {
10546 10565 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
10547 10566 int necbs = state->dts_necbs << 1;
10548 10567
10549 10568 ASSERT(epid == state->dts_necbs + 1);
10550 10569
10551 10570 if (necbs == 0) {
10552 10571 ASSERT(oecbs == NULL);
10553 10572 necbs = 1;
10554 10573 }
10555 10574
10556 10575 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);
10557 10576
10558 10577 if (oecbs != NULL)
10559 10578 bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));
10560 10579
10561 10580 dtrace_membar_producer();
10562 10581 state->dts_ecbs = ecbs;
10563 10582
10564 10583 if (oecbs != NULL) {
10565 10584 /*
10566 10585 * If this state is active, we must dtrace_sync()
10567 10586 * before we can free the old dts_ecbs array: we're
10568 10587 * coming in hot, and there may be active ring
10569 10588 * buffer processing (which indexes into the dts_ecbs
10570 10589 * array) on another CPU.
10571 10590 */
10572 10591 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
10573 10592 dtrace_sync();
10574 10593
10575 10594 kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
10576 10595 }
10577 10596
10578 10597 dtrace_membar_producer();
10579 10598 state->dts_necbs = necbs;
10580 10599 }
10581 10600
10582 10601 ecb->dte_state = state;
10583 10602
10584 10603 ASSERT(state->dts_ecbs[epid - 1] == NULL);
10585 10604 dtrace_membar_producer();
10586 10605 state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;
10587 10606
10588 10607 return (ecb);
10589 10608 }
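/*
 * Illustrative note (not from the original source): EPIDs are 1-based
 * and dense, and dts_necbs doubles through 1, 2, 4, .... The ASSERT
 * above holds because a resize only occurs when the new EPID is exactly
 * one past the old array -- e.g. epid 5 arrives only once dts_necbs is
 * 4, growing the array to 8.
 */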
10590 10609
10591 10610 static int
10592 10611 dtrace_ecb_enable(dtrace_ecb_t *ecb)
10593 10612 {
10594 10613 dtrace_probe_t *probe = ecb->dte_probe;
10595 10614
10596 10615 ASSERT(MUTEX_HELD(&cpu_lock));
10597 10616 ASSERT(MUTEX_HELD(&dtrace_lock));
10598 10617 ASSERT(ecb->dte_next == NULL);
10599 10618
10600 10619 if (probe == NULL) {
10601 10620 /*
10602 10621 * This is the NULL probe -- there's nothing to do.
10603 10622 */
10604 10623 return (0);
10605 10624 }
10606 10625
10607 10626 if (probe->dtpr_ecb == NULL) {
10608 10627 dtrace_provider_t *prov = probe->dtpr_provider;
10609 10628
10610 10629 /*
10611 10630 * We're the first ECB on this probe.
10612 10631 */
10613 10632 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;
10614 10633
10615 10634 if (ecb->dte_predicate != NULL)
10616 10635 probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
10617 10636
10618 10637 return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
10619 10638 probe->dtpr_id, probe->dtpr_arg));
10620 10639 } else {
10621 10640 /*
10622 10641 * This probe is already active. Swing the last pointer to
10623 10642 * point to the new ECB, and issue a dtrace_sync() to assure
10624 10643 * that all CPUs have seen the change.
10625 10644 */
10626 10645 ASSERT(probe->dtpr_ecb_last != NULL);
10627 10646 probe->dtpr_ecb_last->dte_next = ecb;
10628 10647 probe->dtpr_ecb_last = ecb;
10629 10648 probe->dtpr_predcache = 0;
10630 10649
10631 10650 dtrace_sync();
10632 10651 return (0);
10633 10652 }
10634 10653 }
10635 10654
10636 10655 static int
10637 10656 dtrace_ecb_resize(dtrace_ecb_t *ecb)
10638 10657 {
10639 10658 dtrace_action_t *act;
10640 10659 uint32_t curneeded = UINT32_MAX;
10641 10660 uint32_t aggbase = UINT32_MAX;
10642 10661
10643 10662 /*
10644 10663 * If we record anything, we always record the dtrace_rechdr_t. (And
10645 10664 * we always record it first.)
10646 10665 */
10647 10666 ecb->dte_size = sizeof (dtrace_rechdr_t);
10648 10667 ecb->dte_alignment = sizeof (dtrace_epid_t);
10649 10668
10650 10669 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
10651 10670 dtrace_recdesc_t *rec = &act->dta_rec;
10652 10671 ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1);
10653 10672
10654 10673 ecb->dte_alignment = MAX(ecb->dte_alignment,
10655 10674 rec->dtrd_alignment);
10656 10675
10657 10676 if (DTRACEACT_ISAGG(act->dta_kind)) {
10658 10677 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
10659 10678
10660 10679 ASSERT(rec->dtrd_size != 0);
10661 10680 ASSERT(agg->dtag_first != NULL);
10662 10681 ASSERT(act->dta_prev->dta_intuple);
10663 10682 ASSERT(aggbase != UINT32_MAX);
10664 10683 ASSERT(curneeded != UINT32_MAX);
10665 10684
10666 10685 agg->dtag_base = aggbase;
10667 10686
10668 10687 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
10669 10688 rec->dtrd_offset = curneeded;
10670 10689 if (curneeded + rec->dtrd_size < curneeded)
10671 10690 return (EINVAL);
10672 10691 curneeded += rec->dtrd_size;
10673 10692 ecb->dte_needed = MAX(ecb->dte_needed, curneeded);
10674 10693
10675 10694 aggbase = UINT32_MAX;
10676 10695 curneeded = UINT32_MAX;
10677 10696 } else if (act->dta_intuple) {
10678 10697 if (curneeded == UINT32_MAX) {
10679 10698 /*
10680 10699 * This is the first record in a tuple. Align
10681 10700 * curneeded to be at offset 4 in an 8-byte
10682 10701 * aligned block.
10683 10702 */
10684 10703 ASSERT(act->dta_prev == NULL ||
10685 10704 !act->dta_prev->dta_intuple);
10686 10705 ASSERT3U(aggbase, ==, UINT32_MAX);
10687 10706 curneeded = P2PHASEUP(ecb->dte_size,
10688 10707 sizeof (uint64_t), sizeof (dtrace_aggid_t));
10689 10708
10690 10709 aggbase = curneeded - sizeof (dtrace_aggid_t);
10691 10710 ASSERT(IS_P2ALIGNED(aggbase,
10692 10711 sizeof (uint64_t)));
10693 10712 }
10694 10713 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
10695 10714 rec->dtrd_offset = curneeded;
10696 10715 if (curneeded + rec->dtrd_size < curneeded)
10697 10716 return (EINVAL);
10698 10717 curneeded += rec->dtrd_size;
10699 10718 } else {
10700 10719 /* tuples must be followed by an aggregation */
10701 10720 ASSERT(act->dta_prev == NULL ||
10702 10721 !act->dta_prev->dta_intuple);
10703 10722
10704 10723 ecb->dte_size = P2ROUNDUP(ecb->dte_size,
10705 10724 rec->dtrd_alignment);
10706 10725 rec->dtrd_offset = ecb->dte_size;
10707 10726 if (ecb->dte_size + rec->dtrd_size < ecb->dte_size)
10708 10727 return (EINVAL);
10709 10728 ecb->dte_size += rec->dtrd_size;
10710 10729 ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size);
10711 10730 }
10712 10731 }
10713 10732
10714 10733 if ((act = ecb->dte_action) != NULL &&
10715 10734 !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
10716 10735 ecb->dte_size == sizeof (dtrace_rechdr_t)) {
10717 10736 /*
10718 10737 * If the size is still sizeof (dtrace_rechdr_t), then all
10719 10738 * actions store no data; set the size to 0.
10720 10739 */
10721 10740 ecb->dte_size = 0;
10722 10741 }
10723 10742
10724 10743 ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t));
10725 10744 ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t)));
10726 10745 ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed,
10727 10746 ecb->dte_needed);
10728 10747 return (0);
10729 10748 }
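/*
 * Illustrative sketch (editorial; assumes a 12-byte dtrace_rechdr_t) of
 * the layout computed above for a non-aggregating ECB with a 4-byte,
 * 4-byte-aligned record followed by an 8-byte, 8-byte-aligned record:
 *
 *	dte_size starts at 12 (the header)
 *	record 1: P2ROUNDUP(12, 4) = 12, so offset 12; dte_size = 16
 *	record 2: P2ROUNDUP(16, 8) = 16, so offset 16; dte_size = 24
 *
 * dte_alignment becomes the maximum record alignment (8), and dte_size
 * is finally rounded up to a multiple of sizeof (dtrace_epid_t).
 */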
10730 10749
10731 10750 static dtrace_action_t *
10732 10751 dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
10733 10752 {
10734 10753 dtrace_aggregation_t *agg;
10735 10754 size_t size = sizeof (uint64_t);
10736 10755 int ntuple = desc->dtad_ntuple;
10737 10756 dtrace_action_t *act;
10738 10757 dtrace_recdesc_t *frec;
10739 10758 dtrace_aggid_t aggid;
10740 10759 dtrace_state_t *state = ecb->dte_state;
10741 10760
10742 10761 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
10743 10762 agg->dtag_ecb = ecb;
10744 10763
10745 10764 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));
10746 10765
10747 10766 switch (desc->dtad_kind) {
10748 10767 case DTRACEAGG_MIN:
10749 10768 agg->dtag_initial = INT64_MAX;
10750 10769 agg->dtag_aggregate = dtrace_aggregate_min;
10751 10770 break;
10752 10771
10753 10772 case DTRACEAGG_MAX:
10754 10773 agg->dtag_initial = INT64_MIN;
10755 10774 agg->dtag_aggregate = dtrace_aggregate_max;
10756 10775 break;
10757 10776
10758 10777 case DTRACEAGG_COUNT:
10759 10778 agg->dtag_aggregate = dtrace_aggregate_count;
10760 10779 break;
10761 10780
10762 10781 case DTRACEAGG_QUANTIZE:
10763 10782 agg->dtag_aggregate = dtrace_aggregate_quantize;
10764 10783 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
10765 10784 sizeof (uint64_t);
10766 10785 break;
10767 10786
10768 10787 case DTRACEAGG_LQUANTIZE: {
10769 10788 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
10770 10789 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);
10771 10790
10772 10791 agg->dtag_initial = desc->dtad_arg;
10773 10792 agg->dtag_aggregate = dtrace_aggregate_lquantize;
10774 10793
10775 10794 if (step == 0 || levels == 0)
10776 10795 goto err;
10777 10796
10778 10797 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
10779 10798 break;
10780 10799 }
10781 10800
10782 10801 case DTRACEAGG_LLQUANTIZE: {
10783 10802 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
10784 10803 uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
10785 10804 uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
10786 10805 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
10787 10806 int64_t v;
10788 10807
10789 10808 agg->dtag_initial = desc->dtad_arg;
10790 10809 agg->dtag_aggregate = dtrace_aggregate_llquantize;
10791 10810
10792 10811 if (factor < 2 || low >= high || nsteps < factor)
10793 10812 goto err;
10794 10813
10795 10814 /*
10796 10815 * Now check that the number of steps evenly divides a power
10797 10816 * of the factor. (This assures both integer bucket size and
10798 10817 * linearity within each magnitude.)
10799 10818 */
10800 10819 for (v = factor; v < nsteps; v *= factor)
10801 10820 continue;
10802 10821
10803 10822 if ((v % nsteps) || (nsteps % factor))
10804 10823 goto err;
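		/*
		 * Worked example (editorial; not from the original
		 * source): with factor = 10 and nsteps = 20, the loop
		 * above leaves v = 100; 100 % 20 == 0 and 20 % 10 == 0,
		 * so the parameters are accepted. With nsteps = 15,
		 * 100 % 15 != 0 and the enabling fails.
		 */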
10805 10824
10806 10825 size = (dtrace_aggregate_llquantize_bucket(factor,
10807 10826 low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
10808 10827 break;
10809 10828 }
10810 10829
10811 10830 case DTRACEAGG_AVG:
10812 10831 agg->dtag_aggregate = dtrace_aggregate_avg;
10813 10832 size = sizeof (uint64_t) * 2;
10814 10833 break;
10815 10834
10816 10835 case DTRACEAGG_STDDEV:
10817 10836 agg->dtag_aggregate = dtrace_aggregate_stddev;
10818 10837 size = sizeof (uint64_t) * 4;
10819 10838 break;
10820 10839
10821 10840 case DTRACEAGG_SUM:
10822 10841 agg->dtag_aggregate = dtrace_aggregate_sum;
10823 10842 break;
10824 10843
10825 10844 default:
10826 10845 goto err;
10827 10846 }
10828 10847
10829 10848 agg->dtag_action.dta_rec.dtrd_size = size;
10830 10849
10831 10850 if (ntuple == 0)
10832 10851 goto err;
10833 10852
10834 10853 /*
10835 10854 * We must make sure that we have enough actions for the n-tuple.
10836 10855 */
10837 10856 for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
10838 10857 if (DTRACEACT_ISAGG(act->dta_kind))
10839 10858 break;
10840 10859
10841 10860 if (--ntuple == 0) {
10842 10861 /*
10843 10862 * This is the action with which our n-tuple begins.
10844 10863 */
10845 10864 agg->dtag_first = act;
10846 10865 goto success;
10847 10866 }
10848 10867 }
10849 10868
10850 10869 /*
10851 10870 * This n-tuple is short by ntuple elements. Return failure.
10852 10871 */
10853 10872 ASSERT(ntuple != 0);
10854 10873 err:
10855 10874 kmem_free(agg, sizeof (dtrace_aggregation_t));
10856 10875 return (NULL);
10857 10876
10858 10877 success:
10859 10878 /*
10860 10879 * If the last action in the tuple has a size of zero, it's actually
10861 10880 * an expression argument for the aggregating action.
10862 10881 */
10863 10882 ASSERT(ecb->dte_action_last != NULL);
10864 10883 act = ecb->dte_action_last;
10865 10884
10866 10885 if (act->dta_kind == DTRACEACT_DIFEXPR) {
10867 10886 ASSERT(act->dta_difo != NULL);
10868 10887
10869 10888 if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
10870 10889 agg->dtag_hasarg = 1;
10871 10890 }
10872 10891
10873 10892 /*
10874 10893 * We need to allocate an id for this aggregation.
10875 10894 */
10876 10895 aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
10877 10896 VM_BESTFIT | VM_SLEEP);
10878 10897
10879 10898 if (aggid - 1 >= state->dts_naggregations) {
10880 10899 dtrace_aggregation_t **oaggs = state->dts_aggregations;
10881 10900 dtrace_aggregation_t **aggs;
10882 10901 int naggs = state->dts_naggregations << 1;
10883 10902 int onaggs = state->dts_naggregations;
10884 10903
10885 10904 ASSERT(aggid == state->dts_naggregations + 1);
10886 10905
10887 10906 if (naggs == 0) {
10888 10907 ASSERT(oaggs == NULL);
10889 10908 naggs = 1;
10890 10909 }
10891 10910
10892 10911 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);
10893 10912
10894 10913 if (oaggs != NULL) {
10895 10914 bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
10896 10915 kmem_free(oaggs, onaggs * sizeof (*aggs));
10897 10916 }
10898 10917
10899 10918 state->dts_aggregations = aggs;
10900 10919 state->dts_naggregations = naggs;
10901 10920 }
10902 10921
10903 10922 ASSERT(state->dts_aggregations[aggid - 1] == NULL);
10904 10923 state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;
10905 10924
10906 10925 frec = &agg->dtag_first->dta_rec;
10907 10926 if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
10908 10927 frec->dtrd_alignment = sizeof (dtrace_aggid_t);
10909 10928
10910 10929 for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
10911 10930 ASSERT(!act->dta_intuple);
10912 10931 act->dta_intuple = 1;
10913 10932 }
10914 10933
10915 10934 return (&agg->dtag_action);
10916 10935 }
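/*
 * Editorial note (not from the original source): aggregation IDs are
 * allocated from the dts_aggid_arena vmem arena, yielding unique
 * 1-based integers; dts_aggregations mirrors the dts_ecbs scheme,
 * doubling in length and indexed by (aggid - 1).
 */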
10917 10936
10918 10937 static void
10919 10938 dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
10920 10939 {
10921 10940 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
10922 10941 dtrace_state_t *state = ecb->dte_state;
10923 10942 dtrace_aggid_t aggid = agg->dtag_id;
10924 10943
10925 10944 ASSERT(DTRACEACT_ISAGG(act->dta_kind));
10926 10945 vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);
10927 10946
10928 10947 ASSERT(state->dts_aggregations[aggid - 1] == agg);
10929 10948 state->dts_aggregations[aggid - 1] = NULL;
10930 10949
10931 10950 kmem_free(agg, sizeof (dtrace_aggregation_t));
10932 10951 }
10933 10952
10934 10953 static int
10935 10954 dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
10936 10955 {
10937 10956 dtrace_action_t *action, *last;
10938 10957 dtrace_difo_t *dp = desc->dtad_difo;
10939 10958 uint32_t size = 0, align = sizeof (uint8_t), mask;
10940 10959 uint16_t format = 0;
10941 10960 dtrace_recdesc_t *rec;
10942 10961 dtrace_state_t *state = ecb->dte_state;
10943 10962 dtrace_optval_t *opt = state->dts_options, nframes, strsize;
10944 10963 uint64_t arg = desc->dtad_arg;
10945 10964
10946 10965 ASSERT(MUTEX_HELD(&dtrace_lock));
10947 10966 ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);
10948 10967
10949 10968 if (DTRACEACT_ISAGG(desc->dtad_kind)) {
10950 10969 /*
10951 10970 * If this is an aggregating action, there must be neither
10952 10971 * a speculate nor a commit on the action chain.
10953 10972 */
10954 10973 dtrace_action_t *act;
10955 10974
10956 10975 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
10957 10976 if (act->dta_kind == DTRACEACT_COMMIT)
10958 10977 return (EINVAL);
10959 10978
10960 10979 if (act->dta_kind == DTRACEACT_SPECULATE)
10961 10980 return (EINVAL);
10962 10981 }
10963 10982
10964 10983 action = dtrace_ecb_aggregation_create(ecb, desc);
10965 10984
10966 10985 if (action == NULL)
10967 10986 return (EINVAL);
10968 10987 } else {
10969 10988 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
10970 10989 (desc->dtad_kind == DTRACEACT_DIFEXPR &&
10971 10990 dp != NULL && dp->dtdo_destructive)) {
10972 10991 state->dts_destructive = 1;
10973 10992 }
10974 10993
10975 10994 switch (desc->dtad_kind) {
10976 10995 case DTRACEACT_PRINTF:
10977 10996 case DTRACEACT_PRINTA:
10978 10997 case DTRACEACT_SYSTEM:
10979 10998 case DTRACEACT_FREOPEN:
10980 10999 case DTRACEACT_DIFEXPR:
10981 11000 /*
10982 11001 * We know that our arg is a string -- turn it into a
10983 11002 * format.
10984 11003 */
10985 11004 if (arg == NULL) {
10986 11005 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
10987 11006 desc->dtad_kind == DTRACEACT_DIFEXPR);
10988 11007 format = 0;
10989 11008 } else {
10990 11009 ASSERT(arg != NULL);
10991 11010 ASSERT(arg > KERNELBASE);
10992 11011 format = dtrace_format_add(state,
10993 11012 (char *)(uintptr_t)arg);
10994 11013 }
10995 11014
10996 11015 /*FALLTHROUGH*/
10997 11016 case DTRACEACT_LIBACT:
10998 11017 case DTRACEACT_TRACEMEM:
10999 11018 case DTRACEACT_TRACEMEM_DYNSIZE:
11000 11019 if (dp == NULL)
11001 11020 return (EINVAL);
11002 11021
11003 11022 if ((size = dp->dtdo_rtype.dtdt_size) != 0)
11004 11023 break;
11005 11024
11006 11025 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
11007 11026 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
11008 11027 return (EINVAL);
11009 11028
11010 11029 size = opt[DTRACEOPT_STRSIZE];
11011 11030 }
11012 11031
11013 11032 break;
11014 11033
11015 11034 case DTRACEACT_STACK:
11016 11035 if ((nframes = arg) == 0) {
11017 11036 nframes = opt[DTRACEOPT_STACKFRAMES];
11018 11037 ASSERT(nframes > 0);
11019 11038 arg = nframes;
11020 11039 }
11021 11040
11022 11041 size = nframes * sizeof (pc_t);
11023 11042 break;
11024 11043
11025 11044 case DTRACEACT_JSTACK:
11026 11045 if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
11027 11046 strsize = opt[DTRACEOPT_JSTACKSTRSIZE];
11028 11047
11029 11048 if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
11030 11049 nframes = opt[DTRACEOPT_JSTACKFRAMES];
11031 11050
11032 11051 arg = DTRACE_USTACK_ARG(nframes, strsize);
11033 11052
11034 11053 /*FALLTHROUGH*/
11035 11054 case DTRACEACT_USTACK:
11036 11055 if (desc->dtad_kind != DTRACEACT_JSTACK &&
11037 11056 (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
11038 11057 strsize = DTRACE_USTACK_STRSIZE(arg);
11039 11058 nframes = opt[DTRACEOPT_USTACKFRAMES];
11040 11059 ASSERT(nframes > 0);
11041 11060 arg = DTRACE_USTACK_ARG(nframes, strsize);
11042 11061 }
11043 11062
11044 11063 /*
11045 11064 * Save a slot for the pid.
11046 11065 */
11047 11066 size = (nframes + 1) * sizeof (uint64_t);
11048 11067 size += DTRACE_USTACK_STRSIZE(arg);
11049 11068 size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));
11050 11069
11051 11070 break;
11052 11071
11053 11072 case DTRACEACT_SYM:
11054 11073 case DTRACEACT_MOD:
11055 11074 if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
11056 11075 sizeof (uint64_t)) ||
11057 11076 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
11058 11077 return (EINVAL);
11059 11078 break;
11060 11079
11061 11080 case DTRACEACT_USYM:
11062 11081 case DTRACEACT_UMOD:
11063 11082 case DTRACEACT_UADDR:
11064 11083 if (dp == NULL ||
11065 11084 (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
11066 11085 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
11067 11086 return (EINVAL);
11068 11087
11069 11088 /*
11070 11089 * We have a slot for the pid, plus a slot for the
11071 11090 * argument. To keep things simple (aligned with
11072 11091 * bitness-neutral sizing), we store each as a 64-bit
11073 11092 * quantity.
11074 11093 */
11075 11094 size = 2 * sizeof (uint64_t);
11076 11095 break;
11077 11096
11078 11097 case DTRACEACT_STOP:
11079 11098 case DTRACEACT_BREAKPOINT:
11080 11099 case DTRACEACT_PANIC:
11081 11100 break;
11082 11101
11083 11102 case DTRACEACT_CHILL:
11084 11103 case DTRACEACT_DISCARD:
11085 11104 case DTRACEACT_RAISE:
11086 11105 if (dp == NULL)
11087 11106 return (EINVAL);
11088 11107 break;
11089 11108
11090 11109 case DTRACEACT_EXIT:
11091 11110 if (dp == NULL ||
11092 11111 (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
11093 11112 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
11094 11113 return (EINVAL);
11095 11114 break;
11096 11115
11097 11116 case DTRACEACT_SPECULATE:
11098 11117 if (ecb->dte_size > sizeof (dtrace_rechdr_t))
11099 11118 return (EINVAL);
11100 11119
11101 11120 if (dp == NULL)
11102 11121 return (EINVAL);
11103 11122
11104 11123 state->dts_speculates = 1;
11105 11124 break;
11106 11125
11107 11126 case DTRACEACT_COMMIT: {
11108 11127 dtrace_action_t *act = ecb->dte_action;
11109 11128
11110 11129 for (; act != NULL; act = act->dta_next) {
11111 11130 if (act->dta_kind == DTRACEACT_COMMIT)
11112 11131 return (EINVAL);
11113 11132 }
11114 11133
11115 11134 if (dp == NULL)
11116 11135 return (EINVAL);
11117 11136 break;
11118 11137 }
11119 11138
11120 11139 default:
11121 11140 return (EINVAL);
11122 11141 }
11123 11142
11124 11143 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
11125 11144 /*
11126 11145 * If this is a data-storing action or a speculate,
11127 11146 * we must be sure that there isn't a commit on the
11128 11147 * action chain.
11129 11148 */
11130 11149 dtrace_action_t *act = ecb->dte_action;
11131 11150
11132 11151 for (; act != NULL; act = act->dta_next) {
11133 11152 if (act->dta_kind == DTRACEACT_COMMIT)
11134 11153 return (EINVAL);
11135 11154 }
11136 11155 }
11137 11156
11138 11157 action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
11139 11158 action->dta_rec.dtrd_size = size;
11140 11159 }
11141 11160
11142 11161 action->dta_refcnt = 1;
11143 11162 rec = &action->dta_rec;
11144 11163 size = rec->dtrd_size;
11145 11164
11146 11165 for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
11147 11166 if (!(size & mask)) {
11148 11167 align = mask + 1;
11149 11168 break;
11150 11169 }
11151 11170 }
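	/*
	 * Editorial note (not from the original source): the loop above
	 * computes the record alignment as the largest power of two (up
	 * to 8) dividing the record size -- e.g. size 12 has 12 & 7 != 0
	 * but 12 & 3 == 0, giving align = 4. A size of 0 keeps the
	 * default byte alignment.
	 */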
11152 11171
11153 11172 action->dta_kind = desc->dtad_kind;
11154 11173
11155 11174 if ((action->dta_difo = dp) != NULL)
11156 11175 dtrace_difo_hold(dp);
11157 11176
11158 11177 rec->dtrd_action = action->dta_kind;
11159 11178 rec->dtrd_arg = arg;
11160 11179 rec->dtrd_uarg = desc->dtad_uarg;
11161 11180 rec->dtrd_alignment = (uint16_t)align;
11162 11181 rec->dtrd_format = format;
11163 11182
11164 11183 if ((last = ecb->dte_action_last) != NULL) {
11165 11184 ASSERT(ecb->dte_action != NULL);
11166 11185 action->dta_prev = last;
11167 11186 last->dta_next = action;
11168 11187 } else {
11169 11188 ASSERT(ecb->dte_action == NULL);
11170 11189 ecb->dte_action = action;
11171 11190 }
11172 11191
11173 11192 ecb->dte_action_last = action;
11174 11193
11175 11194 return (0);
11176 11195 }
11177 11196
11178 11197 static void
11179 11198 dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
11180 11199 {
11181 11200 dtrace_action_t *act = ecb->dte_action, *next;
11182 11201 dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
11183 11202 dtrace_difo_t *dp;
11184 11203 uint16_t format;
11185 11204
11186 11205 if (act != NULL && act->dta_refcnt > 1) {
11187 11206 ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
11188 11207 act->dta_refcnt--;
11189 11208 } else {
11190 11209 for (; act != NULL; act = next) {
11191 11210 next = act->dta_next;
11192 11211 ASSERT(next != NULL || act == ecb->dte_action_last);
11193 11212 ASSERT(act->dta_refcnt == 1);
11194 11213
11195 11214 if ((format = act->dta_rec.dtrd_format) != 0)
11196 11215 dtrace_format_remove(ecb->dte_state, format);
11197 11216
11198 11217 if ((dp = act->dta_difo) != NULL)
11199 11218 dtrace_difo_release(dp, vstate);
11200 11219
11201 11220 if (DTRACEACT_ISAGG(act->dta_kind)) {
11202 11221 dtrace_ecb_aggregation_destroy(ecb, act);
11203 11222 } else {
11204 11223 kmem_free(act, sizeof (dtrace_action_t));
11205 11224 }
11206 11225 }
11207 11226 }
11208 11227
11209 11228 ecb->dte_action = NULL;
11210 11229 ecb->dte_action_last = NULL;
11211 11230 ecb->dte_size = 0;
11212 11231 }
11213 11232
11214 11233 static void
11215 11234 dtrace_ecb_disable(dtrace_ecb_t *ecb)
11216 11235 {
11217 11236 /*
11218 11237 * We disable the ECB by removing it from its probe.
11219 11238 */
11220 11239 dtrace_ecb_t *pecb, *prev = NULL;
11221 11240 dtrace_probe_t *probe = ecb->dte_probe;
11222 11241
11223 11242 ASSERT(MUTEX_HELD(&dtrace_lock));
11224 11243
11225 11244 if (probe == NULL) {
11226 11245 /*
11227 11246 * This is the NULL probe; there is nothing to disable.
11228 11247 */
11229 11248 return;
11230 11249 }
11231 11250
11232 11251 for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
11233 11252 if (pecb == ecb)
11234 11253 break;
11235 11254 prev = pecb;
11236 11255 }
11237 11256
11238 11257 ASSERT(pecb != NULL);
11239 11258
11240 11259 if (prev == NULL) {
11241 11260 probe->dtpr_ecb = ecb->dte_next;
11242 11261 } else {
11243 11262 prev->dte_next = ecb->dte_next;
11244 11263 }
11245 11264
11246 11265 if (ecb == probe->dtpr_ecb_last) {
11247 11266 ASSERT(ecb->dte_next == NULL);
11248 11267 probe->dtpr_ecb_last = prev;
11249 11268 }
11250 11269
11251 11270 /*
11252 11271 * The ECB has been disconnected from the probe; now sync to assure
11253 11272 * that all CPUs have seen the change before returning.
11254 11273 */
11255 11274 dtrace_sync();
11256 11275
11257 11276 if (probe->dtpr_ecb == NULL) {
11258 11277 /*
11259 11278 * That was the last ECB on the probe; clear the predicate
11260 11279 * cache ID for the probe, disable it and sync one more time
11261 11280 * to assure that we'll never hit it again.
11262 11281 */
11263 11282 dtrace_provider_t *prov = probe->dtpr_provider;
11264 11283
11265 11284 ASSERT(ecb->dte_next == NULL);
11266 11285 ASSERT(probe->dtpr_ecb_last == NULL);
11267 11286 probe->dtpr_predcache = DTRACE_CACHEIDNONE;
11268 11287 prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
11269 11288 probe->dtpr_id, probe->dtpr_arg);
11270 11289 dtrace_sync();
11271 11290 } else {
11272 11291 /*
11273 11292 * There is at least one ECB remaining on the probe. If there
11274 11293 * is _exactly_ one, set the probe's predicate cache ID to be
11275 11294 * the predicate cache ID of the remaining ECB.
11276 11295 */
11277 11296 ASSERT(probe->dtpr_ecb_last != NULL);
11278 11297 ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);
11279 11298
11280 11299 if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
11281 11300 dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;
11282 11301
11283 11302 ASSERT(probe->dtpr_ecb->dte_next == NULL);
11284 11303
11285 11304 if (p != NULL)
11286 11305 probe->dtpr_predcache = p->dtp_cacheid;
11287 11306 }
11288 11307
11289 11308 ecb->dte_next = NULL;
11290 11309 }
11291 11310 }
11292 11311
11293 11312 static void
11294 11313 dtrace_ecb_destroy(dtrace_ecb_t *ecb)
11295 11314 {
11296 11315 dtrace_state_t *state = ecb->dte_state;
11297 11316 dtrace_vstate_t *vstate = &state->dts_vstate;
11298 11317 dtrace_predicate_t *pred;
11299 11318 dtrace_epid_t epid = ecb->dte_epid;
11300 11319
11301 11320 ASSERT(MUTEX_HELD(&dtrace_lock));
11302 11321 ASSERT(ecb->dte_next == NULL);
11303 11322 ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);
11304 11323
11305 11324 if ((pred = ecb->dte_predicate) != NULL)
11306 11325 dtrace_predicate_release(pred, vstate);
11307 11326
11308 11327 dtrace_ecb_action_remove(ecb);
11309 11328
11310 11329 ASSERT(state->dts_ecbs[epid - 1] == ecb);
11311 11330 state->dts_ecbs[epid - 1] = NULL;
11312 11331
11313 11332 kmem_free(ecb, sizeof (dtrace_ecb_t));
11314 11333 }
11315 11334
11316 11335 static dtrace_ecb_t *
11317 11336 dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
11318 11337 dtrace_enabling_t *enab)
11319 11338 {
11320 11339 dtrace_ecb_t *ecb;
11321 11340 dtrace_predicate_t *pred;
11322 11341 dtrace_actdesc_t *act;
11323 11342 dtrace_provider_t *prov;
11324 11343 dtrace_ecbdesc_t *desc = enab->dten_current;
11325 11344
11326 11345 ASSERT(MUTEX_HELD(&dtrace_lock));
11327 11346 ASSERT(state != NULL);
11328 11347
11329 11348 ecb = dtrace_ecb_add(state, probe);
11330 11349 ecb->dte_uarg = desc->dted_uarg;
11331 11350
11332 11351 if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
11333 11352 dtrace_predicate_hold(pred);
11334 11353 ecb->dte_predicate = pred;
11335 11354 }
11336 11355
11337 11356 if (probe != NULL) {
11338 11357 /*
11339 11358 * If the provider shows more leg than the consumer is old
11340 11359 * enough to see, we need to enable the appropriate implicit
11341 11360 * predicate bits to prevent the ecb from activating at
11342 11361 * revealing times.
11343 11362 *
11344 11363 * Providers specifying DTRACE_PRIV_USER at register time
11345 11364 * are stating that they need the /proc-style privilege
11346 11365 * model to be enforced, and this is what DTRACE_COND_OWNER
11347 11366 * and DTRACE_COND_ZONEOWNER will then do at probe time.
11348 11367 */
11349 11368 prov = probe->dtpr_provider;
11350 11369 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
11351 11370 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
11352 11371 ecb->dte_cond |= DTRACE_COND_OWNER;
11353 11372
11354 11373 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
11355 11374 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
11356 11375 ecb->dte_cond |= DTRACE_COND_ZONEOWNER;
11357 11376
11358 11377 /*
11359 11378 * If the provider shows us kernel innards and the user
11360 11379 * is lacking sufficient privilege, enable the
11361 11380 * DTRACE_COND_USERMODE implicit predicate.
11362 11381 */
11363 11382 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
11364 11383 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
11365 11384 ecb->dte_cond |= DTRACE_COND_USERMODE;
11366 11385 }
11367 11386
11368 11387 if (dtrace_ecb_create_cache != NULL) {
11369 11388 /*
11370 11389 * If we have a cached ecb, we'll use its action list instead
11371 11390 * of creating our own (saving both time and space).
11372 11391 */
11373 11392 dtrace_ecb_t *cached = dtrace_ecb_create_cache;
11374 11393 dtrace_action_t *act = cached->dte_action;
11375 11394
11376 11395 if (act != NULL) {
11377 11396 ASSERT(act->dta_refcnt > 0);
11378 11397 act->dta_refcnt++;
11379 11398 ecb->dte_action = act;
11380 11399 ecb->dte_action_last = cached->dte_action_last;
11381 11400 ecb->dte_needed = cached->dte_needed;
11382 11401 ecb->dte_size = cached->dte_size;
11383 11402 ecb->dte_alignment = cached->dte_alignment;
11384 11403 }
11385 11404
11386 11405 return (ecb);
11387 11406 }
11388 11407
11389 11408 for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
11390 11409 if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
11391 11410 dtrace_ecb_destroy(ecb);
11392 11411 return (NULL);
11393 11412 }
11394 11413 }
11395 11414
11396 11415 if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) {
11397 11416 dtrace_ecb_destroy(ecb);
11398 11417 return (NULL);
11399 11418 }
11400 11419
11401 11420 return (dtrace_ecb_create_cache = ecb);
11402 11421 }
11403 11422
11404 11423 static int
11405 11424 dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
11406 11425 {
11407 11426 dtrace_ecb_t *ecb;
11408 11427 dtrace_enabling_t *enab = arg;
11409 11428 dtrace_state_t *state = enab->dten_vstate->dtvs_state;
11410 11429
11411 11430 ASSERT(state != NULL);
11412 11431
11413 11432 if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
11414 11433 /*
11415 11434 * This probe was created in a generation for which this
11416 11435 * enabling has previously created ECBs; we don't want to
11417 11436 * enable it again, so just kick out.
11418 11437 */
11419 11438 return (DTRACE_MATCH_NEXT);
11420 11439 }
11421 11440
11422 11441 if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
11423 11442 return (DTRACE_MATCH_DONE);
11424 11443
11425 11444 if (dtrace_ecb_enable(ecb) < 0)
11426 11445 return (DTRACE_MATCH_FAIL);
11427 11446
11428 11447 return (DTRACE_MATCH_NEXT);
11429 11448 }
11430 11449
11431 11450 static dtrace_ecb_t *
11432 11451 dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
11433 11452 {
11434 11453 dtrace_ecb_t *ecb;
11435 11454
11436 11455 ASSERT(MUTEX_HELD(&dtrace_lock));
11437 11456
11438 11457 if (id == 0 || id > state->dts_necbs)
11439 11458 return (NULL);
11440 11459
11441 11460 ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
11442 11461 ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);
11443 11462
11444 11463 return (state->dts_ecbs[id - 1]);
11445 11464 }
11446 11465
11447 11466 static dtrace_aggregation_t *
11448 11467 dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
11449 11468 {
11450 11469 dtrace_aggregation_t *agg;
11451 11470
11452 11471 ASSERT(MUTEX_HELD(&dtrace_lock));
11453 11472
11454 11473 if (id == 0 || id > state->dts_naggregations)
11455 11474 return (NULL);
11456 11475
11457 11476 ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
11458 11477 ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
11459 11478 agg->dtag_id == id);
11460 11479
11461 11480 return (state->dts_aggregations[id - 1]);
11462 11481 }
11463 11482
11464 11483 /*
11465 11484 * DTrace Buffer Functions
11466 11485 *
11467 11486 * The following functions manipulate DTrace buffers. Most of these functions
11468 11487 * are called in the context of establishing or processing consumer state;
11469 11488 * exceptions are explicitly noted.
11470 11489 */
11471 11490
11472 11491 /*
11473 11492 * Note: called from cross call context. This function switches the two
11474 11493 * buffers on a given CPU. The atomicity of this operation is assured by
11475 11494 * disabling interrupts while the actual switch takes place; the disabling of
11476 11495 * interrupts serializes the execution with any execution of dtrace_probe() on
11477 11496 * the same CPU.
11478 11497 */
11479 11498 static void
11480 11499 dtrace_buffer_switch(dtrace_buffer_t *buf)
11481 11500 {
11482 11501 caddr_t tomax = buf->dtb_tomax;
11483 11502 caddr_t xamot = buf->dtb_xamot;
11484 11503 dtrace_icookie_t cookie;
11485 11504 hrtime_t now;
11486 11505
11487 11506 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
11488 11507 ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
11489 11508
11490 11509 cookie = dtrace_interrupt_disable();
11491 11510 now = dtrace_gethrtime();
11492 11511 buf->dtb_tomax = xamot;
11493 11512 buf->dtb_xamot = tomax;
11494 11513 buf->dtb_xamot_drops = buf->dtb_drops;
11495 11514 buf->dtb_xamot_offset = buf->dtb_offset;
11496 11515 buf->dtb_xamot_errors = buf->dtb_errors;
11497 11516 buf->dtb_xamot_flags = buf->dtb_flags;
11498 11517 buf->dtb_offset = 0;
11499 11518 buf->dtb_drops = 0;
11500 11519 buf->dtb_errors = 0;
11501 11520 buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
11502 11521 buf->dtb_interval = now - buf->dtb_switched;
11503 11522 buf->dtb_switched = now;
11504 11523 dtrace_interrupt_enable(cookie);
11505 11524 }
11506 11525
11507 11526 /*
11508 11527 * Note: called from cross call context. This function activates a buffer
11509 11528 * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation
11510 11529 * is guaranteed by the disabling of interrupts.
11511 11530 */
11512 11531 static void
11513 11532 dtrace_buffer_activate(dtrace_state_t *state)
11514 11533 {
11515 11534 dtrace_buffer_t *buf;
11516 11535 dtrace_icookie_t cookie = dtrace_interrupt_disable();
11517 11536
11518 11537 buf = &state->dts_buffer[CPU->cpu_id];
11519 11538
11520 11539 if (buf->dtb_tomax != NULL) {
11521 11540 /*
11522 11541 * We might like to assert that the buffer is marked inactive,
11523 11542 * but this isn't necessarily true: the buffer for the CPU
11524 11543 * that processes the BEGIN probe has its buffer activated
11525 11544 		 * manually. In this case, we take the (harmless) action of
11526 11545 		 * re-clearing the INACTIVE bit.
11527 11546 */
11528 11547 buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
11529 11548 }
11530 11549
11531 11550 dtrace_interrupt_enable(cookie);
11532 11551 }
11533 11552
11534 11553 static int
11535 11554 dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
11536 11555 processorid_t cpu, int *factor)
11537 11556 {
11538 11557 cpu_t *cp;
11539 11558 dtrace_buffer_t *buf;
11540 11559 int allocated = 0, desired = 0;
11541 11560
11542 11561 ASSERT(MUTEX_HELD(&cpu_lock));
11543 11562 ASSERT(MUTEX_HELD(&dtrace_lock));
11544 11563
11545 11564 *factor = 1;
11546 11565
11547 11566 if (size > dtrace_nonroot_maxsize &&
11548 11567 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
11549 11568 return (EFBIG);
11550 11569
11551 11570 cp = cpu_list;
11552 11571
11553 11572 do {
11554 11573 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
11555 11574 continue;
11556 11575
11557 11576 buf = &bufs[cp->cpu_id];
11558 11577
11559 11578 /*
11560 11579 * If there is already a buffer allocated for this CPU, it
11561 11580 * is only possible that this is a DR event. In this case,
11562 11581 * the buffer size must match our specified size.
11563 11582 */
11564 11583 if (buf->dtb_tomax != NULL) {
11565 11584 ASSERT(buf->dtb_size == size);
11566 11585 continue;
11567 11586 }
11568 11587
11569 11588 ASSERT(buf->dtb_xamot == NULL);
11570 11589
11571 11590 if ((buf->dtb_tomax = kmem_zalloc(size,
11572 11591 KM_NOSLEEP | KM_NORMALPRI)) == NULL)
11573 11592 goto err;
11574 11593
11575 11594 buf->dtb_size = size;
11576 11595 buf->dtb_flags = flags;
11577 11596 buf->dtb_offset = 0;
11578 11597 buf->dtb_drops = 0;
11579 11598
11580 11599 if (flags & DTRACEBUF_NOSWITCH)
11581 11600 continue;
11582 11601
11583 11602 if ((buf->dtb_xamot = kmem_zalloc(size,
11584 11603 KM_NOSLEEP | KM_NORMALPRI)) == NULL)
11585 11604 goto err;
11586 11605 } while ((cp = cp->cpu_next) != cpu_list);
11587 11606
11588 11607 return (0);
11589 11608
11590 11609 err:
11591 11610 cp = cpu_list;
11592 11611
11593 11612 do {
11594 11613 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
11595 11614 continue;
11596 11615
11597 11616 buf = &bufs[cp->cpu_id];
11598 11617 desired += 2;
11599 11618
11600 11619 if (buf->dtb_xamot != NULL) {
11601 11620 ASSERT(buf->dtb_tomax != NULL);
11602 11621 ASSERT(buf->dtb_size == size);
11603 11622 kmem_free(buf->dtb_xamot, size);
11604 11623 allocated++;
11605 11624 }
11606 11625
11607 11626 if (buf->dtb_tomax != NULL) {
11608 11627 ASSERT(buf->dtb_size == size);
11609 11628 kmem_free(buf->dtb_tomax, size);
11610 11629 allocated++;
11611 11630 }
11612 11631
11613 11632 buf->dtb_tomax = NULL;
11614 11633 buf->dtb_xamot = NULL;
11615 11634 buf->dtb_size = 0;
11616 11635 } while ((cp = cp->cpu_next) != cpu_list);
11617 11636
11618 11637 *factor = desired / (allocated > 0 ? allocated : 1);
11619 11638
11620 11639 return (ENOMEM);
11621 11640 }
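/*
 * Illustrative note (editorial; not from the original source): on
 * failure, *factor estimates how much the caller should shrink its
 * request. With 4 CPUs, desired is 8 (two buffers per CPU); if only 2
 * buffers had been allocated before the failure, factor becomes 4,
 * suggesting a retry at roughly a quarter of the requested size.
 */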
11622 11641
11623 11642 /*
11624 11643 * Note: called from probe context. This function just increments the drop
11625 11644 * count on a buffer. It has been made a function to allow for the
11626 11645 * possibility of understanding the source of mysterious drop counts. (A
11627 11646 * problem for which one may be particularly disappointed that DTrace cannot
11628 11647 * be used to understand DTrace.)
11629 11648 */
11630 11649 static void
11631 11650 dtrace_buffer_drop(dtrace_buffer_t *buf)
11632 11651 {
11633 11652 buf->dtb_drops++;
11634 11653 }
11635 11654
11636 11655 /*
11637 11656 * Note: called from probe context. This function is called to reserve space
11638 11657 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the
11639 11658 * mstate. Returns the new offset in the buffer, or a negative value if an
11640 11659 * error has occurred.
11641 11660 */
11642 11661 static intptr_t
11643 11662 dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
11644 11663 dtrace_state_t *state, dtrace_mstate_t *mstate)
11645 11664 {
11646 11665 intptr_t offs = buf->dtb_offset, soffs;
11647 11666 intptr_t woffs;
11648 11667 caddr_t tomax;
11649 11668 size_t total;
11650 11669
11651 11670 if (buf->dtb_flags & DTRACEBUF_INACTIVE)
11652 11671 return (-1);
11653 11672
11654 11673 if ((tomax = buf->dtb_tomax) == NULL) {
11655 11674 dtrace_buffer_drop(buf);
11656 11675 return (-1);
11657 11676 }
11658 11677
11659 11678 if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
11660 11679 while (offs & (align - 1)) {
11661 11680 /*
11662 11681 * Assert that our alignment is off by a number which
11663 11682 * is itself sizeof (uint32_t) aligned.
11664 11683 */
11665 11684 ASSERT(!((align - (offs & (align - 1))) &
11666 11685 (sizeof (uint32_t) - 1)));
11667 11686 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
11668 11687 offs += sizeof (uint32_t);
11669 11688 }
11670 11689
11671 11690 if ((soffs = offs + needed) > buf->dtb_size) {
11672 11691 dtrace_buffer_drop(buf);
11673 11692 return (-1);
11674 11693 }
11675 11694
11676 11695 if (mstate == NULL)
11677 11696 return (offs);
11678 11697
11679 11698 mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
11680 11699 mstate->dtms_scratch_size = buf->dtb_size - soffs;
11681 11700 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
11682 11701
11683 11702 return (offs);
11684 11703 }
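	/*
	 * Worked example (editorial; not from the original source) of
	 * the fast path above: with offs = 20 and align = 8, a single
	 * DTRACE_EPIDNONE filler word is stored at offset 20, advancing
	 * offs to 24; the reservation then begins there, provided
	 * offs + needed still fits within dtb_size.
	 */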
11685 11704
11686 11705 if (buf->dtb_flags & DTRACEBUF_FILL) {
11687 11706 if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
11688 11707 (buf->dtb_flags & DTRACEBUF_FULL))
11689 11708 return (-1);
11690 11709 goto out;
11691 11710 }
11692 11711
11693 11712 total = needed + (offs & (align - 1));
11694 11713
11695 11714 /*
11696 11715 * For a ring buffer, life is quite a bit more complicated. Before
11697 11716 * we can store any padding, we need to adjust our wrapping offset.
11698 11717 * (If we've never before wrapped or we're not about to, no adjustment
11699 11718 * is required.)
11700 11719 */
11701 11720 if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
11702 11721 offs + total > buf->dtb_size) {
11703 11722 woffs = buf->dtb_xamot_offset;
11704 11723
11705 11724 if (offs + total > buf->dtb_size) {
11706 11725 /*
11707 11726 * We can't fit in the end of the buffer. First, a
11708 11727 * sanity check that we can fit in the buffer at all.
11709 11728 */
11710 11729 if (total > buf->dtb_size) {
11711 11730 dtrace_buffer_drop(buf);
11712 11731 return (-1);
11713 11732 }
11714 11733
11715 11734 /*
11716 11735 * We're going to be storing at the top of the buffer,
11717 11736 * so now we need to deal with the wrapped offset. We
11718 11737 * only reset our wrapped offset to 0 if it is
11719 11738 * currently greater than the current offset. If it
11720 11739 * is less than the current offset, it is because a
11721 11740 * previous allocation induced a wrap -- but the
11722 11741 * allocation didn't subsequently take the space due
11723 11742 * to an error or false predicate evaluation. In this
11724 11743 * case, we'll just leave the wrapped offset alone: if
11725 11744 * the wrapped offset hasn't been advanced far enough
11726 11745 * for this allocation, it will be adjusted in the
11727 11746 * lower loop.
11728 11747 */
11729 11748 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
11730 11749 if (woffs >= offs)
11731 11750 woffs = 0;
11732 11751 } else {
11733 11752 woffs = 0;
11734 11753 }
11735 11754
11736 11755 /*
11737 11756 * Now we know that we're going to be storing to the
11738 11757 * top of the buffer and that there is room for us
11739 11758 * there. We need to clear the buffer from the current
11740 11759 * offset to the end (there may be old gunk there).
11741 11760 */
11742 11761 while (offs < buf->dtb_size)
11743 11762 tomax[offs++] = 0;
11744 11763
11745 11764 /*
11746 11765 * We need to set our offset to zero. And because we
11747 11766 * are wrapping, we need to set the bit indicating as
11748 11767 * much. We can also adjust our needed space back
11749 11768 * down to the space required by the ECB -- we know
11750 11769 * that the top of the buffer is aligned.
11751 11770 */
11752 11771 offs = 0;
11753 11772 total = needed;
11754 11773 buf->dtb_flags |= DTRACEBUF_WRAPPED;
11755 11774 } else {
11756 11775 /*
11757 11776 * There is room for us in the buffer, so we simply
11758 11777 * need to check the wrapped offset.
11759 11778 */
11760 11779 if (woffs < offs) {
11761 11780 /*
11762 11781 * The wrapped offset is less than the offset.
11763 11782 * This can happen if we allocated buffer space
11764 11783 * that induced a wrap, but then we didn't
11765 11784 * subsequently take the space due to an error
11766 11785 * or false predicate evaluation. This is
11767 11786 * okay; we know that _this_ allocation isn't
11768 11787 * going to induce a wrap. We still can't
11769 11788 * reset the wrapped offset to be zero,
11770 11789 * however: the space may have been trashed in
11771 11790 * the previous failed probe attempt. But at
11772 11791 * least the wrapped offset doesn't need to
11773 11792 * be adjusted at all...
11774 11793 */
11775 11794 goto out;
11776 11795 }
11777 11796 }
11778 11797
11779 11798 while (offs + total > woffs) {
11780 11799 dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
11781 11800 size_t size;
11782 11801
11783 11802 if (epid == DTRACE_EPIDNONE) {
11784 11803 size = sizeof (uint32_t);
11785 11804 } else {
11786 11805 ASSERT3U(epid, <=, state->dts_necbs);
11787 11806 ASSERT(state->dts_ecbs[epid - 1] != NULL);
11788 11807
11789 11808 size = state->dts_ecbs[epid - 1]->dte_size;
11790 11809 }
11791 11810
11792 11811 ASSERT(woffs + size <= buf->dtb_size);
11793 11812 ASSERT(size != 0);
11794 11813
11795 11814 if (woffs + size == buf->dtb_size) {
11796 11815 /*
11797 11816 * We've reached the end of the buffer; we want
11798 11817 * to set the wrapped offset to 0 and break
11799 11818 * out. However, if the offs is 0, then we're
11800 11819 * in a strange edge-condition: the amount of
11801 11820 * space that we want to reserve plus the size
11802 11821 * of the record that we're overwriting is
11803 11822 * greater than the size of the buffer. This
11804 11823 * is problematic because if we reserve the
11805 11824 * space but subsequently don't consume it (due
11806 11825 * to a failed predicate or error) the wrapped
11807 11826 * offset will be 0 -- yet the EPID at offset 0
11808 11827 * will not be committed. This situation is
11809 11828 * relatively easy to deal with: if we're in
11810 11829 * this case, the buffer is indistinguishable
11811 11830 * from one that hasn't wrapped; we need only
11812 11831 * finish the job by clearing the wrapped bit,
11813 11832 * explicitly setting the offset to be 0, and
11814 11833 * zero'ing out the old data in the buffer.
11815 11834 */
11816 11835 if (offs == 0) {
11817 11836 buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
11818 11837 buf->dtb_offset = 0;
11819 11838 woffs = total;
11820 11839
11821 11840 while (woffs < buf->dtb_size)
11822 11841 tomax[woffs++] = 0;
11823 11842 }
11824 11843
11825 11844 woffs = 0;
11826 11845 break;
11827 11846 }
11828 11847
11829 11848 woffs += size;
11830 11849 }
11831 11850
11832 11851 /*
11833 11852 * We have a wrapped offset. It may be that the wrapped offset
11834 11853 * has become zero -- that's okay.
11835 11854 */
11836 11855 buf->dtb_xamot_offset = woffs;
11837 11856 }
11838 11857
11839 11858 out:
11840 11859 /*
11841 11860 * Now we can plow the buffer with any necessary padding.
11842 11861 */
11843 11862 while (offs & (align - 1)) {
11844 11863 /*
11845 11864 * Assert that our alignment is off by a number which
11846 11865 * is itself sizeof (uint32_t) aligned.
11847 11866 */
11848 11867 ASSERT(!((align - (offs & (align - 1))) &
11849 11868 (sizeof (uint32_t) - 1)));
11850 11869 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
11851 11870 offs += sizeof (uint32_t);
11852 11871 }
11853 11872
11854 11873 if (buf->dtb_flags & DTRACEBUF_FILL) {
11855 11874 if (offs + needed > buf->dtb_size - state->dts_reserve) {
11856 11875 buf->dtb_flags |= DTRACEBUF_FULL;
11857 11876 return (-1);
11858 11877 }
11859 11878 }
11860 11879
11861 11880 if (mstate == NULL)
11862 11881 return (offs);
11863 11882
11864 11883 /*
11865 11884 * For ring buffers and fill buffers, the scratch space is always
11866 11885 * the inactive buffer.
11867 11886 */
11868 11887 mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
11869 11888 mstate->dtms_scratch_size = buf->dtb_size;
11870 11889 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
11871 11890
11872 11891 return (offs);
11873 11892 }
11874 11893
11875 11894 static void
11876 11895 dtrace_buffer_polish(dtrace_buffer_t *buf)
11877 11896 {
11878 11897 ASSERT(buf->dtb_flags & DTRACEBUF_RING);
11879 11898 ASSERT(MUTEX_HELD(&dtrace_lock));
11880 11899
11881 11900 if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
11882 11901 return;
11883 11902
11884 11903 /*
11885 11904 * We need to polish the ring buffer. There are three cases:
11886 11905 *
11887 11906 * - The first (and presumably most common) is that there is no gap
11888 11907 * between the buffer offset and the wrapped offset. In this case,
11889 11908 * there is nothing in the buffer that isn't valid data; we can
11890 11909 * mark the buffer as polished and return.
11891 11910 *
11892 11911 * - The second (less common than the first but still more common
11893 11912 * than the third) is that there is a gap between the buffer offset
11894 11913 * and the wrapped offset, and the wrapped offset is larger than the
11895 11914 * buffer offset. This can happen because of an alignment issue, or
11896 11915 * can happen because of a call to dtrace_buffer_reserve() that
11897 11916 * didn't subsequently consume the buffer space. In this case,
11898 11917 * we need to zero the data from the buffer offset to the wrapped
11899 11918 * offset.
11900 11919 *
11901 11920 * - The third (and least common) is that there is a gap between the
11902 11921 * buffer offset and the wrapped offset, but the wrapped offset is
11903 11922 * _less_ than the buffer offset. This can only happen because a
11904 11923 * call to dtrace_buffer_reserve() induced a wrap, but the space
11905 11924 * was not subsequently consumed. In this case, we need to zero the
11906 11925 * space from the offset to the end of the buffer _and_ from the
11907 11926 * top of the buffer to the wrapped offset.
11908 11927 */
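	/*
	 * Pictorially (o denotes dtb_offset, w denotes dtb_xamot_offset;
	 * positions are illustrative only):
	 *
	 *	case 2 (o < w):  [ valid | o ..zeroed.. w | valid        ]
	 *	case 3 (o > w):  [ ..zeroed.. w | valid | o ..zeroed..   ]
	 */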
11909 11928 if (buf->dtb_offset < buf->dtb_xamot_offset) {
11910 11929 bzero(buf->dtb_tomax + buf->dtb_offset,
11911 11930 buf->dtb_xamot_offset - buf->dtb_offset);
11912 11931 }
11913 11932
11914 11933 if (buf->dtb_offset > buf->dtb_xamot_offset) {
11915 11934 bzero(buf->dtb_tomax + buf->dtb_offset,
11916 11935 buf->dtb_size - buf->dtb_offset);
11917 11936 bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
11918 11937 }
11919 11938 }
11920 11939
11921 11940 /*
11922 11941 * This routine determines if data generated at the specified time has likely
11923 11942 * been entirely consumed at user-level. This routine is called to determine
11924 11943 * if an ECB on a defunct probe (but for an active enabling) can be safely
11925 11944 * disabled and destroyed.
11926 11945 */
11927 11946 static int
11928 11947 dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when)
11929 11948 {
11930 11949 int i;
11931 11950
11932 11951 for (i = 0; i < NCPU; i++) {
11933 11952 dtrace_buffer_t *buf = &bufs[i];
11934 11953
11935 11954 if (buf->dtb_size == 0)
11936 11955 continue;
11937 11956
11938 11957 if (buf->dtb_flags & DTRACEBUF_RING)
11939 11958 return (0);
11940 11959
11941 11960 if (!buf->dtb_switched && buf->dtb_offset != 0)
11942 11961 return (0);
11943 11962
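		/*
		 * The buffer has been switched -- but data generated at
		 * 'when' has only been consumed if the most recent switch
		 * began at least a full switch interval after 'when'.
		 */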
11944 11963 if (buf->dtb_switched - buf->dtb_interval < when)
11945 11964 return (0);
11946 11965 }
11947 11966
11948 11967 return (1);
11949 11968 }
11950 11969
11951 11970 static void
11952 11971 dtrace_buffer_free(dtrace_buffer_t *bufs)
11953 11972 {
11954 11973 int i;
11955 11974
11956 11975 for (i = 0; i < NCPU; i++) {
11957 11976 dtrace_buffer_t *buf = &bufs[i];
11958 11977
11959 11978 if (buf->dtb_tomax == NULL) {
11960 11979 ASSERT(buf->dtb_xamot == NULL);
11961 11980 ASSERT(buf->dtb_size == 0);
11962 11981 continue;
11963 11982 }
11964 11983
11965 11984 if (buf->dtb_xamot != NULL) {
11966 11985 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
11967 11986 kmem_free(buf->dtb_xamot, buf->dtb_size);
11968 11987 }
11969 11988
11970 11989 kmem_free(buf->dtb_tomax, buf->dtb_size);
11971 11990 buf->dtb_size = 0;
11972 11991 buf->dtb_tomax = NULL;
11973 11992 buf->dtb_xamot = NULL;
11974 11993 }
11975 11994 }
11976 11995
11977 11996 /*
11978 11997 * DTrace Enabling Functions
11979 11998 */
11980 11999 static dtrace_enabling_t *
11981 12000 dtrace_enabling_create(dtrace_vstate_t *vstate)
11982 12001 {
11983 12002 dtrace_enabling_t *enab;
11984 12003
11985 12004 enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
11986 12005 enab->dten_vstate = vstate;
11987 12006
11988 12007 return (enab);
11989 12008 }
11990 12009
11991 12010 static void
11992 12011 dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
11993 12012 {
11994 12013 dtrace_ecbdesc_t **ndesc;
11995 12014 size_t osize, nsize;
11996 12015
11997 12016 /*
11998 12017 * We can't add to enablings after we've enabled them, or after we've
11999 12018 * retained them.
12000 12019 */
12001 12020 ASSERT(enab->dten_probegen == 0);
12002 12021 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
12003 12022
12004 12023 if (enab->dten_ndesc < enab->dten_maxdesc) {
12005 12024 enab->dten_desc[enab->dten_ndesc++] = ecb;
12006 12025 return;
12007 12026 }
12008 12027
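	/*
	 * The descriptor array is full; grow it geometrically (1, 2, 4,
	 * ...) so that a sequence of n additions costs only O(log n)
	 * reallocations and O(n) total copying.
	 */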
12009 12028 	osize = enab->dten_maxdesc * sizeof (dtrace_ecbdesc_t *);
12010 12029
12011 12030 if (enab->dten_maxdesc == 0) {
12012 12031 enab->dten_maxdesc = 1;
12013 12032 } else {
12014 12033 enab->dten_maxdesc <<= 1;
12015 12034 }
12016 12035
12017 12036 ASSERT(enab->dten_ndesc < enab->dten_maxdesc);
12018 12037
12019 12038 	nsize = enab->dten_maxdesc * sizeof (dtrace_ecbdesc_t *);
12020 12039 ndesc = kmem_zalloc(nsize, KM_SLEEP);
12021 12040 bcopy(enab->dten_desc, ndesc, osize);
12022 12041 kmem_free(enab->dten_desc, osize);
12023 12042
12024 12043 enab->dten_desc = ndesc;
12025 12044 enab->dten_desc[enab->dten_ndesc++] = ecb;
12026 12045 }
12027 12046
12028 12047 static void
12029 12048 dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
12030 12049 dtrace_probedesc_t *pd)
12031 12050 {
12032 12051 dtrace_ecbdesc_t *new;
12033 12052 dtrace_predicate_t *pred;
12034 12053 dtrace_actdesc_t *act;
12035 12054
12036 12055 /*
12037 12056 * We're going to create a new ECB description that matches the
12038 12057 * specified ECB in every way, but has the specified probe description.
12039 12058 */
12040 12059 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
12041 12060
12042 12061 if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
12043 12062 dtrace_predicate_hold(pred);
12044 12063
12045 12064 for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
12046 12065 dtrace_actdesc_hold(act);
12047 12066
12048 12067 new->dted_action = ecb->dted_action;
12049 12068 new->dted_pred = ecb->dted_pred;
12050 12069 new->dted_probe = *pd;
12051 12070 new->dted_uarg = ecb->dted_uarg;
12052 12071
12053 12072 dtrace_enabling_add(enab, new);
12054 12073 }
12055 12074
12056 12075 static void
12057 12076 dtrace_enabling_dump(dtrace_enabling_t *enab)
12058 12077 {
12059 12078 int i;
12060 12079
12061 12080 for (i = 0; i < enab->dten_ndesc; i++) {
12062 12081 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;
12063 12082
12064 12083 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
12065 12084 desc->dtpd_provider, desc->dtpd_mod,
12066 12085 desc->dtpd_func, desc->dtpd_name);
12067 12086 }
12068 12087 }
12069 12088
12070 12089 static void
12071 12090 dtrace_enabling_destroy(dtrace_enabling_t *enab)
12072 12091 {
12073 12092 int i;
12074 12093 dtrace_ecbdesc_t *ep;
12075 12094 dtrace_vstate_t *vstate = enab->dten_vstate;
12076 12095
12077 12096 ASSERT(MUTEX_HELD(&dtrace_lock));
12078 12097
12079 12098 for (i = 0; i < enab->dten_ndesc; i++) {
12080 12099 dtrace_actdesc_t *act, *next;
12081 12100 dtrace_predicate_t *pred;
12082 12101
12083 12102 ep = enab->dten_desc[i];
12084 12103
12085 12104 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
12086 12105 dtrace_predicate_release(pred, vstate);
12087 12106
12088 12107 for (act = ep->dted_action; act != NULL; act = next) {
12089 12108 next = act->dtad_next;
12090 12109 dtrace_actdesc_release(act, vstate);
12091 12110 }
12092 12111
12093 12112 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
12094 12113 }
12095 12114
12096 12115 kmem_free(enab->dten_desc,
12097 12116 	    enab->dten_maxdesc * sizeof (dtrace_ecbdesc_t *));
12098 12117
12099 12118 /*
12100 12119 * If this was a retained enabling, decrement the dts_nretained count
12101 12120 * and take it off of the dtrace_retained list.
12102 12121 */
12103 12122 if (enab->dten_prev != NULL || enab->dten_next != NULL ||
12104 12123 dtrace_retained == enab) {
12105 12124 ASSERT(enab->dten_vstate->dtvs_state != NULL);
12106 12125 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
12107 12126 enab->dten_vstate->dtvs_state->dts_nretained--;
12108 12127 dtrace_retained_gen++;
12109 12128 }
12110 12129
12111 12130 if (enab->dten_prev == NULL) {
12112 12131 if (dtrace_retained == enab) {
12113 12132 dtrace_retained = enab->dten_next;
12114 12133
12115 12134 if (dtrace_retained != NULL)
12116 12135 dtrace_retained->dten_prev = NULL;
12117 12136 }
12118 12137 } else {
12119 12138 ASSERT(enab != dtrace_retained);
12120 12139 ASSERT(dtrace_retained != NULL);
12121 12140 enab->dten_prev->dten_next = enab->dten_next;
12122 12141 }
12123 12142
12124 12143 if (enab->dten_next != NULL) {
12125 12144 ASSERT(dtrace_retained != NULL);
12126 12145 enab->dten_next->dten_prev = enab->dten_prev;
12127 12146 }
12128 12147
12129 12148 kmem_free(enab, sizeof (dtrace_enabling_t));
12130 12149 }
12131 12150
12132 12151 static int
12133 12152 dtrace_enabling_retain(dtrace_enabling_t *enab)
12134 12153 {
12135 12154 dtrace_state_t *state;
12136 12155
12137 12156 ASSERT(MUTEX_HELD(&dtrace_lock));
12138 12157 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
12139 12158 ASSERT(enab->dten_vstate != NULL);
12140 12159
12141 12160 state = enab->dten_vstate->dtvs_state;
12142 12161 ASSERT(state != NULL);
12143 12162
12144 12163 /*
12145 12164 * We only allow each state to retain dtrace_retain_max enablings.
12146 12165 */
12147 12166 if (state->dts_nretained >= dtrace_retain_max)
12148 12167 return (ENOSPC);
12149 12168
12150 12169 state->dts_nretained++;
12151 12170 dtrace_retained_gen++;
12152 12171
12153 12172 if (dtrace_retained == NULL) {
12154 12173 dtrace_retained = enab;
12155 12174 return (0);
12156 12175 }
12157 12176
12158 12177 enab->dten_next = dtrace_retained;
12159 12178 dtrace_retained->dten_prev = enab;
12160 12179 dtrace_retained = enab;
12161 12180
12162 12181 return (0);
12163 12182 }
12164 12183
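/*
 * Duplicate enablings on the specified state:  for every retained ECB
 * description on 'state' whose probe description exactly matches 'match',
 * add an identical ECB description -- but with the probe description
 * 'create' substituted -- to a new enabling, and retain it.  Returns zero
 * on success; ENOENT if nothing matched, or another errno if the new
 * enabling could not be retained.
 */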
12165 12184 static int
12166 12185 dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
12167 12186 dtrace_probedesc_t *create)
12168 12187 {
12169 12188 dtrace_enabling_t *new, *enab;
12170 12189 int found = 0, err = ENOENT;
12171 12190
12172 12191 ASSERT(MUTEX_HELD(&dtrace_lock));
12173 12192 ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
12174 12193 ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
12175 12194 ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
12176 12195 ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);
12177 12196
12178 12197 new = dtrace_enabling_create(&state->dts_vstate);
12179 12198
12180 12199 /*
12181 12200 * Iterate over all retained enablings, looking for enablings that
12182 12201 * match the specified state.
12183 12202 */
12184 12203 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
12185 12204 int i;
12186 12205
12187 12206 /*
12188 12207 * dtvs_state can only be NULL for helper enablings -- and
12189 12208 * helper enablings can't be retained.
12190 12209 */
12191 12210 ASSERT(enab->dten_vstate->dtvs_state != NULL);
12192 12211
12193 12212 if (enab->dten_vstate->dtvs_state != state)
12194 12213 continue;
12195 12214
12196 12215 /*
12197 12216 * Now iterate over each probe description; we're looking for
12198 12217 * an exact match to the specified probe description.
12199 12218 */
12200 12219 for (i = 0; i < enab->dten_ndesc; i++) {
12201 12220 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
12202 12221 dtrace_probedesc_t *pd = &ep->dted_probe;
12203 12222
12204 12223 if (strcmp(pd->dtpd_provider, match->dtpd_provider))
12205 12224 continue;
12206 12225
12207 12226 if (strcmp(pd->dtpd_mod, match->dtpd_mod))
12208 12227 continue;
12209 12228
12210 12229 if (strcmp(pd->dtpd_func, match->dtpd_func))
12211 12230 continue;
12212 12231
12213 12232 if (strcmp(pd->dtpd_name, match->dtpd_name))
12214 12233 continue;
12215 12234
12216 12235 /*
12217 12236 * We have a winning probe! Add it to our growing
12218 12237 * enabling.
12219 12238 */
12220 12239 found = 1;
12221 12240 dtrace_enabling_addlike(new, ep, create);
12222 12241 }
12223 12242 }
12224 12243
12225 12244 if (!found || (err = dtrace_enabling_retain(new)) != 0) {
12226 12245 dtrace_enabling_destroy(new);
12227 12246 return (err);
12228 12247 }
12229 12248
12230 12249 return (0);
12231 12250 }
12232 12251
12233 12252 static void
12234 12253 dtrace_enabling_retract(dtrace_state_t *state)
12235 12254 {
12236 12255 dtrace_enabling_t *enab, *next;
12237 12256
12238 12257 ASSERT(MUTEX_HELD(&dtrace_lock));
12239 12258
12240 12259 /*
12241 12260 	 * Iterate over all retained enablings, destroying those retained
12242 12261 * for the specified state.
12243 12262 */
12244 12263 for (enab = dtrace_retained; enab != NULL; enab = next) {
12245 12264 next = enab->dten_next;
12246 12265
12247 12266 /*
12248 12267 * dtvs_state can only be NULL for helper enablings -- and
12249 12268 * helper enablings can't be retained.
12250 12269 */
12251 12270 ASSERT(enab->dten_vstate->dtvs_state != NULL);
12252 12271
12253 12272 if (enab->dten_vstate->dtvs_state == state) {
12254 12273 ASSERT(state->dts_nretained > 0);
12255 12274 dtrace_enabling_destroy(enab);
12256 12275 }
12257 12276 }
12258 12277
12259 12278 ASSERT(state->dts_nretained == 0);
12260 12279 }
12261 12280
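/*
 * Attempt to enable the probes for each ECB description in the specified
 * enabling, accumulating the number of probes matched.  Returns zero on
 * success (storing the total in *nmatched if it is non-NULL), EBUSY if a
 * provider failed to enable a probe, or the per-enabling error otherwise.
 */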
12262 12281 static int
12263 12282 dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
12264 12283 {
12265 12284 int i = 0;
12266 12285 int total_matched = 0, matched = 0;
12267 12286
12268 12287 ASSERT(MUTEX_HELD(&cpu_lock));
12269 12288 ASSERT(MUTEX_HELD(&dtrace_lock));
12270 12289
12271 12290 for (i = 0; i < enab->dten_ndesc; i++) {
12272 12291 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
12273 12292
12274 12293 enab->dten_current = ep;
12275 12294 enab->dten_error = 0;
12276 12295
12277 12296 /*
12278 12297 * If a provider failed to enable a probe then get out and
12279 12298 * let the consumer know we failed.
12280 12299 */
12281 12300 if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
12282 12301 return (EBUSY);
12283 12302
12284 12303 total_matched += matched;
12285 12304
12286 12305 if (enab->dten_error != 0) {
12287 12306 /*
12288 12307 * If we get an error half-way through enabling the
12289 12308 * probes, we kick out -- perhaps with some number of
12290 12309 * them enabled. Leaving enabled probes enabled may
12291 12310 * be slightly confusing for user-level, but we expect
12292 12311 * that no one will attempt to actually drive on in
12293 12312 * the face of such errors. If this is an anonymous
12294 12313 * enabling (indicated with a NULL nmatched pointer),
12295 12314 * we cmn_err() a message. We aren't expecting to
12296 12315 			 * get such an error -- insofar as it can exist at all,
12297 12316 * it would be a result of corrupted DOF in the driver
12298 12317 * properties.
12299 12318 */
12300 12319 if (nmatched == NULL) {
12301 12320 cmn_err(CE_WARN, "dtrace_enabling_match() "
12302 12321 "error on %p: %d", (void *)ep,
12303 12322 enab->dten_error);
12304 12323 }
12305 12324
12306 12325 return (enab->dten_error);
12307 12326 }
12308 12327 }
12309 12328
12310 12329 enab->dten_probegen = dtrace_probegen;
12311 12330 if (nmatched != NULL)
12312 12331 *nmatched = total_matched;
12313 12332
12314 12333 return (0);
12315 12334 }
12316 12335
12317 12336 static void
12318 12337 dtrace_enabling_matchall(void)
12319 12338 {
12320 12339 dtrace_enabling_t *enab;
12321 12340
12322 12341 mutex_enter(&cpu_lock);
12323 12342 mutex_enter(&dtrace_lock);
12324 12343
12325 12344 /*
12326 12345 * Iterate over all retained enablings to see if any probes match
12327 12346 * against them. We only perform this operation on enablings for which
12328 12347 * we have sufficient permissions by virtue of being in the global zone
12329 12348 * or in the same zone as the DTrace client. Because we can be called
12330 12349 * after dtrace_detach() has been called, we cannot assert that there
12331 12350 * are retained enablings. We can safely load from dtrace_retained,
12332 12351 * however: the taskq_destroy() at the end of dtrace_detach() will
12333 12352 * block pending our completion.
12334 12353 */
12335 12354 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
12336 12355 dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred;
12337 12356 cred_t *cr = dcr->dcr_cred;
12338 12357 zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0;
12339 12358
12340 12359 if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL &&
12341 12360 (zone == GLOBAL_ZONEID || getzoneid() == zone)))
12342 12361 (void) dtrace_enabling_match(enab, NULL);
12343 12362 }
12344 12363
12345 12364 mutex_exit(&dtrace_lock);
12346 12365 mutex_exit(&cpu_lock);
12347 12366 }
12348 12367
12349 12368 /*
12350 12369 * If an enabling is to be enabled without having matched probes (that is, if
12351 12370 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
12352 12371 * enabling must be _primed_ by creating an ECB for every ECB description.
12353 12372 * This must be done to assure that we know the number of speculations, the
12354 12373 * number of aggregations, the minimum buffer size needed, etc. before we
12355 12374 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually
12356 12375  * enabling any probes, we create ECBs for every ECB description, but with a
12357 12376 * NULL probe -- which is exactly what this function does.
12358 12377 */
12359 12378 static void
12360 12379 dtrace_enabling_prime(dtrace_state_t *state)
12361 12380 {
12362 12381 dtrace_enabling_t *enab;
12363 12382 int i;
12364 12383
12365 12384 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
12366 12385 ASSERT(enab->dten_vstate->dtvs_state != NULL);
12367 12386
12368 12387 if (enab->dten_vstate->dtvs_state != state)
12369 12388 continue;
12370 12389
12371 12390 /*
12372 12391 * We don't want to prime an enabling more than once, lest
12373 12392 * we allow a malicious user to induce resource exhaustion.
12374 12393 * (The ECBs that result from priming an enabling aren't
12375 12394 * leaked -- but they also aren't deallocated until the
12376 12395 * consumer state is destroyed.)
12377 12396 */
12378 12397 if (enab->dten_primed)
12379 12398 continue;
12380 12399
12381 12400 for (i = 0; i < enab->dten_ndesc; i++) {
12382 12401 enab->dten_current = enab->dten_desc[i];
12383 12402 (void) dtrace_probe_enable(NULL, enab);
12384 12403 }
12385 12404
12386 12405 enab->dten_primed = 1;
12387 12406 }
12388 12407 }
12389 12408
12390 12409 /*
12391 12410 * Called to indicate that probes should be provided due to retained
12392 12411 * enablings. This is implemented in terms of dtrace_probe_provide(), but it
12393 12412  * must take an initial lap through the enablings, calling the dtps_provide()
12394 12413 * entry point explicitly to allow for autocreated probes.
12395 12414 */
12396 12415 static void
12397 12416 dtrace_enabling_provide(dtrace_provider_t *prv)
12398 12417 {
12399 12418 int i, all = 0;
12400 12419 dtrace_probedesc_t desc;
12401 12420 dtrace_genid_t gen;
12402 12421
12403 12422 ASSERT(MUTEX_HELD(&dtrace_lock));
12404 12423 ASSERT(MUTEX_HELD(&dtrace_provider_lock));
12405 12424
12406 12425 if (prv == NULL) {
12407 12426 all = 1;
12408 12427 prv = dtrace_provider;
12409 12428 }
12410 12429
12411 12430 do {
12412 12431 dtrace_enabling_t *enab;
12413 12432 void *parg = prv->dtpv_arg;
12414 12433
12415 12434 retry:
12416 12435 gen = dtrace_retained_gen;
12417 12436 for (enab = dtrace_retained; enab != NULL;
12418 12437 enab = enab->dten_next) {
12419 12438 for (i = 0; i < enab->dten_ndesc; i++) {
12420 12439 desc = enab->dten_desc[i]->dted_probe;
12421 12440 mutex_exit(&dtrace_lock);
12422 12441 prv->dtpv_pops.dtps_provide(parg, &desc);
12423 12442 mutex_enter(&dtrace_lock);
12424 12443 /*
12425 12444 * Process the retained enablings again if
12426 12445 * they have changed while we weren't holding
12427 12446 * dtrace_lock.
12428 12447 */
12429 12448 if (gen != dtrace_retained_gen)
12430 12449 goto retry;
12431 12450 }
12432 12451 }
12433 12452 } while (all && (prv = prv->dtpv_next) != NULL);
12434 12453
12435 12454 mutex_exit(&dtrace_lock);
12436 12455 dtrace_probe_provide(NULL, all ? NULL : prv);
12437 12456 mutex_enter(&dtrace_lock);
12438 12457 }
12439 12458
12440 12459 /*
12441 12460 * Called to reap ECBs that are attached to probes from defunct providers.
12442 12461 */
12443 12462 static void
12444 12463 dtrace_enabling_reap(void)
12445 12464 {
12446 12465 dtrace_provider_t *prov;
12447 12466 dtrace_probe_t *probe;
12448 12467 dtrace_ecb_t *ecb;
12449 12468 hrtime_t when;
12450 12469 int i;
12451 12470
12452 12471 mutex_enter(&cpu_lock);
12453 12472 mutex_enter(&dtrace_lock);
12454 12473
12455 12474 for (i = 0; i < dtrace_nprobes; i++) {
12456 12475 if ((probe = dtrace_probes[i]) == NULL)
12457 12476 continue;
12458 12477
12459 12478 if (probe->dtpr_ecb == NULL)
12460 12479 continue;
12461 12480
12462 12481 prov = probe->dtpr_provider;
12463 12482
12464 12483 if ((when = prov->dtpv_defunct) == 0)
12465 12484 continue;
12466 12485
12467 12486 /*
12468 12487 * We have ECBs on a defunct provider: we want to reap these
12469 12488 * ECBs to allow the provider to unregister. The destruction
12470 12489 * of these ECBs must be done carefully: if we destroy the ECB
12471 12490 * and the consumer later wishes to consume an EPID that
12472 12491 * corresponds to the destroyed ECB (and if the EPID metadata
12473 12492 * has not been previously consumed), the consumer will abort
12474 12493 * processing on the unknown EPID. To reduce (but not, sadly,
12475 12494 * eliminate) the possibility of this, we will only destroy an
12476 12495 * ECB for a defunct provider if, for the state that
12477 12496 * corresponds to the ECB:
12478 12497 *
12479 12498 * (a) There is no speculative tracing (which can effectively
12480 12499 * cache an EPID for an arbitrary amount of time).
12481 12500 *
12482 12501 * (b) The principal buffers have been switched twice since the
12483 12502 * provider became defunct.
12484 12503 *
12485 12504 * (c) The aggregation buffers are of zero size or have been
12486 12505 * switched twice since the provider became defunct.
12487 12506 *
12488 12507 * We use dts_speculates to determine (a) and call a function
12489 12508 * (dtrace_buffer_consumed()) to determine (b) and (c). Note
12490 12509 * that as soon as we've been unable to destroy one of the ECBs
12491 12510 * associated with the probe, we quit trying -- reaping is only
12492 12511 * fruitful in as much as we can destroy all ECBs associated
12493 12512 * with the defunct provider's probes.
12494 12513 */
12495 12514 while ((ecb = probe->dtpr_ecb) != NULL) {
12496 12515 dtrace_state_t *state = ecb->dte_state;
12497 12516 dtrace_buffer_t *buf = state->dts_buffer;
12498 12517 dtrace_buffer_t *aggbuf = state->dts_aggbuffer;
12499 12518
12500 12519 if (state->dts_speculates)
12501 12520 break;
12502 12521
12503 12522 if (!dtrace_buffer_consumed(buf, when))
12504 12523 break;
12505 12524
12506 12525 if (!dtrace_buffer_consumed(aggbuf, when))
12507 12526 break;
12508 12527
12509 12528 dtrace_ecb_disable(ecb);
12510 12529 ASSERT(probe->dtpr_ecb != ecb);
12511 12530 dtrace_ecb_destroy(ecb);
12512 12531 }
12513 12532 }
12514 12533
12515 12534 mutex_exit(&dtrace_lock);
12516 12535 mutex_exit(&cpu_lock);
12517 12536 }
12518 12537
12519 12538 /*
12520 12539 * DTrace DOF Functions
12521 12540 */
12522 12541 /*ARGSUSED*/
12523 12542 static void
12524 12543 dtrace_dof_error(dof_hdr_t *dof, const char *str)
12525 12544 {
12526 12545 if (dtrace_err_verbose)
12527 12546 cmn_err(CE_WARN, "failed to process DOF: %s", str);
12528 12547
12529 12548 #ifdef DTRACE_ERRDEBUG
12530 12549 dtrace_errdebug(str);
12531 12550 #endif
12532 12551 }
12533 12552
12534 12553 /*
12535 12554 * Create DOF out of a currently enabled state. Right now, we only create
12536 12555 * DOF containing the run-time options -- but this could be expanded to create
12537 12556 * complete DOF representing the enabled state.
12538 12557 */
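/*
 * The DOF created here has a fixed layout:  the header, a single section
 * header describing a DOF_SECT_OPTDESC section, and then (suitably aligned)
 * the option descriptions themselves:
 *
 *	+-----------------+  0
 *	| dof_hdr_t       |
 *	+-----------------+  sizeof (dof_hdr_t)
 *	| dof_sec_t       |
 *	+-----------------+  (rounded up to uint64_t alignment)
 *	| dof_optdesc_t   |
 *	|  x DTRACEOPT_MAX|
 *	+-----------------+
 */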
12539 12558 static dof_hdr_t *
12540 12559 dtrace_dof_create(dtrace_state_t *state)
12541 12560 {
12542 12561 dof_hdr_t *dof;
12543 12562 dof_sec_t *sec;
12544 12563 dof_optdesc_t *opt;
12545 12564 int i, len = sizeof (dof_hdr_t) +
12546 12565 roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
12547 12566 sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
12548 12567
12549 12568 ASSERT(MUTEX_HELD(&dtrace_lock));
12550 12569
12551 12570 dof = kmem_zalloc(len, KM_SLEEP);
12552 12571 dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
12553 12572 dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
12554 12573 dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
12555 12574 dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
12556 12575
12557 12576 dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
12558 12577 dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
12559 12578 dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
12560 12579 dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
12561 12580 dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
12562 12581 dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
12563 12582
12564 12583 dof->dofh_flags = 0;
12565 12584 dof->dofh_hdrsize = sizeof (dof_hdr_t);
12566 12585 dof->dofh_secsize = sizeof (dof_sec_t);
12567 12586 dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */
12568 12587 dof->dofh_secoff = sizeof (dof_hdr_t);
12569 12588 dof->dofh_loadsz = len;
12570 12589 dof->dofh_filesz = len;
12571 12590 dof->dofh_pad = 0;
12572 12591
12573 12592 /*
12574 12593 * Fill in the option section header...
12575 12594 */
12576 12595 sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
12577 12596 sec->dofs_type = DOF_SECT_OPTDESC;
12578 12597 sec->dofs_align = sizeof (uint64_t);
12579 12598 sec->dofs_flags = DOF_SECF_LOAD;
12580 12599 sec->dofs_entsize = sizeof (dof_optdesc_t);
12581 12600
12582 12601 opt = (dof_optdesc_t *)((uintptr_t)sec +
12583 12602 roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
12584 12603
12585 12604 sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
12586 12605 sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
12587 12606
12588 12607 for (i = 0; i < DTRACEOPT_MAX; i++) {
12589 12608 opt[i].dofo_option = i;
12590 12609 opt[i].dofo_strtab = DOF_SECIDX_NONE;
12591 12610 opt[i].dofo_value = state->dts_options[i];
12592 12611 }
12593 12612
12594 12613 return (dof);
12595 12614 }
12596 12615
12597 12616 static dof_hdr_t *
12598 12617 dtrace_dof_copyin(uintptr_t uarg, int *errp)
12599 12618 {
12600 12619 dof_hdr_t hdr, *dof;
12601 12620
12602 12621 ASSERT(!MUTEX_HELD(&dtrace_lock));
12603 12622
12604 12623 /*
12605 12624 * First, we're going to copyin() the sizeof (dof_hdr_t).
12606 12625 */
12607 12626 if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) {
12608 12627 dtrace_dof_error(NULL, "failed to copyin DOF header");
12609 12628 *errp = EFAULT;
12610 12629 return (NULL);
12611 12630 }
12612 12631
12613 12632 /*
12614 12633 * Now we'll allocate the entire DOF and copy it in -- provided
12615 12634 * that the length isn't outrageous.
12616 12635 */
12617 12636 if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
12618 12637 dtrace_dof_error(&hdr, "load size exceeds maximum");
12619 12638 *errp = E2BIG;
12620 12639 return (NULL);
12621 12640 }
12622 12641
12623 12642 if (hdr.dofh_loadsz < sizeof (hdr)) {
12624 12643 dtrace_dof_error(&hdr, "invalid load size");
12625 12644 *errp = EINVAL;
12626 12645 return (NULL);
12627 12646 }
12628 12647
12629 12648 dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);
12630 12649
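	/*
	 * Now copy in the full DOF.  Because the user's memory can change
	 * between the two copyin()s, re-check that the load size in the
	 * copied-in header still matches the size that we allocated.
	 */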
12631 12650 if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
12632 12651 dof->dofh_loadsz != hdr.dofh_loadsz) {
12633 12652 kmem_free(dof, hdr.dofh_loadsz);
12634 12653 *errp = EFAULT;
12635 12654 return (NULL);
12636 12655 }
12637 12656
12638 12657 return (dof);
12639 12658 }
12640 12659
12641 12660 static dof_hdr_t *
12642 12661 dtrace_dof_property(const char *name)
12643 12662 {
12644 12663 uchar_t *buf;
12645 12664 uint64_t loadsz;
12646 12665 unsigned int len, i;
12647 12666 dof_hdr_t *dof;
12648 12667
12649 12668 /*
12650 12669 	 * Unfortunately, arrays of values in .conf files are always (and
12651 12670 * only) interpreted to be integer arrays. We must read our DOF
12652 12671 * as an integer array, and then squeeze it into a byte array.
12653 12672 */
12654 12673 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
12655 12674 (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
12656 12675 return (NULL);
12657 12676
12658 12677 for (i = 0; i < len; i++)
12659 12678 buf[i] = (uchar_t)(((int *)buf)[i]);
12660 12679
12661 12680 if (len < sizeof (dof_hdr_t)) {
12662 12681 ddi_prop_free(buf);
12663 12682 dtrace_dof_error(NULL, "truncated header");
12664 12683 return (NULL);
12665 12684 }
12666 12685
12667 12686 if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
12668 12687 ddi_prop_free(buf);
12669 12688 dtrace_dof_error(NULL, "truncated DOF");
12670 12689 return (NULL);
12671 12690 }
12672 12691
12673 12692 if (loadsz >= dtrace_dof_maxsize) {
12674 12693 ddi_prop_free(buf);
12675 12694 dtrace_dof_error(NULL, "oversized DOF");
12676 12695 return (NULL);
12677 12696 }
12678 12697
12679 12698 dof = kmem_alloc(loadsz, KM_SLEEP);
12680 12699 bcopy(buf, dof, loadsz);
12681 12700 ddi_prop_free(buf);
12682 12701
12683 12702 return (dof);
12684 12703 }
12685 12704
12686 12705 static void
12687 12706 dtrace_dof_destroy(dof_hdr_t *dof)
12688 12707 {
12689 12708 kmem_free(dof, dof->dofh_loadsz);
12690 12709 }
12691 12710
12692 12711 /*
12693 12712 * Return the dof_sec_t pointer corresponding to a given section index. If the
12694 12713 * index is not valid, dtrace_dof_error() is called and NULL is returned. If
12695 12714 * a type other than DOF_SECT_NONE is specified, the header is checked against
12696 12715 * this type and NULL is returned if the types do not match.
12697 12716 */
12698 12717 static dof_sec_t *
12699 12718 dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
12700 12719 {
12701 12720 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
12702 12721 ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);
12703 12722
12704 12723 if (i >= dof->dofh_secnum) {
12705 12724 dtrace_dof_error(dof, "referenced section index is invalid");
12706 12725 return (NULL);
12707 12726 }
12708 12727
12709 12728 if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
12710 12729 dtrace_dof_error(dof, "referenced section is not loadable");
12711 12730 return (NULL);
12712 12731 }
12713 12732
12714 12733 if (type != DOF_SECT_NONE && type != sec->dofs_type) {
12715 12734 dtrace_dof_error(dof, "referenced section is the wrong type");
12716 12735 return (NULL);
12717 12736 }
12718 12737
12719 12738 return (sec);
12720 12739 }
12721 12740
12722 12741 static dtrace_probedesc_t *
12723 12742 dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
12724 12743 {
12725 12744 dof_probedesc_t *probe;
12726 12745 dof_sec_t *strtab;
12727 12746 uintptr_t daddr = (uintptr_t)dof;
12728 12747 uintptr_t str;
12729 12748 size_t size;
12730 12749
12731 12750 if (sec->dofs_type != DOF_SECT_PROBEDESC) {
12732 12751 dtrace_dof_error(dof, "invalid probe section");
12733 12752 return (NULL);
12734 12753 }
12735 12754
12736 12755 if (sec->dofs_align != sizeof (dof_secidx_t)) {
12737 12756 dtrace_dof_error(dof, "bad alignment in probe description");
12738 12757 return (NULL);
12739 12758 }
12740 12759
12741 12760 if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
12742 12761 dtrace_dof_error(dof, "truncated probe description");
12743 12762 return (NULL);
12744 12763 }
12745 12764
12746 12765 probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
12747 12766 strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);
12748 12767
12749 12768 if (strtab == NULL)
12750 12769 return (NULL);
12751 12770
12752 12771 str = daddr + strtab->dofs_offset;
12753 12772 size = strtab->dofs_size;
12754 12773
12755 12774 if (probe->dofp_provider >= strtab->dofs_size) {
12756 12775 dtrace_dof_error(dof, "corrupt probe provider");
12757 12776 return (NULL);
12758 12777 }
12759 12778
12760 12779 (void) strncpy(desc->dtpd_provider,
12761 12780 (char *)(str + probe->dofp_provider),
12762 12781 MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));
12763 12782
12764 12783 if (probe->dofp_mod >= strtab->dofs_size) {
12765 12784 dtrace_dof_error(dof, "corrupt probe module");
12766 12785 return (NULL);
12767 12786 }
12768 12787
12769 12788 (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
12770 12789 MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));
12771 12790
12772 12791 if (probe->dofp_func >= strtab->dofs_size) {
12773 12792 dtrace_dof_error(dof, "corrupt probe function");
12774 12793 return (NULL);
12775 12794 }
12776 12795
12777 12796 (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
12778 12797 MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));
12779 12798
12780 12799 if (probe->dofp_name >= strtab->dofs_size) {
12781 12800 dtrace_dof_error(dof, "corrupt probe name");
12782 12801 return (NULL);
12783 12802 }
12784 12803
12785 12804 (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
12786 12805 MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));
12787 12806
12788 12807 return (desc);
12789 12808 }
12790 12809
12791 12810 static dtrace_difo_t *
12792 12811 dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12793 12812 cred_t *cr)
12794 12813 {
12795 12814 dtrace_difo_t *dp;
12796 12815 size_t ttl = 0;
12797 12816 dof_difohdr_t *dofd;
12798 12817 uintptr_t daddr = (uintptr_t)dof;
12799 12818 size_t max = dtrace_difo_maxsize;
12800 12819 int i, l, n;
12801 12820
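	/*
	 * This table drives the loading of each known DIFO sub-section
	 * type:  where its buffer pointer and length live within the
	 * dtrace_difo_t, its expected entry size and alignment, and the
	 * error to report if the section appears more than once.
	 */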
12802 12821 static const struct {
12803 12822 int section;
12804 12823 int bufoffs;
12805 12824 int lenoffs;
12806 12825 int entsize;
12807 12826 int align;
12808 12827 const char *msg;
12809 12828 } difo[] = {
12810 12829 { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
12811 12830 offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
12812 12831 sizeof (dif_instr_t), "multiple DIF sections" },
12813 12832
12814 12833 { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
12815 12834 offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
12816 12835 sizeof (uint64_t), "multiple integer tables" },
12817 12836
12818 12837 { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
12819 12838 offsetof(dtrace_difo_t, dtdo_strlen), 0,
12820 12839 sizeof (char), "multiple string tables" },
12821 12840
12822 12841 { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
12823 12842 offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
12824 12843 sizeof (uint_t), "multiple variable tables" },
12825 12844
12826 12845 		{ DOF_SECT_NONE, 0, 0, 0, 0, NULL }
12827 12846 };
12828 12847
12829 12848 if (sec->dofs_type != DOF_SECT_DIFOHDR) {
12830 12849 dtrace_dof_error(dof, "invalid DIFO header section");
12831 12850 return (NULL);
12832 12851 }
12833 12852
12834 12853 if (sec->dofs_align != sizeof (dof_secidx_t)) {
12835 12854 dtrace_dof_error(dof, "bad alignment in DIFO header");
12836 12855 return (NULL);
12837 12856 }
12838 12857
12839 12858 if (sec->dofs_size < sizeof (dof_difohdr_t) ||
12840 12859 sec->dofs_size % sizeof (dof_secidx_t)) {
12841 12860 dtrace_dof_error(dof, "bad size in DIFO header");
12842 12861 return (NULL);
12843 12862 }
12844 12863
12845 12864 dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
12846 12865 n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;
12847 12866
12848 12867 dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
12849 12868 dp->dtdo_rtype = dofd->dofd_rtype;
12850 12869
12851 12870 for (l = 0; l < n; l++) {
12852 12871 dof_sec_t *subsec;
12853 12872 void **bufp;
12854 12873 uint32_t *lenp;
12855 12874
12856 12875 if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
12857 12876 dofd->dofd_links[l])) == NULL)
12858 12877 goto err; /* invalid section link */
12859 12878
12860 12879 if (ttl + subsec->dofs_size > max) {
12861 12880 dtrace_dof_error(dof, "exceeds maximum size");
12862 12881 goto err;
12863 12882 }
12864 12883
12865 12884 ttl += subsec->dofs_size;
12866 12885
12867 12886 for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
12868 12887 if (subsec->dofs_type != difo[i].section)
12869 12888 continue;
12870 12889
12871 12890 if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
12872 12891 dtrace_dof_error(dof, "section not loaded");
12873 12892 goto err;
12874 12893 }
12875 12894
12876 12895 if (subsec->dofs_align != difo[i].align) {
12877 12896 dtrace_dof_error(dof, "bad alignment");
12878 12897 goto err;
12879 12898 }
12880 12899
12881 12900 bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
12882 12901 lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);
12883 12902
12884 12903 if (*bufp != NULL) {
12885 12904 dtrace_dof_error(dof, difo[i].msg);
12886 12905 goto err;
12887 12906 }
12888 12907
12889 12908 if (difo[i].entsize != subsec->dofs_entsize) {
12890 12909 dtrace_dof_error(dof, "entry size mismatch");
12891 12910 goto err;
12892 12911 }
12893 12912
12894 12913 if (subsec->dofs_entsize != 0 &&
12895 12914 (subsec->dofs_size % subsec->dofs_entsize) != 0) {
12896 12915 dtrace_dof_error(dof, "corrupt entry size");
12897 12916 goto err;
12898 12917 }
12899 12918
12900 12919 *lenp = subsec->dofs_size;
12901 12920 *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
12902 12921 bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
12903 12922 *bufp, subsec->dofs_size);
12904 12923
12905 12924 if (subsec->dofs_entsize != 0)
12906 12925 *lenp /= subsec->dofs_entsize;
12907 12926
12908 12927 break;
12909 12928 }
12910 12929
12911 12930 /*
12912 12931 * If we encounter a loadable DIFO sub-section that is not
12913 12932 * known to us, assume this is a broken program and fail.
12914 12933 */
12915 12934 if (difo[i].section == DOF_SECT_NONE &&
12916 12935 (subsec->dofs_flags & DOF_SECF_LOAD)) {
12917 12936 dtrace_dof_error(dof, "unrecognized DIFO subsection");
12918 12937 goto err;
12919 12938 }
12920 12939 }
12921 12940
12922 12941 if (dp->dtdo_buf == NULL) {
12923 12942 /*
12924 12943 * We can't have a DIF object without DIF text.
12925 12944 */
12926 12945 dtrace_dof_error(dof, "missing DIF text");
12927 12946 goto err;
12928 12947 }
12929 12948
12930 12949 /*
12931 12950 * Before we validate the DIF object, run through the variable table
12932 12951 	 * looking for strings -- if any of them have a zero size, we'll set
12933 12952 * their size to be the system-wide default string size. Note that
12934 12953 * this should _not_ happen if the "strsize" option has been set --
12935 12954 * in this case, the compiler should have set the size to reflect the
12936 12955 * setting of the option.
12937 12956 */
12938 12957 for (i = 0; i < dp->dtdo_varlen; i++) {
12939 12958 dtrace_difv_t *v = &dp->dtdo_vartab[i];
12940 12959 dtrace_diftype_t *t = &v->dtdv_type;
12941 12960
12942 12961 if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
12943 12962 continue;
12944 12963
12945 12964 if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
12946 12965 t->dtdt_size = dtrace_strsize_default;
12947 12966 }
12948 12967
12949 12968 if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
12950 12969 goto err;
12951 12970
12952 12971 dtrace_difo_init(dp, vstate);
12953 12972 return (dp);
12954 12973
12955 12974 err:
12956 12975 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
12957 12976 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
12958 12977 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
12959 12978 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
12960 12979
12961 12980 kmem_free(dp, sizeof (dtrace_difo_t));
12962 12981 return (NULL);
12963 12982 }
12964 12983
12965 12984 static dtrace_predicate_t *
12966 12985 dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12967 12986 cred_t *cr)
12968 12987 {
12969 12988 dtrace_difo_t *dp;
12970 12989
12971 12990 if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
12972 12991 return (NULL);
12973 12992
12974 12993 return (dtrace_predicate_create(dp));
12975 12994 }
12976 12995
12977 12996 static dtrace_actdesc_t *
12978 12997 dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12979 12998 cred_t *cr)
12980 12999 {
12981 13000 dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
12982 13001 dof_actdesc_t *desc;
12983 13002 dof_sec_t *difosec;
12984 13003 size_t offs;
12985 13004 uintptr_t daddr = (uintptr_t)dof;
12986 13005 uint64_t arg;
12987 13006 dtrace_actkind_t kind;
12988 13007
12989 13008 if (sec->dofs_type != DOF_SECT_ACTDESC) {
12990 13009 dtrace_dof_error(dof, "invalid action section");
12991 13010 return (NULL);
12992 13011 }
12993 13012
12994 13013 if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
12995 13014 dtrace_dof_error(dof, "truncated action description");
12996 13015 return (NULL);
12997 13016 }
12998 13017
12999 13018 if (sec->dofs_align != sizeof (uint64_t)) {
13000 13019 dtrace_dof_error(dof, "bad alignment in action description");
13001 13020 return (NULL);
13002 13021 }
13003 13022
13004 13023 if (sec->dofs_size < sec->dofs_entsize) {
13005 13024 dtrace_dof_error(dof, "section entry size exceeds total size");
13006 13025 return (NULL);
13007 13026 }
13008 13027
13009 13028 if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
13010 13029 dtrace_dof_error(dof, "bad entry size in action description");
13011 13030 return (NULL);
13012 13031 }
13013 13032
13014 13033 if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
13015 13034 dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
13016 13035 return (NULL);
13017 13036 }
13018 13037
13019 13038 for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
13020 13039 desc = (dof_actdesc_t *)(daddr +
13021 13040 (uintptr_t)sec->dofs_offset + offs);
13022 13041 kind = (dtrace_actkind_t)desc->dofa_kind;
13023 13042
13024 13043 if ((DTRACEACT_ISPRINTFLIKE(kind) &&
13025 13044 (kind != DTRACEACT_PRINTA ||
13026 13045 desc->dofa_strtab != DOF_SECIDX_NONE)) ||
13027 13046 (kind == DTRACEACT_DIFEXPR &&
13028 13047 desc->dofa_strtab != DOF_SECIDX_NONE)) {
13029 13048 dof_sec_t *strtab;
13030 13049 char *str, *fmt;
13031 13050 uint64_t i;
13032 13051
13033 13052 /*
13034 13053 * The argument to these actions is an index into the
13035 13054 * DOF string table. For printf()-like actions, this
13036 13055 * is the format string. For print(), this is the
13037 13056 * CTF type of the expression result.
13038 13057 */
13039 13058 if ((strtab = dtrace_dof_sect(dof,
13040 13059 DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
13041 13060 goto err;
13042 13061
13043 13062 str = (char *)((uintptr_t)dof +
13044 13063 (uintptr_t)strtab->dofs_offset);
13045 13064
13046 13065 for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
13047 13066 if (str[i] == '\0')
13048 13067 break;
13049 13068 }
13050 13069
13051 13070 if (i >= strtab->dofs_size) {
13052 13071 dtrace_dof_error(dof, "bogus format string");
13053 13072 goto err;
13054 13073 }
13055 13074
13056 13075 if (i == desc->dofa_arg) {
13057 13076 dtrace_dof_error(dof, "empty format string");
13058 13077 goto err;
13059 13078 }
13060 13079
13061 13080 i -= desc->dofa_arg;
13062 13081 fmt = kmem_alloc(i + 1, KM_SLEEP);
13063 13082 bcopy(&str[desc->dofa_arg], fmt, i + 1);
13064 13083 arg = (uint64_t)(uintptr_t)fmt;
13065 13084 } else {
13066 13085 if (kind == DTRACEACT_PRINTA) {
13067 13086 ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
13068 13087 arg = 0;
13069 13088 } else {
13070 13089 arg = desc->dofa_arg;
13071 13090 }
13072 13091 }
13073 13092
13074 13093 act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
13075 13094 desc->dofa_uarg, arg);
13076 13095
13077 13096 if (last != NULL) {
13078 13097 last->dtad_next = act;
13079 13098 } else {
13080 13099 first = act;
13081 13100 }
13082 13101
13083 13102 last = act;
13084 13103
13085 13104 if (desc->dofa_difo == DOF_SECIDX_NONE)
13086 13105 continue;
13087 13106
13088 13107 if ((difosec = dtrace_dof_sect(dof,
13089 13108 DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
13090 13109 goto err;
13091 13110
13092 13111 act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);
13093 13112
13094 13113 if (act->dtad_difo == NULL)
13095 13114 goto err;
13096 13115 }
13097 13116
13098 13117 ASSERT(first != NULL);
13099 13118 return (first);
13100 13119
13101 13120 err:
13102 13121 for (act = first; act != NULL; act = next) {
13103 13122 next = act->dtad_next;
13104 13123 dtrace_actdesc_release(act, vstate);
13105 13124 }
13106 13125
13107 13126 return (NULL);
13108 13127 }
13109 13128
13110 13129 static dtrace_ecbdesc_t *
13111 13130 dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
13112 13131 cred_t *cr)
13113 13132 {
13114 13133 dtrace_ecbdesc_t *ep;
13115 13134 dof_ecbdesc_t *ecb;
13116 13135 dtrace_probedesc_t *desc;
13117 13136 dtrace_predicate_t *pred = NULL;
13118 13137
13119 13138 if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
13120 13139 dtrace_dof_error(dof, "truncated ECB description");
13121 13140 return (NULL);
13122 13141 }
13123 13142
13124 13143 if (sec->dofs_align != sizeof (uint64_t)) {
13125 13144 dtrace_dof_error(dof, "bad alignment in ECB description");
13126 13145 return (NULL);
13127 13146 }
13128 13147
13129 13148 ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
13130 13149 sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);
13131 13150
13132 13151 if (sec == NULL)
13133 13152 return (NULL);
13134 13153
13135 13154 ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
13136 13155 ep->dted_uarg = ecb->dofe_uarg;
13137 13156 desc = &ep->dted_probe;
13138 13157
13139 13158 if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
13140 13159 goto err;
13141 13160
13142 13161 if (ecb->dofe_pred != DOF_SECIDX_NONE) {
13143 13162 if ((sec = dtrace_dof_sect(dof,
13144 13163 DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
13145 13164 goto err;
13146 13165
13147 13166 if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
13148 13167 goto err;
13149 13168
13150 13169 ep->dted_pred.dtpdd_predicate = pred;
13151 13170 }
13152 13171
13153 13172 if (ecb->dofe_actions != DOF_SECIDX_NONE) {
13154 13173 if ((sec = dtrace_dof_sect(dof,
13155 13174 DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
13156 13175 goto err;
13157 13176
13158 13177 ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);
13159 13178
13160 13179 if (ep->dted_action == NULL)
13161 13180 goto err;
13162 13181 }
13163 13182
13164 13183 return (ep);
13165 13184
13166 13185 err:
13167 13186 if (pred != NULL)
13168 13187 dtrace_predicate_release(pred, vstate);
13169 13188 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
13170 13189 return (NULL);
13171 13190 }
13172 13191
13173 13192 /*
13174 13193 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
13175 13194 * specified DOF. At present, this amounts to simply adding 'ubase' to the
13176 13195 * site of any user SETX relocations to account for load object base address.
13177 13196 * In the future, if we need other relocations, this function can be extended.
13178 13197 */
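/*
 * For instance (with purely illustrative values), a SETX relocation against
 * a target section site holding the value 0x400 would leave 0x10000400 at
 * that site if the containing object had been loaded at ubase 0x10000000.
 */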
13179 13198 static int
13180 13199 dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase)
13181 13200 {
13182 13201 uintptr_t daddr = (uintptr_t)dof;
13183 13202 uintptr_t ts_end;
13184 13203 dof_relohdr_t *dofr =
13185 13204 (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
13186 13205 dof_sec_t *ss, *rs, *ts;
13187 13206 dof_relodesc_t *r;
13188 13207 uint_t i, n;
13189 13208
13190 13209 if (sec->dofs_size < sizeof (dof_relohdr_t) ||
13191 13210 sec->dofs_align != sizeof (dof_secidx_t)) {
13192 13211 dtrace_dof_error(dof, "invalid relocation header");
13193 13212 return (-1);
13194 13213 }
13195 13214
13196 13215 ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
13197 13216 rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
13198 13217 ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);
13199 13218 ts_end = (uintptr_t)ts + sizeof (dof_sec_t);
13200 13219
13201 13220 if (ss == NULL || rs == NULL || ts == NULL)
13202 13221 return (-1); /* dtrace_dof_error() has been called already */
13203 13222
13204 13223 if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
13205 13224 rs->dofs_align != sizeof (uint64_t)) {
13206 13225 dtrace_dof_error(dof, "invalid relocation section");
13207 13226 return (-1);
13208 13227 }
13209 13228
13210 13229 r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
13211 13230 n = rs->dofs_size / rs->dofs_entsize;
13212 13231
13213 13232 for (i = 0; i < n; i++) {
13214 13233 uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;
13215 13234
13216 13235 switch (r->dofr_type) {
13217 13236 case DOF_RELO_NONE:
13218 13237 break;
13219 13238 case DOF_RELO_SETX:
13220 13239 if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
13221 13240 sizeof (uint64_t) > ts->dofs_size) {
13222 13241 dtrace_dof_error(dof, "bad relocation offset");
13223 13242 return (-1);
13224 13243 }
13225 13244
13226 13245 if (taddr >= (uintptr_t)ts && taddr < ts_end) {
13227 13246 dtrace_dof_error(dof, "bad relocation offset");
13228 13247 return (-1);
13229 13248 }
13230 13249
13231 13250 if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
13232 13251 dtrace_dof_error(dof, "misaligned setx relo");
13233 13252 return (-1);
13234 13253 }
13235 13254
13236 13255 *(uint64_t *)taddr += ubase;
13237 13256 break;
13238 13257 default:
13239 13258 dtrace_dof_error(dof, "invalid relocation type");
13240 13259 return (-1);
13241 13260 }
13242 13261
13243 13262 r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
13244 13263 }
13245 13264
13246 13265 return (0);
13247 13266 }
13248 13267
13249 13268 /*
13250 13269 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
13251 13270 * header: it should be at the front of a memory region that is at least
13252 13271 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
13253 13272 * size. It need not be validated in any other way.
13254 13273 */
13255 13274 static int
13256 13275 dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
13257 13276 dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
13258 13277 {
13259 13278 uint64_t len = dof->dofh_loadsz, seclen;
13260 13279 uintptr_t daddr = (uintptr_t)dof;
13261 13280 dtrace_ecbdesc_t *ep;
13262 13281 dtrace_enabling_t *enab;
13263 13282 uint_t i;
13264 13283
13265 13284 ASSERT(MUTEX_HELD(&dtrace_lock));
13266 13285 ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));
13267 13286
13268 13287 /*
13269 13288 * Check the DOF header identification bytes. In addition to checking
13270 13289 * valid settings, we also verify that unused bits/bytes are zeroed so
13271 13290 * we can use them later without fear of regressing existing binaries.
13272 13291 */
13273 13292 if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
13274 13293 DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
13275 13294 dtrace_dof_error(dof, "DOF magic string mismatch");
13276 13295 return (-1);
13277 13296 }
13278 13297
13279 13298 if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
13280 13299 dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
13281 13300 dtrace_dof_error(dof, "DOF has invalid data model");
13282 13301 return (-1);
13283 13302 }
13284 13303
13285 13304 if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
13286 13305 dtrace_dof_error(dof, "DOF encoding mismatch");
13287 13306 return (-1);
13288 13307 }
13289 13308
13290 13309 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
13291 13310 dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) {
13292 13311 dtrace_dof_error(dof, "DOF version mismatch");
13293 13312 return (-1);
13294 13313 }
13295 13314
13296 13315 if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
13297 13316 dtrace_dof_error(dof, "DOF uses unsupported instruction set");
13298 13317 return (-1);
13299 13318 }
13300 13319
13301 13320 if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
13302 13321 dtrace_dof_error(dof, "DOF uses too many integer registers");
13303 13322 return (-1);
13304 13323 }
13305 13324
13306 13325 if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
13307 13326 dtrace_dof_error(dof, "DOF uses too many tuple registers");
13308 13327 return (-1);
13309 13328 }
13310 13329
13311 13330 for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
13312 13331 if (dof->dofh_ident[i] != 0) {
13313 13332 dtrace_dof_error(dof, "DOF has invalid ident byte set");
13314 13333 return (-1);
13315 13334 }
13316 13335 }
13317 13336
13318 13337 if (dof->dofh_flags & ~DOF_FL_VALID) {
13319 13338 dtrace_dof_error(dof, "DOF has invalid flag bits set");
13320 13339 return (-1);
13321 13340 }
13322 13341
13323 13342 if (dof->dofh_secsize == 0) {
13324 13343 dtrace_dof_error(dof, "zero section header size");
13325 13344 return (-1);
13326 13345 }
13327 13346
13328 13347 /*
13329 13348 * Check that the section headers don't exceed the amount of DOF
13330 13349 * data. Note that we cast the section size and number of sections
13331 13350 * to uint64_t's to prevent possible overflow in the multiplication.
13332 13351 */
13333 13352 seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;
13334 13353
13335 13354 if (dof->dofh_secoff > len || seclen > len ||
13336 13355 dof->dofh_secoff + seclen > len) {
13337 13356 dtrace_dof_error(dof, "truncated section headers");
13338 13357 return (-1);
13339 13358 }
13340 13359
13341 13360 if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
13342 13361 dtrace_dof_error(dof, "misaligned section headers");
13343 13362 return (-1);
13344 13363 }
13345 13364
13346 13365 if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
13347 13366 dtrace_dof_error(dof, "misaligned section size");
13348 13367 return (-1);
13349 13368 }
13350 13369
13351 13370 /*
13352 13371 * Take an initial pass through the section headers to be sure that
13353 13372 * the headers don't have stray offsets. If the 'noprobes' flag is
13354 13373 * set, do not permit sections relating to providers, probes, or args.
13355 13374 */
13356 13375 for (i = 0; i < dof->dofh_secnum; i++) {
13357 13376 dof_sec_t *sec = (dof_sec_t *)(daddr +
13358 13377 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
13359 13378
13360 13379 if (noprobes) {
13361 13380 switch (sec->dofs_type) {
13362 13381 case DOF_SECT_PROVIDER:
13363 13382 case DOF_SECT_PROBES:
13364 13383 case DOF_SECT_PRARGS:
13365 13384 case DOF_SECT_PROFFS:
13366 13385 dtrace_dof_error(dof, "illegal sections "
13367 13386 "for enabling");
13368 13387 return (-1);
13369 13388 }
13370 13389 }
13371 13390
13372 13391 if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
13373 13392 !(sec->dofs_flags & DOF_SECF_LOAD)) {
13374 13393 dtrace_dof_error(dof, "loadable section with load "
13375 13394 "flag unset");
13376 13395 return (-1);
13377 13396 }
13378 13397
13379 13398 if (!(sec->dofs_flags & DOF_SECF_LOAD))
13380 13399 continue; /* just ignore non-loadable sections */
13381 13400
13382 13401 if (!ISP2(sec->dofs_align)) {
13383 13402 dtrace_dof_error(dof, "bad section alignment");
13384 13403 return (-1);
13385 13404 }
13386 13405
13387 13406 if (sec->dofs_offset & (sec->dofs_align - 1)) {
13388 13407 dtrace_dof_error(dof, "misaligned section");
13389 13408 return (-1);
13390 13409 }
13391 13410
13392 13411 if (sec->dofs_offset > len || sec->dofs_size > len ||
13393 13412 sec->dofs_offset + sec->dofs_size > len) {
13394 13413 dtrace_dof_error(dof, "corrupt section header");
13395 13414 return (-1);
13396 13415 }
13397 13416
13398 13417 if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
13399 13418 sec->dofs_offset + sec->dofs_size - 1) != '\0') {
13400 13419 dtrace_dof_error(dof, "non-terminating string table");
13401 13420 return (-1);
13402 13421 }
13403 13422 }
13404 13423
13405 13424 /*
13406 13425 * Take a second pass through the sections and locate and perform any
13407 13426 * relocations that are present. We do this after the first pass to
13408 13427 * be sure that all sections have had their headers validated.
13409 13428 */
13410 13429 for (i = 0; i < dof->dofh_secnum; i++) {
13411 13430 dof_sec_t *sec = (dof_sec_t *)(daddr +
13412 13431 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
13413 13432
13414 13433 if (!(sec->dofs_flags & DOF_SECF_LOAD))
13415 13434 continue; /* skip sections that are not loadable */
13416 13435
13417 13436 switch (sec->dofs_type) {
13418 13437 case DOF_SECT_URELHDR:
13419 13438 if (dtrace_dof_relocate(dof, sec, ubase) != 0)
13420 13439 return (-1);
13421 13440 break;
13422 13441 }
13423 13442 }
13424 13443
13425 13444 if ((enab = *enabp) == NULL)
13426 13445 enab = *enabp = dtrace_enabling_create(vstate);
13427 13446
13428 13447 for (i = 0; i < dof->dofh_secnum; i++) {
13429 13448 dof_sec_t *sec = (dof_sec_t *)(daddr +
13430 13449 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
13431 13450
13432 13451 if (sec->dofs_type != DOF_SECT_ECBDESC)
13433 13452 continue;
13434 13453
13435 13454 if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
13436 13455 dtrace_enabling_destroy(enab);
13437 13456 *enabp = NULL;
13438 13457 return (-1);
13439 13458 }
13440 13459
13441 13460 dtrace_enabling_add(enab, ep);
13442 13461 }
13443 13462
13444 13463 return (0);
13445 13464 }
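
A detail worth noting in the validation above: the total section-header length is computed by widening dofh_secnum and dofh_secsize to uint64_t before multiplying, and the offset, the length, and their sum are each tested against the load size separately so that a wrapped sum cannot slip past the check. A self-contained sketch of an equivalent, wraparound-proof predicate (range_ok is a hypothetical name):

	#include <stdint.h>

	/*
	 * Return 1 iff [off, off + len) lies within a buffer of 'total'
	 * bytes, without being fooled by wraparound in off + len.
	 */
	static int
	range_ok(uint64_t off, uint64_t len, uint64_t total)
	{
		/* Equivalent to off + len <= total, but overflow-immune. */
		return (off <= total && len <= total - off);
	}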
13446 13465
13447 13466 /*
13448 13467 * Process DOF for any options. This routine assumes that the DOF has been
13449 13468 * at least processed by dtrace_dof_slurp().
13450 13469 */
13451 13470 static int
13452 13471 dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
13453 13472 {
13454 13473 int i, rval;
13455 13474 uint32_t entsize;
13456 13475 size_t offs;
13457 13476 dof_optdesc_t *desc;
13458 13477
13459 13478 for (i = 0; i < dof->dofh_secnum; i++) {
13460 13479 dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
13461 13480 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
13462 13481
13463 13482 if (sec->dofs_type != DOF_SECT_OPTDESC)
13464 13483 continue;
13465 13484
13466 13485 if (sec->dofs_align != sizeof (uint64_t)) {
13467 13486 dtrace_dof_error(dof, "bad alignment in "
13468 13487 "option description");
13469 13488 return (EINVAL);
13470 13489 }
13471 13490
13472 13491 if ((entsize = sec->dofs_entsize) == 0) {
13473 13492 dtrace_dof_error(dof, "zeroed option entry size");
13474 13493 return (EINVAL);
13475 13494 }
13476 13495
13477 13496 if (entsize < sizeof (dof_optdesc_t)) {
13478 13497 dtrace_dof_error(dof, "bad option entry size");
13479 13498 return (EINVAL);
13480 13499 }
13481 13500
13482 13501 for (offs = 0; offs < sec->dofs_size; offs += entsize) {
13483 13502 desc = (dof_optdesc_t *)((uintptr_t)dof +
13484 13503 (uintptr_t)sec->dofs_offset + offs);
13485 13504
13486 13505 if (desc->dofo_strtab != DOF_SECIDX_NONE) {
13487 13506 dtrace_dof_error(dof, "non-zero option string");
13488 13507 return (EINVAL);
13489 13508 }
13490 13509
13491 13510 if (desc->dofo_value == DTRACEOPT_UNSET) {
13492 13511 dtrace_dof_error(dof, "unset option");
13493 13512 return (EINVAL);
13494 13513 }
13495 13514
13496 13515 if ((rval = dtrace_state_option(state,
13497 13516 desc->dofo_option, desc->dofo_value)) != 0) {
13498 13517 dtrace_dof_error(dof, "rejected option");
13499 13518 return (rval);
13500 13519 }
13501 13520 }
13502 13521 }
13503 13522
13504 13523 return (0);
13505 13524 }
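
dtrace_dof_options() also illustrates the forward-compatible table walk used throughout DOF processing: entries are stepped by the entsize recorded in the section header rather than by sizeof the entry struct, so newer producers may append fields that older consumers simply skip. A sketch of that idiom, with hypothetical names (opt_t, walk_options) and assuming suitably aligned input, as the kernel's dofs_align check guarantees:

	#include <stdint.h>
	#include <stddef.h>

	typedef struct opt {
		uint32_t	o_kind;
		uint64_t	o_value;
	} opt_t;

	static int
	walk_options(const uint8_t *tab, size_t tabsize, size_t entsize,
	    int (*fn)(const opt_t *))
	{
		/* Reject entries too small to hold the fields we read. */
		if (entsize < sizeof (opt_t))
			return (-1);

		for (size_t off = 0; off + entsize <= tabsize;
		    off += entsize) {
			/* Only the leading sizeof (opt_t) bytes are used. */
			if (fn((const opt_t *)(tab + off)) != 0)
				return (-1);
		}
		return (0);
	}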
13506 13525
13507 13526 /*
13508 13527 * DTrace Consumer State Functions
13509 13528 */
13510 13529 int
13511 13530 dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
13512 13531 {
13513 13532 size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize;
13514 13533 void *base;
13515 13534 uintptr_t limit;
13516 13535 dtrace_dynvar_t *dvar, *next, *start;
13517 13536 int i;
13518 13537
13519 13538 ASSERT(MUTEX_HELD(&dtrace_lock));
13520 13539 ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);
13521 13540
13522 13541 bzero(dstate, sizeof (dtrace_dstate_t));
13523 13542
13524 13543 if ((dstate->dtds_chunksize = chunksize) == 0)
13525 13544 dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;
13526 13545
13527 13546 VERIFY(dstate->dtds_chunksize < LONG_MAX);
13528 13547
13529 13548 if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
13530 13549 size = min;
13531 13550
13532 13551 if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL)
13533 13552 return (ENOMEM);
13534 13553
13535 13554 dstate->dtds_size = size;
13536 13555 dstate->dtds_base = base;
13537 13556 dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
13538 13557 bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t));
13539 13558
13540 13559 hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));
13541 13560
13542 13561 if (hashsize != 1 && (hashsize & 1))
13543 13562 hashsize--;
13544 13563
13545 13564 dstate->dtds_hashsize = hashsize;
13546 13565 dstate->dtds_hash = dstate->dtds_base;
13547 13566
13548 13567 /*
13549 13568 * Set all of our hash buckets to point to the single sink, and (if
13550 13569 * it hasn't already been set), set the sink's hash value to be the
13551 13570 * sink sentinel value. The sink is needed for dynamic variable
13552 13571 * lookups to know that they have iterated over an entire, valid hash
13553 13572 * chain.
13554 13573 */
13555 13574 for (i = 0; i < hashsize; i++)
13556 13575 dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;
13557 13576
13558 13577 if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
13559 13578 dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;
13560 13579
13561 13580 /*
13562 13581 * Determine number of active CPUs. Divide free list evenly among
13563 13582 * active CPUs.
13564 13583 */
13565 13584 start = (dtrace_dynvar_t *)
13566 13585 ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
13567 13586 limit = (uintptr_t)base + size;
13568 13587
13569 13588 VERIFY((uintptr_t)start < limit);
13570 13589 VERIFY((uintptr_t)start >= (uintptr_t)base);
13571 13590
13572 13591 maxper = (limit - (uintptr_t)start) / NCPU;
13573 13592 maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;
13574 13593
13575 13594 for (i = 0; i < NCPU; i++) {
13576 13595 dstate->dtds_percpu[i].dtdsc_free = dvar = start;
13577 13596
13578 13597 /*
13579 13598 * If we don't even have enough chunks to make it once through
13580 13599 * NCPUs, we're just going to allocate everything to the first
13581 13600 * CPU. And if we're on the last CPU, we're going to allocate
13582 13601 * whatever is left over. In either case, we set the limit to
13583 13602 * be the limit of the dynamic variable space.
13584 13603 */
13585 13604 if (maxper == 0 || i == NCPU - 1) {
13586 13605 limit = (uintptr_t)base + size;
13587 13606 start = NULL;
13588 13607 } else {
13589 13608 limit = (uintptr_t)start + maxper;
13590 13609 start = (dtrace_dynvar_t *)limit;
13591 13610 }
13592 13611
13593 13612 VERIFY(limit <= (uintptr_t)base + size);
13594 13613
13595 13614 for (;;) {
13596 13615 next = (dtrace_dynvar_t *)((uintptr_t)dvar +
13597 13616 dstate->dtds_chunksize);
13598 13617
13599 13618 if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
13600 13619 break;
13601 13620
13602 13621 VERIFY((uintptr_t)dvar >= (uintptr_t)base &&
13603 13622 (uintptr_t)dvar <= (uintptr_t)base + size);
13604 13623 dvar->dtdv_next = next;
13605 13624 dvar = next;
13606 13625 }
13607 13626
13608 13627 if (maxper == 0)
13609 13628 break;
13610 13629 }
13611 13630
13612 13631 return (0);
13613 13632 }
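
The free-list setup above carves one contiguous allocation into per-CPU runs of fixed-size chunks: maxper is rounded down to a chunk multiple, the last CPU (or the only CPU, when the region is too small to divide) absorbs the remainder, and each run is threaded into a singly linked list. A simplified userland sketch of the carving, with hypothetical names and no hash area:

	#include <stdint.h>
	#include <stddef.h>

	typedef struct chunk {
		struct chunk	*c_next;
	} chunk_t;

	/*
	 * Divide base[0 .. size) into ncpu free lists of fixed-size
	 * chunks; freelist[i] receives CPU i's share.
	 */
	static void
	carve(uint8_t *base, size_t size, size_t chunksize, int ncpu,
	    chunk_t **freelist)
	{
		uintptr_t start = (uintptr_t)base;
		uintptr_t end = (uintptr_t)base + size;
		size_t maxper = (size / ncpu / chunksize) * chunksize;

		for (int i = 0; i < ncpu; i++)
			freelist[i] = NULL;

		for (int i = 0; i < ncpu; i++) {
			/* Last CPU (or a tiny region) takes the rest. */
			uintptr_t limit = (maxper == 0 || i == ncpu - 1) ?
			    end : start + maxper;
			chunk_t *c;

			if (start + chunksize > limit)
				break;

			freelist[i] = c = (chunk_t *)start;
			while ((uintptr_t)c + 2 * chunksize <= limit) {
				c->c_next =
				    (chunk_t *)((uintptr_t)c + chunksize);
				c = c->c_next;
			}
			c->c_next = NULL;

			if (maxper == 0)
				break;	/* everything went to CPU 0 */
			start = limit;
		}
	}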
13614 13633
13615 13634 void
13616 13635 dtrace_dstate_fini(dtrace_dstate_t *dstate)
13617 13636 {
13618 13637 ASSERT(MUTEX_HELD(&cpu_lock));
13619 13638
13620 13639 if (dstate->dtds_base == NULL)
13621 13640 return;
13622 13641
13623 13642 kmem_free(dstate->dtds_base, dstate->dtds_size);
13624 13643 kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
13625 13644 }
13626 13645
13627 13646 static void
13628 13647 dtrace_vstate_fini(dtrace_vstate_t *vstate)
13629 13648 {
13630 13649 /*
13631 13650 * Logical XOR, where are you?
13632 13651 */
13633 13652 ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));
13634 13653
13635 13654 if (vstate->dtvs_nglobals > 0) {
13636 13655 kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
13637 13656 sizeof (dtrace_statvar_t *));
13638 13657 }
13639 13658
13640 13659 if (vstate->dtvs_ntlocals > 0) {
13641 13660 kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
13642 13661 sizeof (dtrace_difv_t));
13643 13662 }
13644 13663
13645 13664 ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));
13646 13665
13647 13666 if (vstate->dtvs_nlocals > 0) {
13648 13667 kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
13649 13668 sizeof (dtrace_statvar_t *));
13650 13669 }
13651 13670 }
13652 13671
13653 13672 static void
13654 13673 dtrace_state_clean(dtrace_state_t *state)
13655 13674 {
13656 13675 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
13657 13676 return;
13658 13677
13659 13678 dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
13660 13679 dtrace_speculation_clean(state);
13661 13680 }
13662 13681
13663 13682 static void
13664 13683 dtrace_state_deadman(dtrace_state_t *state)
13665 13684 {
13666 13685 hrtime_t now;
13667 13686
13668 13687 dtrace_sync();
13669 13688
13670 13689 now = dtrace_gethrtime();
13671 13690
13672 13691 if (state != dtrace_anon.dta_state &&
13673 13692 now - state->dts_laststatus >= dtrace_deadman_user)
13674 13693 return;
13675 13694
13676 13695 /*
13677 13696 * We must be sure that dts_alive never appears to be less than the
13678 13697 * value upon entry to dtrace_state_deadman(), and because we lack a
13679 13698 * dtrace_cas64(), we cannot store to it atomically. We thus instead
13680 13699 * store INT64_MAX to it, followed by a memory barrier, followed by
13681 13700 * the new value. This assures that dts_alive never appears to be
13682 13701 * less than its true value, regardless of the order in which the
13683 13702 * stores to the underlying storage are issued.
13684 13703 */
13685 13704 state->dts_alive = INT64_MAX;
13686 13705 dtrace_membar_producer();
13687 13706 state->dts_alive = now;
13688 13707 }
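
The dts_alive update above is a small lock-free trick worth calling out: lacking a 64-bit compare-and-swap, the code stores a sentinel (INT64_MAX), issues a producer barrier, and only then stores the new time, so a concurrent reader can never observe a value smaller than the one that was current on entry. A userland analogue, using C11 atomics in place of dtrace_membar_producer() (C11 atomic stores are already single-copy atomic, so this sketch models only the ordering aspect):

	#include <stdatomic.h>
	#include <stdint.h>

	static _Atomic int64_t alive;

	/*
	 * Publish a new timestamp such that readers never see 'alive'
	 * go backwards, even though the pair of stores is not atomic.
	 */
	static void
	publish_alive(int64_t now)
	{
		atomic_store_explicit(&alive, INT64_MAX,
		    memory_order_relaxed);
		/* Order the sentinel store before the value store. */
		atomic_thread_fence(memory_order_release);
		atomic_store_explicit(&alive, now, memory_order_relaxed);
	}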
13689 13708
13690 13709 dtrace_state_t *
13691 13710 dtrace_state_create(dev_t *devp, cred_t *cr)
13692 13711 {
13693 13712 minor_t minor;
13694 13713 major_t major;
13695 13714 char c[30];
13696 13715 dtrace_state_t *state;
13697 13716 dtrace_optval_t *opt;
13698 13717 int bufsize = NCPU * sizeof (dtrace_buffer_t), i;
13699 13718
13700 13719 ASSERT(MUTEX_HELD(&dtrace_lock));
13701 13720 ASSERT(MUTEX_HELD(&cpu_lock));
13702 13721
13703 13722 minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
13704 13723 VM_BESTFIT | VM_SLEEP);
13705 13724
13706 13725 if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
13707 13726 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
13708 13727 return (NULL);
13709 13728 }
13710 13729
13711 13730 state = ddi_get_soft_state(dtrace_softstate, minor);
13712 13731 state->dts_epid = DTRACE_EPIDNONE + 1;
13713 13732
13714 13733 (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
13715 13734 state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
13716 13735 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
13717 13736
13718 13737 if (devp != NULL) {
13719 13738 major = getemajor(*devp);
13720 13739 } else {
13721 13740 major = ddi_driver_major(dtrace_devi);
13722 13741 }
13723 13742
13724 13743 state->dts_dev = makedevice(major, minor);
13725 13744
13726 13745 if (devp != NULL)
13727 13746 *devp = state->dts_dev;
13728 13747
13729 13748 /*
13730 13749 * We allocate NCPU buffers. On the one hand, this can be quite
13731 13750 * a bit of memory per instance (nearly 36K on a Starcat). On the
13732 13751 * other hand, it saves an additional memory reference in the probe
13733 13752 * path.
13734 13753 */
13735 13754 state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
13736 13755 state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
13737 13756 state->dts_cleaner = CYCLIC_NONE;
13738 13757 state->dts_deadman = CYCLIC_NONE;
13739 13758 state->dts_vstate.dtvs_state = state;
13740 13759
13741 13760 for (i = 0; i < DTRACEOPT_MAX; i++)
13742 13761 state->dts_options[i] = DTRACEOPT_UNSET;
13743 13762
13744 13763 /*
13745 13764 * Set the default options.
13746 13765 */
13747 13766 opt = state->dts_options;
13748 13767 opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
13749 13768 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
13750 13769 opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
13751 13770 opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
13752 13771 opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
13753 13772 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
13754 13773 opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
13755 13774 opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
13756 13775 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
13757 13776 opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
13758 13777 opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
13759 13778 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
13760 13779 opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
13761 13780 opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;
13762 13781
13763 13782 state->dts_activity = DTRACE_ACTIVITY_INACTIVE;
13764 13783
13765 13784 /*
13766 13785 * Depending on the user credentials, we set flag bits which alter probe
13767 13786 * visibility or the amount of destructiveness allowed. In the case of
13768 13787 * actual anonymous tracing, or the possession of all privileges, all of
13769 13788 * the normal checks are bypassed.
13770 13789 */
13771 13790 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
13772 13791 state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
13773 13792 state->dts_cred.dcr_action = DTRACE_CRA_ALL;
13774 13793 } else {
13775 13794 /*
13776 13795 * Set up the credentials for this instantiation. We take a
13777 13796 * hold on the credential to prevent it from disappearing on
13778 13797 * us; this in turn prevents the zone_t referenced by this
13779 13798 * credential from disappearing. This means that we can
13780 13799 * examine the credential and the zone from probe context.
13781 13800 */
13782 13801 crhold(cr);
13783 13802 state->dts_cred.dcr_cred = cr;
13784 13803
13785 13804 /*
13786 13805 * CRA_PROC means "we have *some* privilege for dtrace" and
13787 13806 * unlocks the use of variables like pid, zonename, etc.
13788 13807 */
13789 13808 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
13790 13809 PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
13791 13810 state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
13792 13811 }
13793 13812
13794 13813 /*
13795 13814 * dtrace_user allows use of syscall and profile providers.
13796 13815 * If the user also has proc_owner and/or proc_zone, we
13797 13816 * extend the scope to include additional visibility and
13798 13817 * destructive power.
13799 13818 */
13800 13819 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
13801 13820 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
13802 13821 state->dts_cred.dcr_visible |=
13803 13822 DTRACE_CRV_ALLPROC;
13804 13823
13805 13824 state->dts_cred.dcr_action |=
13806 13825 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13807 13826 }
13808 13827
13809 13828 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
13810 13829 state->dts_cred.dcr_visible |=
13811 13830 DTRACE_CRV_ALLZONE;
13812 13831
13813 13832 state->dts_cred.dcr_action |=
13814 13833 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
13815 13834 }
13816 13835
13817 13836 /*
13818 13837 * If we have all privs in whatever zone this is,
13819 13838 * we can do destructive things to processes which
13820 13839 * have altered credentials.
13821 13840 */
13822 13841 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
13823 13842 cr->cr_zone->zone_privset)) {
13824 13843 state->dts_cred.dcr_action |=
13825 13844 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
13826 13845 }
13827 13846 }
13828 13847
13829 13848 /*
13830 13849 * Holding the dtrace_kernel privilege also implies that
13831 13850 * the user has the dtrace_user privilege from a visibility
13832 13851 * perspective. But without further privileges, some
13833 13852 * destructive actions are not available.
13834 13853 */
13835 13854 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
13836 13855 /*
13837 13856 * Make all probes in all zones visible. However,
13838 13857 * this doesn't mean that all actions become available
13839 13858 * to all zones.
13840 13859 */
13841 13860 state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
13842 13861 DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;
13843 13862
13844 13863 state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
13845 13864 DTRACE_CRA_PROC;
13846 13865 /*
13847 13866 * Holding proc_owner means that destructive actions
13848 13867 * for *this* zone are allowed.
13849 13868 */
13850 13869 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
13851 13870 state->dts_cred.dcr_action |=
13852 13871 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13853 13872
13854 13873 /*
13855 13874 * Holding proc_zone means that destructive actions
13856 13875 	 * for this user/group ID in all zones are allowed.
13857 13876 */
13858 13877 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
13859 13878 state->dts_cred.dcr_action |=
13860 13879 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
13861 13880
13862 13881 /*
13863 13882 * If we have all privs in whatever zone this is,
13864 13883 * we can do destructive things to processes which
13865 13884 * have altered credentials.
13866 13885 */
13867 13886 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
13868 13887 cr->cr_zone->zone_privset)) {
13869 13888 state->dts_cred.dcr_action |=
13870 13889 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
13871 13890 }
13872 13891 }
13873 13892
13874 13893 /*
13875 13894 * Holding the dtrace_proc privilege gives control over fasttrap
13876 13895 * and pid providers. We need to grant wider destructive
13877 13896 * privileges in the event that the user has proc_owner and/or
13878 13897 * proc_zone.
13879 13898 */
13880 13899 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
13881 13900 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
13882 13901 state->dts_cred.dcr_action |=
13883 13902 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13884 13903
13885 13904 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
13886 13905 state->dts_cred.dcr_action |=
13887 13906 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
13888 13907 }
13889 13908 }
13890 13909
13891 13910 return (state);
13892 13911 }
13893 13912
13894 13913 static int
13895 13914 dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
13896 13915 {
13897 13916 dtrace_optval_t *opt = state->dts_options, size;
13898 13917 processorid_t cpu;
13899 13918 int flags = 0, rval, factor, divisor = 1;
13900 13919
13901 13920 ASSERT(MUTEX_HELD(&dtrace_lock));
13902 13921 ASSERT(MUTEX_HELD(&cpu_lock));
13903 13922 ASSERT(which < DTRACEOPT_MAX);
13904 13923 ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
13905 13924 (state == dtrace_anon.dta_state &&
13906 13925 state->dts_activity == DTRACE_ACTIVITY_ACTIVE));
13907 13926
13908 13927 if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
13909 13928 return (0);
13910 13929
13911 13930 if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
13912 13931 cpu = opt[DTRACEOPT_CPU];
13913 13932
13914 13933 if (which == DTRACEOPT_SPECSIZE)
13915 13934 flags |= DTRACEBUF_NOSWITCH;
13916 13935
13917 13936 if (which == DTRACEOPT_BUFSIZE) {
13918 13937 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
13919 13938 flags |= DTRACEBUF_RING;
13920 13939
13921 13940 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
13922 13941 flags |= DTRACEBUF_FILL;
13923 13942
13924 13943 if (state != dtrace_anon.dta_state ||
13925 13944 state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
13926 13945 flags |= DTRACEBUF_INACTIVE;
13927 13946 }
13928 13947
13929 13948 for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) {
13930 13949 /*
13931 13950 * The size must be 8-byte aligned. If the size is not 8-byte
13932 13951 * aligned, drop it down by the difference.
13933 13952 */
13934 13953 if (size & (sizeof (uint64_t) - 1))
13935 13954 size -= size & (sizeof (uint64_t) - 1);
13936 13955
13937 13956 if (size < state->dts_reserve) {
13938 13957 /*
13939 13958 	 * Buffers must always be large enough to accommodate
13940 13959 * their prereserved space. We return E2BIG instead
13941 13960 * of ENOMEM in this case to allow for user-level
13942 13961 * software to differentiate the cases.
13943 13962 */
13944 13963 return (E2BIG);
13945 13964 }
13946 13965
13947 13966 rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor);
13948 13967
13949 13968 if (rval != ENOMEM) {
13950 13969 opt[which] = size;
13951 13970 return (rval);
13952 13971 }
13953 13972
13954 13973 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
13955 13974 return (rval);
13956 13975
13957 13976 for (divisor = 2; divisor < factor; divisor <<= 1)
13958 13977 continue;
13959 13978 }
13960 13979
13961 13980 return (ENOMEM);
13962 13981 }
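
The function above retries failed buffer allocations geometrically: on ENOMEM the request is divided by the smallest power of two at least as large as the allocator's 'factor' hint, subject to 8-byte alignment and the prereserve floor (for which E2BIG, not ENOMEM, is returned). A hedged sketch of the retry shape; try_alloc() is a hypothetical stand-in for dtrace_buffer_alloc():

	#include <stdint.h>
	#include <stddef.h>
	#include <errno.h>

	/*
	 * Hypothetical allocator: returns 0 on success, ENOMEM on
	 * failure, and suggests via *factor how much smaller a request
	 * might need to be in order to succeed.
	 */
	extern int try_alloc(size_t size, int *factor);

	static int
	alloc_backoff(size_t size, size_t floor)
	{
		int divisor = 1;

		for (; size >= sizeof (uint64_t); size /= divisor) {
			int rval, factor;

			/* Keep the request 8-byte aligned. */
			size &= ~(sizeof (uint64_t) - 1);

			if (size < floor)
				return (E2BIG);	/* below the reserve */

			if ((rval = try_alloc(size, &factor)) != ENOMEM)
				return (rval);	/* success or hard error */

			/* Next try: divide by a power of two >= factor. */
			for (divisor = 2; divisor < factor; divisor <<= 1)
				continue;
		}
		return (ENOMEM);
	}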
13963 13982
13964 13983 static int
13965 13984 dtrace_state_buffers(dtrace_state_t *state)
13966 13985 {
13967 13986 dtrace_speculation_t *spec = state->dts_speculations;
13968 13987 int rval, i;
13969 13988
13970 13989 if ((rval = dtrace_state_buffer(state, state->dts_buffer,
13971 13990 DTRACEOPT_BUFSIZE)) != 0)
13972 13991 return (rval);
13973 13992
13974 13993 if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
13975 13994 DTRACEOPT_AGGSIZE)) != 0)
13976 13995 return (rval);
13977 13996
13978 13997 for (i = 0; i < state->dts_nspeculations; i++) {
13979 13998 if ((rval = dtrace_state_buffer(state,
13980 13999 spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
13981 14000 return (rval);
13982 14001 }
13983 14002
13984 14003 return (0);
13985 14004 }
13986 14005
13987 14006 static void
13988 14007 dtrace_state_prereserve(dtrace_state_t *state)
13989 14008 {
13990 14009 dtrace_ecb_t *ecb;
13991 14010 dtrace_probe_t *probe;
13992 14011
13993 14012 state->dts_reserve = 0;
13994 14013
13995 14014 if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
13996 14015 return;
13997 14016
13998 14017 /*
13999 14018 * If our buffer policy is a "fill" buffer policy, we need to set the
14000 14019 * prereserved space to be the space required by the END probes.
14001 14020 */
14002 14021 probe = dtrace_probes[dtrace_probeid_end - 1];
14003 14022 ASSERT(probe != NULL);
14004 14023
14005 14024 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
14006 14025 if (ecb->dte_state != state)
14007 14026 continue;
14008 14027
14009 14028 state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
14010 14029 }
14011 14030 }
14012 14031
14013 14032 static int
14014 14033 dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
14015 14034 {
14016 14035 dtrace_optval_t *opt = state->dts_options, sz, nspec;
14017 14036 dtrace_speculation_t *spec;
14018 14037 dtrace_buffer_t *buf;
14019 14038 cyc_handler_t hdlr;
14020 14039 cyc_time_t when;
14021 14040 int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t);
14022 14041 dtrace_icookie_t cookie;
14023 14042
14024 14043 mutex_enter(&cpu_lock);
14025 14044 mutex_enter(&dtrace_lock);
14026 14045
14027 14046 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
14028 14047 rval = EBUSY;
14029 14048 goto out;
14030 14049 }
14031 14050
14032 14051 /*
14033 14052 * Before we can perform any checks, we must prime all of the
14034 14053 * retained enablings that correspond to this state.
14035 14054 */
14036 14055 dtrace_enabling_prime(state);
14037 14056
14038 14057 if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
14039 14058 rval = EACCES;
14040 14059 goto out;
14041 14060 }
14042 14061
14043 14062 dtrace_state_prereserve(state);
14044 14063
14045 14064 /*
14046 14065 	 * Now we want to try to allocate our speculations.
14047 14066 * We do not automatically resize the number of speculations; if
14048 14067 * this fails, we will fail the operation.
14049 14068 */
14050 14069 nspec = opt[DTRACEOPT_NSPEC];
14051 14070 ASSERT(nspec != DTRACEOPT_UNSET);
14052 14071
14053 14072 if (nspec > INT_MAX) {
14054 14073 rval = ENOMEM;
14055 14074 goto out;
14056 14075 }
14057 14076
14058 14077 spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t),
14059 14078 KM_NOSLEEP | KM_NORMALPRI);
14060 14079
14061 14080 if (spec == NULL) {
14062 14081 rval = ENOMEM;
14063 14082 goto out;
14064 14083 }
14065 14084
14066 14085 state->dts_speculations = spec;
14067 14086 state->dts_nspeculations = (int)nspec;
14068 14087
14069 14088 for (i = 0; i < nspec; i++) {
14070 14089 if ((buf = kmem_zalloc(bufsize,
14071 14090 KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
14072 14091 rval = ENOMEM;
14073 14092 goto err;
14074 14093 }
14075 14094
14076 14095 spec[i].dtsp_buffer = buf;
14077 14096 }
14078 14097
14079 14098 if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
14080 14099 if (dtrace_anon.dta_state == NULL) {
14081 14100 rval = ENOENT;
14082 14101 goto out;
14083 14102 }
14084 14103
14085 14104 if (state->dts_necbs != 0) {
14086 14105 rval = EALREADY;
14087 14106 goto out;
14088 14107 }
14089 14108
14090 14109 state->dts_anon = dtrace_anon_grab();
14091 14110 ASSERT(state->dts_anon != NULL);
14092 14111 state = state->dts_anon;
14093 14112
14094 14113 /*
14095 14114 * We want "grabanon" to be set in the grabbed state, so we'll
14096 14115 * copy that option value from the grabbing state into the
14097 14116 * grabbed state.
14098 14117 */
14099 14118 state->dts_options[DTRACEOPT_GRABANON] =
14100 14119 opt[DTRACEOPT_GRABANON];
14101 14120
14102 14121 *cpu = dtrace_anon.dta_beganon;
14103 14122
14104 14123 /*
14105 14124 * If the anonymous state is active (as it almost certainly
14106 14125 * is if the anonymous enabling ultimately matched anything),
14107 14126 * we don't allow any further option processing -- but we
14108 14127 * don't return failure.
14109 14128 */
14110 14129 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
14111 14130 goto out;
14112 14131 }
14113 14132
14114 14133 if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
14115 14134 opt[DTRACEOPT_AGGSIZE] != 0) {
14116 14135 if (state->dts_aggregations == NULL) {
14117 14136 /*
14118 14137 * We're not going to create an aggregation buffer
14119 14138 * because we don't have any ECBs that contain
14120 14139 * aggregations -- set this option to 0.
14121 14140 */
14122 14141 opt[DTRACEOPT_AGGSIZE] = 0;
14123 14142 } else {
14124 14143 /*
14125 14144 * If we have an aggregation buffer, we must also have
14126 14145 * a buffer to use as scratch.
14127 14146 */
14128 14147 if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
14129 14148 opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
14130 14149 opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
14131 14150 }
14132 14151 }
14133 14152 }
14134 14153
14135 14154 if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
14136 14155 opt[DTRACEOPT_SPECSIZE] != 0) {
14137 14156 if (!state->dts_speculates) {
14138 14157 /*
14139 14158 * We're not going to create speculation buffers
14140 14159 * because we don't have any ECBs that actually
14141 14160 * speculate -- set the speculation size to 0.
14142 14161 */
14143 14162 opt[DTRACEOPT_SPECSIZE] = 0;
14144 14163 }
14145 14164 }
14146 14165
14147 14166 /*
14148 14167 * The bare minimum size for any buffer that we're actually going to
14149 14168 * do anything to is sizeof (uint64_t).
14150 14169 */
14151 14170 sz = sizeof (uint64_t);
14152 14171
14153 14172 if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
14154 14173 (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
14155 14174 (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
14156 14175 /*
14157 14176 * A buffer size has been explicitly set to 0 (or to a size
14158 14177 * that will be adjusted to 0) and we need the space -- we
14159 14178 * need to return failure. We return ENOSPC to differentiate
14160 14179 * it from failing to allocate a buffer due to failure to meet
14161 14180 * the reserve (for which we return E2BIG).
14162 14181 */
14163 14182 rval = ENOSPC;
14164 14183 goto out;
14165 14184 }
14166 14185
14167 14186 if ((rval = dtrace_state_buffers(state)) != 0)
14168 14187 goto err;
14169 14188
14170 14189 if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
14171 14190 sz = dtrace_dstate_defsize;
14172 14191
14173 14192 do {
14174 14193 rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);
14175 14194
14176 14195 if (rval == 0)
14177 14196 break;
14178 14197
14179 14198 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
14180 14199 goto err;
14181 14200 } while (sz >>= 1);
14182 14201
14183 14202 opt[DTRACEOPT_DYNVARSIZE] = sz;
14184 14203
14185 14204 if (rval != 0)
14186 14205 goto err;
14187 14206
14188 14207 if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
14189 14208 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;
14190 14209
14191 14210 if (opt[DTRACEOPT_CLEANRATE] == 0)
14192 14211 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
14193 14212
14194 14213 if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
14195 14214 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;
14196 14215
14197 14216 if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
14198 14217 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
14199 14218
14200 14219 hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
14201 14220 hdlr.cyh_arg = state;
14202 14221 hdlr.cyh_level = CY_LOW_LEVEL;
14203 14222
14204 14223 when.cyt_when = 0;
14205 14224 when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
14206 14225
14207 14226 state->dts_cleaner = cyclic_add(&hdlr, &when);
14208 14227
14209 14228 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
14210 14229 hdlr.cyh_arg = state;
14211 14230 hdlr.cyh_level = CY_LOW_LEVEL;
14212 14231
14213 14232 when.cyt_when = 0;
14214 14233 when.cyt_interval = dtrace_deadman_interval;
14215 14234
14216 14235 state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
14217 14236 state->dts_deadman = cyclic_add(&hdlr, &when);
14218 14237
14219 14238 state->dts_activity = DTRACE_ACTIVITY_WARMUP;
14220 14239
14221 14240 if (state->dts_getf != 0 &&
14222 14241 !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
14223 14242 /*
14224 14243 * We don't have kernel privs but we have at least one call
14225 14244 * to getf(); we need to bump our zone's count, and (if
14226 14245 * this is the first enabling to have an unprivileged call
14227 14246 * to getf()) we need to hook into closef().
14228 14247 */
14229 14248 state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
14230 14249
14231 14250 if (dtrace_getf++ == 0) {
14232 14251 ASSERT(dtrace_closef == NULL);
14233 14252 dtrace_closef = dtrace_getf_barrier;
14234 14253 }
14235 14254 }
14236 14255
14237 14256 /*
14238 14257 * Now it's time to actually fire the BEGIN probe. We need to disable
14239 14258 * interrupts here both to record the CPU on which we fired the BEGIN
14240 14259 * probe (the data from this CPU will be processed first at user
14241 14260 * level) and to manually activate the buffer for this CPU.
14242 14261 */
14243 14262 cookie = dtrace_interrupt_disable();
14244 14263 *cpu = CPU->cpu_id;
14245 14264 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
14246 14265 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
14247 14266
14248 14267 dtrace_probe(dtrace_probeid_begin,
14249 14268 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
14250 14269 dtrace_interrupt_enable(cookie);
14251 14270 /*
14252 14271 * We may have had an exit action from a BEGIN probe; only change our
14253 14272 * state to ACTIVE if we're still in WARMUP.
14254 14273 */
14255 14274 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
14256 14275 state->dts_activity == DTRACE_ACTIVITY_DRAINING);
14257 14276
14258 14277 if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
14259 14278 state->dts_activity = DTRACE_ACTIVITY_ACTIVE;
14260 14279
14261 14280 /*
14262 14281 	 * Regardless of whether we're now in ACTIVE or DRAINING, we
14263 14282 * want each CPU to transition its principal buffer out of the
14264 14283 * INACTIVE state. Doing this assures that no CPU will suddenly begin
14265 14284 * processing an ECB halfway down a probe's ECB chain; all CPUs will
14266 14285 * atomically transition from processing none of a state's ECBs to
14267 14286 * processing all of them.
14268 14287 */
14269 14288 dtrace_xcall(DTRACE_CPUALL,
14270 14289 (dtrace_xcall_t)dtrace_buffer_activate, state);
14271 14290 goto out;
14272 14291
14273 14292 err:
14274 14293 dtrace_buffer_free(state->dts_buffer);
14275 14294 dtrace_buffer_free(state->dts_aggbuffer);
14276 14295
14277 14296 if ((nspec = state->dts_nspeculations) == 0) {
14278 14297 ASSERT(state->dts_speculations == NULL);
14279 14298 goto out;
14280 14299 }
14281 14300
14282 14301 spec = state->dts_speculations;
14283 14302 ASSERT(spec != NULL);
14284 14303
14285 14304 for (i = 0; i < state->dts_nspeculations; i++) {
14286 14305 if ((buf = spec[i].dtsp_buffer) == NULL)
14287 14306 break;
14288 14307
14289 14308 dtrace_buffer_free(buf);
14290 14309 kmem_free(buf, bufsize);
14291 14310 }
14292 14311
14293 14312 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
14294 14313 state->dts_nspeculations = 0;
14295 14314 state->dts_speculations = NULL;
14296 14315
14297 14316 out:
14298 14317 mutex_exit(&dtrace_lock);
14299 14318 mutex_exit(&cpu_lock);
14300 14319
14301 14320 return (rval);
14302 14321 }
14303 14322
14304 14323 static int
14305 14324 dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
14306 14325 {
14307 14326 dtrace_icookie_t cookie;
14308 14327
14309 14328 ASSERT(MUTEX_HELD(&dtrace_lock));
14310 14329
14311 14330 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
14312 14331 state->dts_activity != DTRACE_ACTIVITY_DRAINING)
14313 14332 return (EINVAL);
14314 14333
14315 14334 /*
14316 14335 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
14317 14336 * to be sure that every CPU has seen it. See below for the details
14318 14337 * on why this is done.
14319 14338 */
14320 14339 state->dts_activity = DTRACE_ACTIVITY_DRAINING;
14321 14340 dtrace_sync();
14322 14341
14323 14342 /*
14324 14343 * By this point, it is impossible for any CPU to be still processing
14325 14344 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to
14326 14345 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
14327 14346 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe()
14328 14347 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
14329 14348 * iff we're in the END probe.
14330 14349 */
14331 14350 state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
14332 14351 dtrace_sync();
14333 14352 ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
14334 14353
14335 14354 /*
14336 14355 * Finally, we can release the reserve and call the END probe. We
14337 14356 * disable interrupts across calling the END probe to allow us to
14338 14357 * return the CPU on which we actually called the END probe. This
14339 14358 * allows user-land to be sure that this CPU's principal buffer is
14340 14359 * processed last.
14341 14360 */
14342 14361 state->dts_reserve = 0;
14343 14362
14344 14363 cookie = dtrace_interrupt_disable();
14345 14364 *cpu = CPU->cpu_id;
14346 14365 dtrace_probe(dtrace_probeid_end,
14347 14366 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
14348 14367 dtrace_interrupt_enable(cookie);
14349 14368
14350 14369 state->dts_activity = DTRACE_ACTIVITY_STOPPED;
14351 14370 dtrace_sync();
14352 14371
14353 14372 if (state->dts_getf != 0 &&
14354 14373 !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
14355 14374 /*
14356 14375 * We don't have kernel privs but we have at least one call
14357 14376 * to getf(); we need to lower our zone's count, and (if
14358 14377 * this is the last enabling to have an unprivileged call
14359 14378 * to getf()) we need to clear the closef() hook.
14360 14379 */
14361 14380 ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
14362 14381 ASSERT(dtrace_closef == dtrace_getf_barrier);
14363 14382 ASSERT(dtrace_getf > 0);
14364 14383
14365 14384 state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
14366 14385
14367 14386 if (--dtrace_getf == 0)
14368 14387 dtrace_closef = NULL;
14369 14388 }
14370 14389
14371 14390 return (0);
14372 14391 }
14373 14392
14374 14393 static int
14375 14394 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
14376 14395 dtrace_optval_t val)
14377 14396 {
14378 14397 ASSERT(MUTEX_HELD(&dtrace_lock));
14379 14398
14380 14399 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
14381 14400 return (EBUSY);
14382 14401
14383 14402 if (option >= DTRACEOPT_MAX)
14384 14403 return (EINVAL);
14385 14404
14386 14405 if (option != DTRACEOPT_CPU && val < 0)
14387 14406 return (EINVAL);
14388 14407
14389 14408 switch (option) {
14390 14409 case DTRACEOPT_DESTRUCTIVE:
14391 14410 if (dtrace_destructive_disallow)
14392 14411 return (EACCES);
14393 14412
14394 14413 state->dts_cred.dcr_destructive = 1;
14395 14414 break;
14396 14415
14397 14416 case DTRACEOPT_BUFSIZE:
14398 14417 case DTRACEOPT_DYNVARSIZE:
14399 14418 case DTRACEOPT_AGGSIZE:
14400 14419 case DTRACEOPT_SPECSIZE:
14401 14420 case DTRACEOPT_STRSIZE:
14402 14421 if (val < 0)
14403 14422 return (EINVAL);
14404 14423
14405 14424 if (val >= LONG_MAX) {
14406 14425 /*
14407 14426 * If this is an otherwise negative value, set it to
14408 14427 * the highest multiple of 128m less than LONG_MAX.
14409 14428 * Technically, we're adjusting the size without
14410 14429 * regard to the buffer resizing policy, but in fact,
14411 14430 * this has no effect -- if we set the buffer size to
14412 14431 * ~LONG_MAX and the buffer policy is ultimately set to
14413 14432 * be "manual", the buffer allocation is guaranteed to
14414 14433 * fail, if only because the allocation requires two
14415 14434 	 * buffers.  (We set the size to the highest
14416 14435 * multiple of 128m because it ensures that the size
14417 14436 * will remain a multiple of a megabyte when
14418 14437 * repeatedly halved -- all the way down to 15m.)
14419 14438 */
14420 14439 val = LONG_MAX - (1 << 27) + 1;
14421 14440 }
14422 14441 }
14423 14442
14424 14443 state->dts_options[option] = val;
14425 14444
14426 14445 return (0);
14427 14446 }
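
The clamp above selects the largest multiple of 128MB that still fits in a signed long, so that repeated halving by the buffer-allocation retry loop keeps the size megabyte-aligned for several rounds. The arithmetic can be sanity-checked with a few lines of userland C:

	#include <limits.h>
	#include <stdio.h>

	int
	main(void)
	{
		long val = LONG_MAX - (1L << 27) + 1;

		/* Largest 128MB multiple not exceeding LONG_MAX. */
		printf("%ld (%% 128MB = %ld)\n", val, val % (1L << 27));
		return (0);
	}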
14428 14447
14429 14448 static void
14430 14449 dtrace_state_destroy(dtrace_state_t *state)
14431 14450 {
14432 14451 dtrace_ecb_t *ecb;
14433 14452 dtrace_vstate_t *vstate = &state->dts_vstate;
14434 14453 minor_t minor = getminor(state->dts_dev);
14435 14454 int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
14436 14455 dtrace_speculation_t *spec = state->dts_speculations;
14437 14456 int nspec = state->dts_nspeculations;
14438 14457 uint32_t match;
14439 14458
14440 14459 ASSERT(MUTEX_HELD(&dtrace_lock));
14441 14460 ASSERT(MUTEX_HELD(&cpu_lock));
14442 14461
14443 14462 /*
14444 14463 * First, retract any retained enablings for this state.
14445 14464 */
14446 14465 dtrace_enabling_retract(state);
14447 14466 ASSERT(state->dts_nretained == 0);
14448 14467
14449 14468 if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
14450 14469 state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
14451 14470 /*
14452 14471 * We have managed to come into dtrace_state_destroy() on a
14453 14472 * hot enabling -- almost certainly because of a disorderly
14454 14473 * shutdown of a consumer. (That is, a consumer that is
14455 14474 * exiting without having called dtrace_stop().) In this case,
14456 14475 * we're going to set our activity to be KILLED, and then
14457 14476 * issue a sync to be sure that everyone is out of probe
14458 14477 * context before we start blowing away ECBs.
14459 14478 */
14460 14479 state->dts_activity = DTRACE_ACTIVITY_KILLED;
14461 14480 dtrace_sync();
14462 14481 }
14463 14482
14464 14483 /*
14465 14484 * Release the credential hold we took in dtrace_state_create().
14466 14485 */
14467 14486 if (state->dts_cred.dcr_cred != NULL)
14468 14487 crfree(state->dts_cred.dcr_cred);
14469 14488
14470 14489 /*
14471 14490 * Now we can safely disable and destroy any enabled probes. Because
14472 14491 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
14473 14492 * (especially if they're all enabled), we take two passes through the
14474 14493 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
14475 14494 * in the second we disable whatever is left over.
14476 14495 */
14477 14496 for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
14478 14497 for (i = 0; i < state->dts_necbs; i++) {
14479 14498 if ((ecb = state->dts_ecbs[i]) == NULL)
14480 14499 continue;
14481 14500
14482 14501 if (match && ecb->dte_probe != NULL) {
14483 14502 dtrace_probe_t *probe = ecb->dte_probe;
14484 14503 dtrace_provider_t *prov = probe->dtpr_provider;
14485 14504
14486 14505 if (!(prov->dtpv_priv.dtpp_flags & match))
14487 14506 continue;
14488 14507 }
14489 14508
14490 14509 dtrace_ecb_disable(ecb);
14491 14510 dtrace_ecb_destroy(ecb);
14492 14511 }
14493 14512
14494 14513 if (!match)
14495 14514 break;
14496 14515 }
14497 14516
14498 14517 /*
14499 14518 * Before we free the buffers, perform one more sync to assure that
14500 14519 * every CPU is out of probe context.
14501 14520 */
14502 14521 dtrace_sync();
14503 14522
14504 14523 dtrace_buffer_free(state->dts_buffer);
14505 14524 dtrace_buffer_free(state->dts_aggbuffer);
14506 14525
14507 14526 for (i = 0; i < nspec; i++)
14508 14527 dtrace_buffer_free(spec[i].dtsp_buffer);
14509 14528
14510 14529 if (state->dts_cleaner != CYCLIC_NONE)
14511 14530 cyclic_remove(state->dts_cleaner);
14512 14531
14513 14532 if (state->dts_deadman != CYCLIC_NONE)
14514 14533 cyclic_remove(state->dts_deadman);
14515 14534
14516 14535 dtrace_dstate_fini(&vstate->dtvs_dynvars);
14517 14536 dtrace_vstate_fini(vstate);
14518 14537 kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));
14519 14538
14520 14539 if (state->dts_aggregations != NULL) {
14521 14540 #ifdef DEBUG
14522 14541 for (i = 0; i < state->dts_naggregations; i++)
14523 14542 ASSERT(state->dts_aggregations[i] == NULL);
14524 14543 #endif
14525 14544 ASSERT(state->dts_naggregations > 0);
14526 14545 kmem_free(state->dts_aggregations,
14527 14546 state->dts_naggregations * sizeof (dtrace_aggregation_t *));
14528 14547 }
14529 14548
14530 14549 kmem_free(state->dts_buffer, bufsize);
14531 14550 kmem_free(state->dts_aggbuffer, bufsize);
14532 14551
14533 14552 for (i = 0; i < nspec; i++)
14534 14553 kmem_free(spec[i].dtsp_buffer, bufsize);
14535 14554
14536 14555 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
14537 14556
14538 14557 dtrace_format_destroy(state);
14539 14558
14540 14559 vmem_destroy(state->dts_aggid_arena);
14541 14560 ddi_soft_state_free(dtrace_softstate, minor);
14542 14561 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
14543 14562 }
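
The ECB teardown above uses a compact two-pass idiom: the first pass disables only probes whose provider carries DTRACE_PRIV_KERNEL (since leaving those enabled would slow the teardown itself), and the second pass, with match set to 0, sweeps up whatever remains. The control shape in isolation, with hypothetical names (item_t, sweep, teardown):

	#include <stddef.h>

	#define	FLAG_KERNEL	0x1

	typedef struct item {
		unsigned	flags;
	} item_t;

	extern void teardown(item_t *);

	static void
	sweep(item_t **items, int nitems)
	{
		/*
		 * Pass 1 tears down only items carrying FLAG_KERNEL;
		 * pass 2 (match == 0) takes everything left over.
		 */
		for (unsigned match = FLAG_KERNEL; ; match = 0) {
			for (int i = 0; i < nitems; i++) {
				if (items[i] == NULL)
					continue;
				if (match != 0 &&
				    !(items[i]->flags & match))
					continue;
				teardown(items[i]);
				items[i] = NULL; /* skip in pass 2 */
			}
			if (match == 0)
				break;
		}
	}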
14544 14563
14545 14564 /*
14546 14565 * DTrace Anonymous Enabling Functions
14547 14566 */
14548 14567 static dtrace_state_t *
14549 14568 dtrace_anon_grab(void)
14550 14569 {
14551 14570 dtrace_state_t *state;
14552 14571
14553 14572 ASSERT(MUTEX_HELD(&dtrace_lock));
14554 14573
14555 14574 if ((state = dtrace_anon.dta_state) == NULL) {
14556 14575 ASSERT(dtrace_anon.dta_enabling == NULL);
14557 14576 return (NULL);
14558 14577 }
14559 14578
14560 14579 ASSERT(dtrace_anon.dta_enabling != NULL);
14561 14580 ASSERT(dtrace_retained != NULL);
14562 14581
14563 14582 dtrace_enabling_destroy(dtrace_anon.dta_enabling);
14564 14583 dtrace_anon.dta_enabling = NULL;
14565 14584 dtrace_anon.dta_state = NULL;
14566 14585
14567 14586 return (state);
14568 14587 }
14569 14588
14570 14589 static void
14571 14590 dtrace_anon_property(void)
14572 14591 {
14573 14592 int i, rv;
14574 14593 dtrace_state_t *state;
14575 14594 dof_hdr_t *dof;
14576 14595 char c[32]; /* enough for "dof-data-" + digits */
14577 14596
14578 14597 ASSERT(MUTEX_HELD(&dtrace_lock));
14579 14598 ASSERT(MUTEX_HELD(&cpu_lock));
14580 14599
14581 14600 for (i = 0; ; i++) {
14582 14601 (void) snprintf(c, sizeof (c), "dof-data-%d", i);
14583 14602
14584 14603 dtrace_err_verbose = 1;
14585 14604
14586 14605 if ((dof = dtrace_dof_property(c)) == NULL) {
14587 14606 dtrace_err_verbose = 0;
14588 14607 break;
14589 14608 }
14590 14609
14591 14610 /*
14592 14611 * We want to create anonymous state, so we need to transition
14593 14612 * the kernel debugger to indicate that DTrace is active. If
14594 14613 * this fails (e.g. because the debugger has modified text in
14595 14614 * some way), we won't continue with the processing.
14596 14615 */
14597 14616 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
14598 14617 cmn_err(CE_NOTE, "kernel debugger active; anonymous "
14599 14618 "enabling ignored.");
14600 14619 dtrace_dof_destroy(dof);
14601 14620 break;
14602 14621 }
14603 14622
14604 14623 /*
14605 14624 * If we haven't allocated an anonymous state, we'll do so now.
14606 14625 */
14607 14626 if ((state = dtrace_anon.dta_state) == NULL) {
14608 14627 state = dtrace_state_create(NULL, NULL);
14609 14628 dtrace_anon.dta_state = state;
14610 14629
14611 14630 if (state == NULL) {
14612 14631 /*
14613 14632 * This basically shouldn't happen: the only
14614 14633 * failure mode from dtrace_state_create() is a
14615 14634 * failure of ddi_soft_state_zalloc() that
14616 14635 * itself should never happen. Still, the
14617 14636 * interface allows for a failure mode, and
14618 14637 * we want to fail as gracefully as possible:
14619 14638 * we'll emit an error message and cease
14620 14639 * processing anonymous state in this case.
14621 14640 */
14622 14641 cmn_err(CE_WARN, "failed to create "
14623 14642 "anonymous state");
14624 14643 dtrace_dof_destroy(dof);
14625 14644 break;
14626 14645 }
14627 14646 }
14628 14647
14629 14648 rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
14630 14649 &dtrace_anon.dta_enabling, 0, B_TRUE);
14631 14650
14632 14651 if (rv == 0)
14633 14652 rv = dtrace_dof_options(dof, state);
14634 14653
14635 14654 dtrace_err_verbose = 0;
14636 14655 dtrace_dof_destroy(dof);
14637 14656
14638 14657 if (rv != 0) {
14639 14658 /*
14640 14659 * This is malformed DOF; chuck any anonymous state
14641 14660 * that we created.
14642 14661 */
14643 14662 ASSERT(dtrace_anon.dta_enabling == NULL);
14644 14663 dtrace_state_destroy(state);
14645 14664 dtrace_anon.dta_state = NULL;
14646 14665 break;
14647 14666 }
14648 14667
14649 14668 ASSERT(dtrace_anon.dta_enabling != NULL);
14650 14669 }
14651 14670
14652 14671 if (dtrace_anon.dta_enabling != NULL) {
14653 14672 int rval;
14654 14673
14655 14674 /*
14656 14675 * dtrace_enabling_retain() can only fail because we are
14657 14676 * trying to retain more enablings than are allowed -- but
14658 14677 * we only have one anonymous enabling, and we are guaranteed
14659 14678 * to be allowed at least one retained enabling; we assert
14660 14679 * that dtrace_enabling_retain() returns success.
14661 14680 */
14662 14681 rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
14663 14682 ASSERT(rval == 0);
14664 14683
14665 14684 dtrace_enabling_dump(dtrace_anon.dta_enabling);
14666 14685 }
14667 14686 }
14668 14687
14669 14688 /*
14670 14689 * DTrace Helper Functions
14671 14690 */
14672 14691 static void
14673 14692 dtrace_helper_trace(dtrace_helper_action_t *helper,
14674 14693 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
14675 14694 {
14676 14695 uint32_t size, next, nnext, i;
14677 14696 dtrace_helptrace_t *ent, *buffer;
14678 14697 uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
14679 14698
14680 14699 if ((buffer = dtrace_helptrace_buffer) == NULL)
14681 14700 return;
14682 14701
14683 14702 ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);
14684 14703
14685 14704 /*
14686 14705 * What would a tracing framework be without its own tracing
14687 14706 * framework? (Well, a hell of a lot simpler, for starters...)
14688 14707 */
14689 14708 size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
14690 14709 sizeof (uint64_t) - sizeof (uint64_t);
14691 14710
14692 14711 /*
14693 14712 * Iterate until we can allocate a slot in the trace buffer.
14694 14713 */
14695 14714 do {
14696 14715 next = dtrace_helptrace_next;
14697 14716
14698 14717 if (next + size < dtrace_helptrace_bufsize) {
14699 14718 nnext = next + size;
14700 14719 } else {
14701 14720 nnext = size;
14702 14721 }
14703 14722 } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);
14704 14723
14705 14724 /*
14706 14725 * We have our slot; fill it in.
14707 14726 */
14708 14727 if (nnext == size) {
14709 14728 dtrace_helptrace_wrapped++;
14710 14729 next = 0;
14711 14730 }
14712 14731
14713 14732 ent = (dtrace_helptrace_t *)((uintptr_t)buffer + next);
14714 14733 ent->dtht_helper = helper;
14715 14734 ent->dtht_where = where;
14716 14735 ent->dtht_nlocals = vstate->dtvs_nlocals;
14717 14736
14718 14737 ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
14719 14738 mstate->dtms_fltoffs : -1;
14720 14739 ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
14721 14740 ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
14722 14741
14723 14742 for (i = 0; i < vstate->dtvs_nlocals; i++) {
14724 14743 dtrace_statvar_t *svar;
14725 14744
14726 14745 if ((svar = vstate->dtvs_locals[i]) == NULL)
14727 14746 continue;
14728 14747
14729 14748 ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
14730 14749 ent->dtht_locals[i] =
14731 14750 ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
14732 14751 }
14733 14752 }
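
The helper-trace buffer above is filled lock-free: writers race to advance a shared cursor with compare-and-swap, wrapping to the buffer's base when the tail cannot hold another entry, and only the CAS winner writes into its reserved slot. A compact C11 rendering of the reservation step (illustrative only; the kernel uses dtrace_cas32()):

	#include <stdatomic.h>
	#include <stdint.h>

	static _Atomic uint32_t trace_next;

	/*
	 * Reserve 'size' bytes in a ring buffer of 'bufsize' bytes and
	 * return the winning offset; wraps to 0 when the tail can't fit.
	 */
	static uint32_t
	reserve_slot(uint32_t size, uint32_t bufsize)
	{
		uint32_t next, nnext;

		do {
			next = atomic_load_explicit(&trace_next,
			    memory_order_relaxed);
			nnext = (next + size < bufsize) ?
			    next + size : size;
		} while (!atomic_compare_exchange_weak(&trace_next,
		    &next, nnext));

		/* If we wrapped, our slot starts at the buffer base. */
		return (nnext == size ? 0 : next);
	}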
14734 14753
14735 14754 static uint64_t
14736 14755 dtrace_helper(int which, dtrace_mstate_t *mstate,
14737 14756 dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
14738 14757 {
14739 14758 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
14740 14759 uint64_t sarg0 = mstate->dtms_arg[0];
14741 14760 uint64_t sarg1 = mstate->dtms_arg[1];
14742 14761 uint64_t rval;
14743 14762 dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
14744 14763 dtrace_helper_action_t *helper;
14745 14764 dtrace_vstate_t *vstate;
14746 14765 dtrace_difo_t *pred;
14747 14766 int i, trace = dtrace_helptrace_buffer != NULL;
14748 14767
14749 14768 ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);
14750 14769
14751 14770 if (helpers == NULL)
14752 14771 return (0);
14753 14772
14754 14773 if ((helper = helpers->dthps_actions[which]) == NULL)
14755 14774 return (0);
14756 14775
14757 14776 vstate = &helpers->dthps_vstate;
14758 14777 mstate->dtms_arg[0] = arg0;
14759 14778 mstate->dtms_arg[1] = arg1;
14760 14779
14761 14780 /*
14762 14781 * Now iterate over each helper. If its predicate evaluates to 'true',
14763 14782 * we'll call the corresponding actions. Note that the below calls
14764 14783 * to dtrace_dif_emulate() may set faults in machine state. This is
14765 14784 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow
14766 14785 * the stored DIF offset with its own (which is the desired behavior).
14767 14786 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
14768 14787 * from machine state; this is okay, too.
14769 14788 */
14770 14789 for (; helper != NULL; helper = helper->dtha_next) {
14771 14790 if ((pred = helper->dtha_predicate) != NULL) {
14772 14791 if (trace)
14773 14792 dtrace_helper_trace(helper, mstate, vstate, 0);
14774 14793
14775 14794 if (!dtrace_dif_emulate(pred, mstate, vstate, state))
14776 14795 goto next;
14777 14796
14778 14797 if (*flags & CPU_DTRACE_FAULT)
14779 14798 goto err;
14780 14799 }
14781 14800
14782 14801 for (i = 0; i < helper->dtha_nactions; i++) {
14783 14802 if (trace)
14784 14803 dtrace_helper_trace(helper,
14785 14804 mstate, vstate, i + 1);
14786 14805
14787 14806 rval = dtrace_dif_emulate(helper->dtha_actions[i],
14788 14807 mstate, vstate, state);
14789 14808
14790 14809 if (*flags & CPU_DTRACE_FAULT)
14791 14810 goto err;
14792 14811 }
14793 14812
14794 14813 next:
14795 14814 if (trace)
14796 14815 dtrace_helper_trace(helper, mstate, vstate,
14797 14816 DTRACE_HELPTRACE_NEXT);
14798 14817 }
14799 14818
14800 14819 if (trace)
14801 14820 dtrace_helper_trace(helper, mstate, vstate,
14802 14821 DTRACE_HELPTRACE_DONE);
14803 14822
14804 14823 /*
14805 14824 	 * Restore the arguments that we saved upon entry.
14806 14825 */
14807 14826 mstate->dtms_arg[0] = sarg0;
14808 14827 mstate->dtms_arg[1] = sarg1;
14809 14828
14810 14829 return (rval);
14811 14830
14812 14831 err:
14813 14832 if (trace)
14814 14833 dtrace_helper_trace(helper, mstate, vstate,
14815 14834 DTRACE_HELPTRACE_ERR);
14816 14835
14817 14836 /*
14818 14837 	 * Restore the arguments that we saved upon entry.
14819 14838 */
14820 14839 mstate->dtms_arg[0] = sarg0;
14821 14840 mstate->dtms_arg[1] = sarg1;
14822 14841
14823 14842 	return (0);
14824 14843 }
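/*
 * Illustrative call site (hypothetical, not from this file): from
 * probe context, the ustack helper would be invoked along the lines
 * of
 *
 *	rval = dtrace_helper(DTRACE_HELPER_ACTION_USTACK,
 *	    &mstate, state, arg0, arg1);
 *
 * with a zero return denoting either that no helper was configured
 * for the action or that the helper faulted.
 */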
14825 14844
14826 14845 static void
14827 14846 dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
14828 14847 dtrace_vstate_t *vstate)
14829 14848 {
14830 14849 int i;
14831 14850
14832 14851 if (helper->dtha_predicate != NULL)
14833 14852 dtrace_difo_release(helper->dtha_predicate, vstate);
14834 14853
14835 14854 for (i = 0; i < helper->dtha_nactions; i++) {
14836 14855 ASSERT(helper->dtha_actions[i] != NULL);
14837 14856 dtrace_difo_release(helper->dtha_actions[i], vstate);
14838 14857 }
14839 14858
14840 14859 kmem_free(helper->dtha_actions,
14841 14860 helper->dtha_nactions * sizeof (dtrace_difo_t *));
14842 14861 kmem_free(helper, sizeof (dtrace_helper_action_t));
14843 14862 }
14844 14863
14845 14864 static int
14846 14865 dtrace_helper_destroygen(int gen)
14847 14866 {
14848 14867 proc_t *p = curproc;
14849 14868 dtrace_helpers_t *help = p->p_dtrace_helpers;
14850 14869 dtrace_vstate_t *vstate;
14851 14870 int i;
14852 14871
14853 14872 ASSERT(MUTEX_HELD(&dtrace_lock));
14854 14873
14855 14874 if (help == NULL || gen > help->dthps_generation)
14856 14875 return (EINVAL);
14857 14876
14858 14877 vstate = &help->dthps_vstate;
14859 14878
14860 14879 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
14861 14880 dtrace_helper_action_t *last = NULL, *h, *next;
14862 14881
14863 14882 for (h = help->dthps_actions[i]; h != NULL; h = next) {
14864 14883 next = h->dtha_next;
14865 14884
14866 14885 if (h->dtha_generation == gen) {
14867 14886 if (last != NULL) {
14868 14887 last->dtha_next = next;
14869 14888 } else {
14870 14889 help->dthps_actions[i] = next;
14871 14890 }
14872 14891
14873 14892 dtrace_helper_action_destroy(h, vstate);
14874 14893 } else {
14875 14894 last = h;
14876 14895 }
14877 14896 }
14878 14897 }
14879 14898
14880 14899 /*
14881 14900 	 * Iterate until we've cleared out all helper providers with the
14882 14901 * given generation number.
14883 14902 */
14884 14903 for (;;) {
14885 14904 dtrace_helper_provider_t *prov;
14886 14905
14887 14906 /*
14888 14907 * Look for a helper provider with the right generation. We
14889 14908 * have to start back at the beginning of the list each time
14890 14909 * because we drop dtrace_lock. It's unlikely that we'll make
14891 14910 * more than two passes.
14892 14911 */
14893 14912 for (i = 0; i < help->dthps_nprovs; i++) {
14894 14913 prov = help->dthps_provs[i];
14895 14914
14896 14915 if (prov->dthp_generation == gen)
14897 14916 break;
14898 14917 }
14899 14918
14900 14919 /*
14901 14920 * If there were no matches, we're done.
14902 14921 */
14903 14922 if (i == help->dthps_nprovs)
14904 14923 break;
14905 14924
14906 14925 /*
14907 14926 * Move the last helper provider into this slot.
14908 14927 */
14909 14928 help->dthps_nprovs--;
14910 14929 help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
14911 14930 help->dthps_provs[help->dthps_nprovs] = NULL;
14912 14931
14913 14932 mutex_exit(&dtrace_lock);
14914 14933
14915 14934 /*
14916 14935 * If we have a meta provider, remove this helper provider.
14917 14936 */
14918 14937 mutex_enter(&dtrace_meta_lock);
14919 14938 if (dtrace_meta_pid != NULL) {
14920 14939 ASSERT(dtrace_deferred_pid == NULL);
14921 14940 dtrace_helper_provider_remove(&prov->dthp_prov,
14922 14941 p->p_pid);
14923 14942 }
14924 14943 mutex_exit(&dtrace_meta_lock);
14925 14944
14926 14945 dtrace_helper_provider_destroy(prov);
14927 14946
14928 14947 mutex_enter(&dtrace_lock);
14929 14948 }
14930 14949
14931 14950 return (0);
14932 14951 }
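/*
 * An illustrative sketch of the restart-scan idiom used above; every
 * name here (entry_t, list_head, list_lock, destroy_gen) is
 * hypothetical.  Because the teardown (here just a kmem_free(), but
 * a cross-lock callout in the code above) happens with the list lock
 * dropped, the scan restarts from the head of the list after each
 * removal rather than trusting a saved position.
 */
typedef struct entry {
	struct entry *e_next;
	int e_gen;
} entry_t;

static entry_t *list_head;
static kmutex_t list_lock;

static void
destroy_gen(int gen)
{
	entry_t *e, **pp;

	for (;;) {
		mutex_enter(&list_lock);

		for (pp = &list_head; (e = *pp) != NULL; pp = &e->e_next) {
			if (e->e_gen == gen)
				break;
		}

		if (e == NULL) {
			mutex_exit(&list_lock);
			break;
		}

		*pp = e->e_next;		/* unlink while locked */
		mutex_exit(&list_lock);		/* drop the lock to destroy */
		kmem_free(e, sizeof (entry_t));
	}
}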
14933 14952
14934 14953 static int
14935 14954 dtrace_helper_validate(dtrace_helper_action_t *helper)
14936 14955 {
14937 14956 int err = 0, i;
14938 14957 dtrace_difo_t *dp;
14939 14958
14940 14959 if ((dp = helper->dtha_predicate) != NULL)
14941 14960 err += dtrace_difo_validate_helper(dp);
14942 14961
14943 14962 for (i = 0; i < helper->dtha_nactions; i++)
14944 14963 err += dtrace_difo_validate_helper(helper->dtha_actions[i]);
14945 14964
14946 14965 return (err == 0);
14947 14966 }
14948 14967
14949 14968 static int
14950 14969 dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
14951 14970 {
14952 14971 dtrace_helpers_t *help;
14953 14972 dtrace_helper_action_t *helper, *last;
14954 14973 dtrace_actdesc_t *act;
14955 14974 dtrace_vstate_t *vstate;
14956 14975 dtrace_predicate_t *pred;
14957 14976 int count = 0, nactions = 0, i;
14958 14977
14959 14978 if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
14960 14979 return (EINVAL);
14961 14980
14962 14981 help = curproc->p_dtrace_helpers;
14963 14982 last = help->dthps_actions[which];
14964 14983 vstate = &help->dthps_vstate;
14965 14984
14966 14985 for (count = 0; last != NULL; last = last->dtha_next) {
14967 14986 count++;
14968 14987 if (last->dtha_next == NULL)
14969 14988 break;
14970 14989 }
14971 14990
14972 14991 /*
14973 14992 * If we already have dtrace_helper_actions_max helper actions for this
14974 14993 * helper action type, we'll refuse to add a new one.
14975 14994 */
14976 14995 if (count >= dtrace_helper_actions_max)
14977 14996 return (ENOSPC);
14978 14997
14979 14998 helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
14980 14999 helper->dtha_generation = help->dthps_generation;
14981 15000
14982 15001 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
14983 15002 ASSERT(pred->dtp_difo != NULL);
14984 15003 dtrace_difo_hold(pred->dtp_difo);
14985 15004 helper->dtha_predicate = pred->dtp_difo;
14986 15005 }
14987 15006
14988 15007 for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
14989 15008 if (act->dtad_kind != DTRACEACT_DIFEXPR)
14990 15009 goto err;
14991 15010
14992 15011 if (act->dtad_difo == NULL)
14993 15012 goto err;
14994 15013
14995 15014 nactions++;
14996 15015 }
14997 15016
14998 15017 helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
14999 15018 (helper->dtha_nactions = nactions), KM_SLEEP);
15000 15019
15001 15020 for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
15002 15021 dtrace_difo_hold(act->dtad_difo);
15003 15022 helper->dtha_actions[i++] = act->dtad_difo;
15004 15023 }
15005 15024
15006 15025 if (!dtrace_helper_validate(helper))
15007 15026 goto err;
15008 15027
15009 15028 if (last == NULL) {
15010 15029 help->dthps_actions[which] = helper;
15011 15030 } else {
15012 15031 last->dtha_next = helper;
15013 15032 }
15014 15033
15015 15034 if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
15016 15035 dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
15017 15036 dtrace_helptrace_next = 0;
15018 15037 }
15019 15038
15020 15039 return (0);
15021 15040 err:
15022 15041 dtrace_helper_action_destroy(helper, vstate);
15023 15042 return (EINVAL);
15024 15043 }
15025 15044
15026 15045 static void
15027 15046 dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
15028 15047 dof_helper_t *dofhp)
15029 15048 {
15030 15049 ASSERT(MUTEX_NOT_HELD(&dtrace_lock));
15031 15050
15032 15051 mutex_enter(&dtrace_meta_lock);
15033 15052 mutex_enter(&dtrace_lock);
15034 15053
15035 15054 if (!dtrace_attached() || dtrace_meta_pid == NULL) {
15036 15055 /*
15037 15056 * If the dtrace module is loaded but not attached, or if
15038 15057 		 * there isn't a meta provider registered to deal with
15039 15058 * these provider descriptions, we need to postpone creating
15040 15059 * the actual providers until later.
15041 15060 */
15042 15061
15043 15062 if (help->dthps_next == NULL && help->dthps_prev == NULL &&
15044 15063 dtrace_deferred_pid != help) {
15045 15064 help->dthps_deferred = 1;
15046 15065 help->dthps_pid = p->p_pid;
15047 15066 help->dthps_next = dtrace_deferred_pid;
15048 15067 help->dthps_prev = NULL;
15049 15068 if (dtrace_deferred_pid != NULL)
15050 15069 dtrace_deferred_pid->dthps_prev = help;
15051 15070 dtrace_deferred_pid = help;
15052 15071 }
15053 15072
15054 15073 mutex_exit(&dtrace_lock);
15055 15074
15056 15075 } else if (dofhp != NULL) {
15057 15076 /*
15058 15077 * If the dtrace module is loaded and we have a particular
15059 15078 * helper provider description, pass that off to the
15060 15079 * meta provider.
15061 15080 */
15062 15081
15063 15082 mutex_exit(&dtrace_lock);
15064 15083
15065 15084 dtrace_helper_provide(dofhp, p->p_pid);
15066 15085
15067 15086 } else {
15068 15087 /*
15069 15088 * Otherwise, just pass all the helper provider descriptions
15070 15089 * off to the meta provider.
15071 15090 */
15072 15091
15073 15092 int i;
15074 15093 mutex_exit(&dtrace_lock);
15075 15094
15076 15095 for (i = 0; i < help->dthps_nprovs; i++) {
15077 15096 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
15078 15097 p->p_pid);
15079 15098 }
15080 15099 }
15081 15100
15082 15101 mutex_exit(&dtrace_meta_lock);
15083 15102 }
15084 15103
15085 15104 static int
15086 15105 dtrace_helper_provider_add(dof_helper_t *dofhp, int gen)
15087 15106 {
15088 15107 dtrace_helpers_t *help;
15089 15108 dtrace_helper_provider_t *hprov, **tmp_provs;
15090 15109 uint_t tmp_maxprovs, i;
15091 15110
15092 15111 ASSERT(MUTEX_HELD(&dtrace_lock));
15093 15112
15094 15113 help = curproc->p_dtrace_helpers;
15095 15114 ASSERT(help != NULL);
15096 15115
15097 15116 /*
15098 15117 * If we already have dtrace_helper_providers_max helper providers,
15099 15118 	 * we refuse to add a new one.
15100 15119 */
15101 15120 if (help->dthps_nprovs >= dtrace_helper_providers_max)
15102 15121 return (ENOSPC);
15103 15122
15104 15123 /*
15105 15124 * Check to make sure this isn't a duplicate.
15106 15125 */
15107 15126 for (i = 0; i < help->dthps_nprovs; i++) {
15108 15127 if (dofhp->dofhp_addr ==
15109 15128 help->dthps_provs[i]->dthp_prov.dofhp_addr)
15110 15129 return (EALREADY);
15111 15130 }
15112 15131
15113 15132 hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
15114 15133 hprov->dthp_prov = *dofhp;
15115 15134 hprov->dthp_ref = 1;
15116 15135 hprov->dthp_generation = gen;
15117 15136
15118 15137 /*
15119 15138 * Allocate a bigger table for helper providers if it's already full.
15120 15139 */
15121 15140 if (help->dthps_maxprovs == help->dthps_nprovs) {
15122 15141 tmp_maxprovs = help->dthps_maxprovs;
15123 15142 tmp_provs = help->dthps_provs;
15124 15143
15125 15144 if (help->dthps_maxprovs == 0)
15126 15145 help->dthps_maxprovs = 2;
15127 15146 else
15128 15147 help->dthps_maxprovs *= 2;
15129 15148 if (help->dthps_maxprovs > dtrace_helper_providers_max)
15130 15149 help->dthps_maxprovs = dtrace_helper_providers_max;
15131 15150
15132 15151 ASSERT(tmp_maxprovs < help->dthps_maxprovs);
15133 15152
15134 15153 help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
15135 15154 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
15136 15155
15137 15156 if (tmp_provs != NULL) {
15138 15157 bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
15139 15158 sizeof (dtrace_helper_provider_t *));
15140 15159 kmem_free(tmp_provs, tmp_maxprovs *
15141 15160 sizeof (dtrace_helper_provider_t *));
15142 15161 }
15143 15162 }
15144 15163
15145 15164 help->dthps_provs[help->dthps_nprovs] = hprov;
15146 15165 help->dthps_nprovs++;
15147 15166
15148 15167 return (0);
15149 15168 }
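/*
 * A sketch of the capped-doubling growth policy applied above (the
 * name grow_capacity is hypothetical): capacity starts at 2, doubles
 * each time the table fills, and is clamped to a fixed maximum,
 * which keeps the number of reallocations logarithmic in the final
 * table size.
 */
static uint_t
grow_capacity(uint_t cur, uint_t max)
{
	uint_t next = (cur == 0) ? 2 : cur * 2;

	return (next > max ? max : next);
}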
15150 15169
15151 15170 static void
15152 15171 dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
15153 15172 {
15154 15173 mutex_enter(&dtrace_lock);
15155 15174
15156 15175 if (--hprov->dthp_ref == 0) {
15157 15176 dof_hdr_t *dof;
15158 15177 mutex_exit(&dtrace_lock);
15159 15178 dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
15160 15179 dtrace_dof_destroy(dof);
15161 15180 kmem_free(hprov, sizeof (dtrace_helper_provider_t));
15162 15181 } else {
15163 15182 mutex_exit(&dtrace_lock);
15164 15183 }
15165 15184 }
15166 15185
15167 15186 static int
15168 15187 dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
15169 15188 {
15170 15189 uintptr_t daddr = (uintptr_t)dof;
15171 15190 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
15172 15191 dof_provider_t *provider;
15173 15192 dof_probe_t *probe;
15174 15193 uint8_t *arg;
15175 15194 char *strtab, *typestr;
15176 15195 dof_stridx_t typeidx;
15177 15196 size_t typesz;
15178 15197 uint_t nprobes, j, k;
15179 15198
15180 15199 ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);
15181 15200
15182 15201 if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
15183 15202 dtrace_dof_error(dof, "misaligned section offset");
15184 15203 return (-1);
15185 15204 }
15186 15205
15187 15206 /*
15188 15207 * The section needs to be large enough to contain the DOF provider
15189 15208 * structure appropriate for the given version.
15190 15209 */
15191 15210 if (sec->dofs_size <
15192 15211 ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
15193 15212 offsetof(dof_provider_t, dofpv_prenoffs) :
15194 15213 sizeof (dof_provider_t))) {
15195 15214 dtrace_dof_error(dof, "provider section too small");
15196 15215 return (-1);
15197 15216 }
15198 15217
15199 15218 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
15200 15219 str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
15201 15220 prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
15202 15221 arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
15203 15222 off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);
15204 15223
15205 15224 if (str_sec == NULL || prb_sec == NULL ||
15206 15225 arg_sec == NULL || off_sec == NULL)
15207 15226 return (-1);
15208 15227
15209 15228 enoff_sec = NULL;
15210 15229
15211 15230 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
15212 15231 provider->dofpv_prenoffs != DOF_SECT_NONE &&
15213 15232 (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
15214 15233 provider->dofpv_prenoffs)) == NULL)
15215 15234 return (-1);
15216 15235
15217 15236 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
15218 15237
15219 15238 if (provider->dofpv_name >= str_sec->dofs_size ||
15220 15239 strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
15221 15240 dtrace_dof_error(dof, "invalid provider name");
15222 15241 return (-1);
15223 15242 }
15224 15243
15225 15244 if (prb_sec->dofs_entsize == 0 ||
15226 15245 prb_sec->dofs_entsize > prb_sec->dofs_size) {
15227 15246 dtrace_dof_error(dof, "invalid entry size");
15228 15247 return (-1);
15229 15248 }
15230 15249
15231 15250 if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
15232 15251 dtrace_dof_error(dof, "misaligned entry size");
15233 15252 return (-1);
15234 15253 }
15235 15254
15236 15255 if (off_sec->dofs_entsize != sizeof (uint32_t)) {
15237 15256 dtrace_dof_error(dof, "invalid entry size");
15238 15257 return (-1);
15239 15258 }
15240 15259
15241 15260 if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
15242 15261 dtrace_dof_error(dof, "misaligned section offset");
15243 15262 return (-1);
15244 15263 }
15245 15264
15246 15265 if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
15247 15266 dtrace_dof_error(dof, "invalid entry size");
15248 15267 return (-1);
15249 15268 }
15250 15269
15251 15270 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
15252 15271
15253 15272 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
15254 15273
15255 15274 /*
15256 15275 * Take a pass through the probes to check for errors.
15257 15276 */
15258 15277 for (j = 0; j < nprobes; j++) {
15259 15278 probe = (dof_probe_t *)(uintptr_t)(daddr +
15260 15279 prb_sec->dofs_offset + j * prb_sec->dofs_entsize);
15261 15280
15262 15281 if (probe->dofpr_func >= str_sec->dofs_size) {
15263 15282 dtrace_dof_error(dof, "invalid function name");
15264 15283 return (-1);
15265 15284 }
15266 15285
15267 15286 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
15268 15287 dtrace_dof_error(dof, "function name too long");
15269 15288 return (-1);
15270 15289 }
15271 15290
15272 15291 if (probe->dofpr_name >= str_sec->dofs_size ||
15273 15292 strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
15274 15293 dtrace_dof_error(dof, "invalid probe name");
15275 15294 return (-1);
15276 15295 }
15277 15296
15278 15297 /*
15279 15298 * The offset count must not wrap the index, and the offsets
15280 15299 * must also not overflow the section's data.
15281 15300 */
15282 15301 if (probe->dofpr_offidx + probe->dofpr_noffs <
15283 15302 probe->dofpr_offidx ||
15284 15303 (probe->dofpr_offidx + probe->dofpr_noffs) *
15285 15304 off_sec->dofs_entsize > off_sec->dofs_size) {
15286 15305 dtrace_dof_error(dof, "invalid probe offset");
15287 15306 return (-1);
15288 15307 }
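		/*
		 * Worked example (illustrative): with 32-bit values,
		 * dofpr_offidx = 0xfffffff0 and dofpr_noffs = 0x20 sum
		 * to 0x10; the wrapped sum could pass the size check
		 * alone, which is why the check above first rejects
		 * any sum smaller than the starting index.
		 */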
15289 15308
15290 15309 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
15291 15310 /*
15292 15311 * If there's no is-enabled offset section, make sure
15293 15312 * there aren't any is-enabled offsets. Otherwise
15294 15313 * perform the same checks as for probe offsets
15295 15314 * (immediately above).
15296 15315 */
15297 15316 if (enoff_sec == NULL) {
15298 15317 if (probe->dofpr_enoffidx != 0 ||
15299 15318 probe->dofpr_nenoffs != 0) {
15300 15319 dtrace_dof_error(dof, "is-enabled "
15301 15320 "offsets with null section");
15302 15321 return (-1);
15303 15322 }
15304 15323 } else if (probe->dofpr_enoffidx +
15305 15324 probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
15306 15325 (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
15307 15326 enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
15308 15327 dtrace_dof_error(dof, "invalid is-enabled "
15309 15328 "offset");
15310 15329 return (-1);
15311 15330 }
15312 15331
15313 15332 if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
15314 15333 dtrace_dof_error(dof, "zero probe and "
15315 15334 "is-enabled offsets");
15316 15335 return (-1);
15317 15336 }
15318 15337 } else if (probe->dofpr_noffs == 0) {
15319 15338 dtrace_dof_error(dof, "zero probe offsets");
15320 15339 return (-1);
15321 15340 }
15322 15341
15323 15342 if (probe->dofpr_argidx + probe->dofpr_xargc <
15324 15343 probe->dofpr_argidx ||
15325 15344 (probe->dofpr_argidx + probe->dofpr_xargc) *
15326 15345 arg_sec->dofs_entsize > arg_sec->dofs_size) {
15327 15346 dtrace_dof_error(dof, "invalid args");
15328 15347 return (-1);
15329 15348 }
15330 15349
15331 15350 typeidx = probe->dofpr_nargv;
15332 15351 typestr = strtab + probe->dofpr_nargv;
15333 15352 for (k = 0; k < probe->dofpr_nargc; k++) {
15334 15353 if (typeidx >= str_sec->dofs_size) {
15335 15354 dtrace_dof_error(dof, "bad "
15336 15355 "native argument type");
15337 15356 return (-1);
15338 15357 }
15339 15358
15340 15359 typesz = strlen(typestr) + 1;
15341 15360 if (typesz > DTRACE_ARGTYPELEN) {
15342 15361 dtrace_dof_error(dof, "native "
15343 15362 "argument type too long");
15344 15363 return (-1);
15345 15364 }
15346 15365 typeidx += typesz;
15347 15366 typestr += typesz;
15348 15367 }
15349 15368
15350 15369 typeidx = probe->dofpr_xargv;
15351 15370 typestr = strtab + probe->dofpr_xargv;
15352 15371 for (k = 0; k < probe->dofpr_xargc; k++) {
15353 15372 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
15354 15373 dtrace_dof_error(dof, "bad "
15355 15374 "native argument index");
15356 15375 return (-1);
15357 15376 }
15358 15377
15359 15378 if (typeidx >= str_sec->dofs_size) {
15360 15379 dtrace_dof_error(dof, "bad "
15361 15380 "translated argument type");
15362 15381 return (-1);
15363 15382 }
15364 15383
15365 15384 typesz = strlen(typestr) + 1;
15366 15385 if (typesz > DTRACE_ARGTYPELEN) {
15367 15386 dtrace_dof_error(dof, "translated argument "
15368 15387 "type too long");
15369 15388 return (-1);
15370 15389 }
15371 15390
15372 15391 typeidx += typesz;
15373 15392 typestr += typesz;
15374 15393 }
15375 15394 }
15376 15395
15377 15396 return (0);
15378 15397 }
15379 15398
15380 15399 static int
15381 15400 dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp)
15382 15401 {
15383 15402 dtrace_helpers_t *help;
15384 15403 dtrace_vstate_t *vstate;
15385 15404 dtrace_enabling_t *enab = NULL;
15386 15405 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
15387 15406 uintptr_t daddr = (uintptr_t)dof;
15388 15407
15389 15408 ASSERT(MUTEX_HELD(&dtrace_lock));
15390 15409
15391 15410 if ((help = curproc->p_dtrace_helpers) == NULL)
15392 15411 help = dtrace_helpers_create(curproc);
15393 15412
15394 15413 vstate = &help->dthps_vstate;
15395 15414
15396 15415 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
15397 15416 dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
15398 15417 dtrace_dof_destroy(dof);
15399 15418 return (rv);
15400 15419 }
15401 15420
15402 15421 /*
15403 15422 * Look for helper providers and validate their descriptions.
15404 15423 */
15405 15424 if (dhp != NULL) {
15406 15425 for (i = 0; i < dof->dofh_secnum; i++) {
15407 15426 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
15408 15427 dof->dofh_secoff + i * dof->dofh_secsize);
15409 15428
15410 15429 if (sec->dofs_type != DOF_SECT_PROVIDER)
15411 15430 continue;
15412 15431
15413 15432 if (dtrace_helper_provider_validate(dof, sec) != 0) {
15414 15433 dtrace_enabling_destroy(enab);
15415 15434 dtrace_dof_destroy(dof);
15416 15435 return (-1);
15417 15436 }
15418 15437
15419 15438 nprovs++;
15420 15439 }
15421 15440 }
15422 15441
15423 15442 /*
15424 15443 * Now we need to walk through the ECB descriptions in the enabling.
15425 15444 */
15426 15445 for (i = 0; i < enab->dten_ndesc; i++) {
15427 15446 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
15428 15447 dtrace_probedesc_t *desc = &ep->dted_probe;
15429 15448
15430 15449 if (strcmp(desc->dtpd_provider, "dtrace") != 0)
15431 15450 continue;
15432 15451
15433 15452 if (strcmp(desc->dtpd_mod, "helper") != 0)
15434 15453 continue;
15435 15454
15436 15455 if (strcmp(desc->dtpd_func, "ustack") != 0)
15437 15456 continue;
15438 15457
15439 15458 if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK,
15440 15459 ep)) != 0) {
15441 15460 /*
15442 15461 * Adding this helper action failed -- we are now going
15443 15462 * to rip out the entire generation and return failure.
15444 15463 */
15445 15464 (void) dtrace_helper_destroygen(help->dthps_generation);
15446 15465 dtrace_enabling_destroy(enab);
15447 15466 dtrace_dof_destroy(dof);
15448 15467 return (-1);
15449 15468 }
15450 15469
15451 15470 nhelpers++;
15452 15471 }
15453 15472
15454 15473 if (nhelpers < enab->dten_ndesc)
15455 15474 dtrace_dof_error(dof, "unmatched helpers");
15456 15475
15457 15476 gen = help->dthps_generation++;
15458 15477 dtrace_enabling_destroy(enab);
15459 15478
15460 15479 if (dhp != NULL && nprovs > 0) {
15461 15480 /*
15462 15481 * Now that this is in-kernel, we change the sense of the
15463 15482 * members: dofhp_dof denotes the in-kernel copy of the DOF
15464 15483 * and dofhp_addr denotes the address at user-level.
15465 15484 */
15466 15485 dhp->dofhp_addr = dhp->dofhp_dof;
15467 15486 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
15468 15487
15469 15488 if (dtrace_helper_provider_add(dhp, gen) == 0) {
15470 15489 mutex_exit(&dtrace_lock);
15471 15490 dtrace_helper_provider_register(curproc, help, dhp);
15472 15491 mutex_enter(&dtrace_lock);
15473 15492
15474 15493 destroy = 0;
15475 15494 }
15476 15495 }
15477 15496
15478 15497 if (destroy)
15479 15498 dtrace_dof_destroy(dof);
15480 15499
15481 15500 return (gen);
15482 15501 }
15483 15502
15484 15503 static dtrace_helpers_t *
15485 15504 dtrace_helpers_create(proc_t *p)
15486 15505 {
15487 15506 dtrace_helpers_t *help;
15488 15507
15489 15508 ASSERT(MUTEX_HELD(&dtrace_lock));
15490 15509 ASSERT(p->p_dtrace_helpers == NULL);
15491 15510
15492 15511 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
15493 15512 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
15494 15513 DTRACE_NHELPER_ACTIONS, KM_SLEEP);
15495 15514
15496 15515 p->p_dtrace_helpers = help;
15497 15516 dtrace_helpers++;
15498 15517
15499 15518 return (help);
15500 15519 }
15501 15520
15502 15521 static void
15503 15522 dtrace_helpers_destroy(proc_t *p)
15504 15523 {
15505 15524 dtrace_helpers_t *help;
15506 15525 dtrace_vstate_t *vstate;
15507 15526 int i;
15508 15527
15509 15528 mutex_enter(&dtrace_lock);
15510 15529
15511 15530 ASSERT(p->p_dtrace_helpers != NULL);
15512 15531 ASSERT(dtrace_helpers > 0);
15513 15532
15514 15533 help = p->p_dtrace_helpers;
15515 15534 vstate = &help->dthps_vstate;
15516 15535
15517 15536 /*
15518 15537 * We're now going to lose the help from this process.
15519 15538 */
15520 15539 p->p_dtrace_helpers = NULL;
15521 15540 if (p == curproc) {
15522 15541 dtrace_sync();
15523 15542 } else {
15524 15543 /*
15525 15544 		 * It is sometimes necessary to clean up dtrace helpers from
15526 15545 		 * an incomplete child process as part of a failed fork
15527 15546 * operation. In such situations, a dtrace_sync() call should
15528 15547 * be unnecessary as the process should be devoid of threads,
15529 15548 * much less any in probe context.
15530 15549 */
15531 15550 VERIFY(p->p_stat == SIDL);
15532 15551 }
15533 15552
15534 15553 /*
15535 15554 * Destroy the helper actions.
15536 15555 */
15537 15556 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
15538 15557 dtrace_helper_action_t *h, *next;
15539 15558
15540 15559 for (h = help->dthps_actions[i]; h != NULL; h = next) {
15541 15560 next = h->dtha_next;
15542 15561 dtrace_helper_action_destroy(h, vstate);
15544 15563 }
15545 15564 }
15546 15565
15547 15566 mutex_exit(&dtrace_lock);
15548 15567
15549 15568 /*
15550 15569 * Destroy the helper providers.
15551 15570 */
15552 15571 if (help->dthps_maxprovs > 0) {
15553 15572 mutex_enter(&dtrace_meta_lock);
15554 15573 if (dtrace_meta_pid != NULL) {
15555 15574 ASSERT(dtrace_deferred_pid == NULL);
15556 15575
15557 15576 for (i = 0; i < help->dthps_nprovs; i++) {
15558 15577 dtrace_helper_provider_remove(
15559 15578 &help->dthps_provs[i]->dthp_prov, p->p_pid);
15560 15579 }
15561 15580 } else {
15562 15581 mutex_enter(&dtrace_lock);
15563 15582 ASSERT(help->dthps_deferred == 0 ||
15564 15583 help->dthps_next != NULL ||
15565 15584 help->dthps_prev != NULL ||
15566 15585 help == dtrace_deferred_pid);
15567 15586
15568 15587 /*
15569 15588 * Remove the helper from the deferred list.
15570 15589 */
15571 15590 if (help->dthps_next != NULL)
15572 15591 help->dthps_next->dthps_prev = help->dthps_prev;
15573 15592 if (help->dthps_prev != NULL)
15574 15593 help->dthps_prev->dthps_next = help->dthps_next;
15575 15594 if (dtrace_deferred_pid == help) {
15576 15595 dtrace_deferred_pid = help->dthps_next;
15577 15596 ASSERT(help->dthps_prev == NULL);
15578 15597 }
15579 15598
15580 15599 mutex_exit(&dtrace_lock);
15581 15600 }
15582 15601
15583 15602 mutex_exit(&dtrace_meta_lock);
15584 15603
15585 15604 for (i = 0; i < help->dthps_nprovs; i++) {
15586 15605 dtrace_helper_provider_destroy(help->dthps_provs[i]);
15587 15606 }
15588 15607
15589 15608 kmem_free(help->dthps_provs, help->dthps_maxprovs *
15590 15609 sizeof (dtrace_helper_provider_t *));
15591 15610 }
15592 15611
15593 15612 mutex_enter(&dtrace_lock);
15594 15613
15595 15614 dtrace_vstate_fini(&help->dthps_vstate);
15596 15615 kmem_free(help->dthps_actions,
15597 15616 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
15598 15617 kmem_free(help, sizeof (dtrace_helpers_t));
15599 15618
15600 15619 --dtrace_helpers;
15601 15620 mutex_exit(&dtrace_lock);
15602 15621 }
15603 15622
15604 15623 static void
15605 15624 dtrace_helpers_duplicate(proc_t *from, proc_t *to)
15606 15625 {
15607 15626 dtrace_helpers_t *help, *newhelp;
15608 15627 dtrace_helper_action_t *helper, *new, *last;
15609 15628 dtrace_difo_t *dp;
15610 15629 dtrace_vstate_t *vstate;
15611 15630 int i, j, sz, hasprovs = 0;
15612 15631
15613 15632 mutex_enter(&dtrace_lock);
15614 15633 ASSERT(from->p_dtrace_helpers != NULL);
15615 15634 ASSERT(dtrace_helpers > 0);
15616 15635
15617 15636 help = from->p_dtrace_helpers;
15618 15637 newhelp = dtrace_helpers_create(to);
15619 15638 ASSERT(to->p_dtrace_helpers != NULL);
15620 15639
15621 15640 newhelp->dthps_generation = help->dthps_generation;
15622 15641 vstate = &newhelp->dthps_vstate;
15623 15642
15624 15643 /*
15625 15644 * Duplicate the helper actions.
15626 15645 */
15627 15646 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
15628 15647 if ((helper = help->dthps_actions[i]) == NULL)
15629 15648 continue;
15630 15649
15631 15650 for (last = NULL; helper != NULL; helper = helper->dtha_next) {
15632 15651 new = kmem_zalloc(sizeof (dtrace_helper_action_t),
15633 15652 KM_SLEEP);
15634 15653 new->dtha_generation = helper->dtha_generation;
15635 15654
15636 15655 if ((dp = helper->dtha_predicate) != NULL) {
15637 15656 dp = dtrace_difo_duplicate(dp, vstate);
15638 15657 new->dtha_predicate = dp;
15639 15658 }
15640 15659
15641 15660 new->dtha_nactions = helper->dtha_nactions;
15642 15661 sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
15643 15662 new->dtha_actions = kmem_alloc(sz, KM_SLEEP);
15644 15663
15645 15664 for (j = 0; j < new->dtha_nactions; j++) {
15646 15665 dtrace_difo_t *dp = helper->dtha_actions[j];
15647 15666
15648 15667 ASSERT(dp != NULL);
15649 15668 dp = dtrace_difo_duplicate(dp, vstate);
15650 15669 new->dtha_actions[j] = dp;
15651 15670 }
15652 15671
15653 15672 if (last != NULL) {
15654 15673 last->dtha_next = new;
15655 15674 } else {
15656 15675 newhelp->dthps_actions[i] = new;
15657 15676 }
15658 15677
15659 15678 last = new;
15660 15679 }
15661 15680 }
15662 15681
15663 15682 /*
15664 15683 * Duplicate the helper providers and register them with the
15665 15684 * DTrace framework.
15666 15685 */
15667 15686 if (help->dthps_nprovs > 0) {
15668 15687 newhelp->dthps_nprovs = help->dthps_nprovs;
15669 15688 newhelp->dthps_maxprovs = help->dthps_nprovs;
15670 15689 newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
15671 15690 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
15672 15691 for (i = 0; i < newhelp->dthps_nprovs; i++) {
15673 15692 newhelp->dthps_provs[i] = help->dthps_provs[i];
15674 15693 newhelp->dthps_provs[i]->dthp_ref++;
15675 15694 }
15676 15695
15677 15696 hasprovs = 1;
15678 15697 }
15679 15698
15680 15699 mutex_exit(&dtrace_lock);
15681 15700
15682 15701 if (hasprovs)
15683 15702 dtrace_helper_provider_register(to, newhelp, NULL);
15684 15703 }
15685 15704
15686 15705 /*
15687 15706 * DTrace Hook Functions
15688 15707 */
15689 15708 static void
15690 15709 dtrace_module_loaded(struct modctl *ctl)
15691 15710 {
15692 15711 dtrace_provider_t *prv;
15693 15712
15694 15713 mutex_enter(&dtrace_provider_lock);
15695 15714 mutex_enter(&mod_lock);
15696 15715
15697 15716 ASSERT(ctl->mod_busy);
15698 15717
15699 15718 /*
15700 15719 	 * We're going to call each provider's per-module provide operation
15701 15720 * specifying only this module.
15702 15721 */
15703 15722 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
15704 15723 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
15705 15724
15706 15725 mutex_exit(&mod_lock);
15707 15726 mutex_exit(&dtrace_provider_lock);
15708 15727
15709 15728 /*
15710 15729 * If we have any retained enablings, we need to match against them.
15711 15730 * Enabling probes requires that cpu_lock be held, and we cannot hold
15712 15731 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
15713 15732 * module. (In particular, this happens when loading scheduling
15714 15733 * classes.) So if we have any retained enablings, we need to dispatch
15715 15734 * our task queue to do the match for us.
15716 15735 */
15717 15736 mutex_enter(&dtrace_lock);
15718 15737
15719 15738 if (dtrace_retained == NULL) {
15720 15739 mutex_exit(&dtrace_lock);
15721 15740 return;
15722 15741 }
15723 15742
15724 15743 (void) taskq_dispatch(dtrace_taskq,
15725 15744 (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);
15726 15745
15727 15746 mutex_exit(&dtrace_lock);
15728 15747
15729 15748 /*
15730 15749 * And now, for a little heuristic sleaze: in general, we want to
15731 15750 * match modules as soon as they load. However, we cannot guarantee
15732 15751 * this, because it would lead us to the lock ordering violation
15733 15752 * outlined above. The common case, of course, is that cpu_lock is
15734 15753 * _not_ held -- so we delay here for a clock tick, hoping that that's
15735 15754 * long enough for the task queue to do its work. If it's not, it's
15736 15755 * not a serious problem -- it just means that the module that we
15737 15756 * just loaded may not be immediately instrumentable.
15738 15757 */
15739 15758 delay(1);
15740 15759 }
15741 15760
15742 15761 static void
15743 15762 dtrace_module_unloaded(struct modctl *ctl)
15744 15763 {
15745 15764 dtrace_probe_t template, *probe, *first, *next;
15746 15765 dtrace_provider_t *prov;
15747 15766
15748 15767 template.dtpr_mod = ctl->mod_modname;
15749 15768
15750 15769 mutex_enter(&dtrace_provider_lock);
15751 15770 mutex_enter(&mod_lock);
15752 15771 mutex_enter(&dtrace_lock);
15753 15772
15754 15773 if (dtrace_bymod == NULL) {
15755 15774 /*
15756 15775 * The DTrace module is loaded (obviously) but not attached;
15757 15776 * we don't have any work to do.
15758 15777 */
15759 15778 mutex_exit(&dtrace_provider_lock);
15760 15779 mutex_exit(&mod_lock);
15761 15780 mutex_exit(&dtrace_lock);
15762 15781 return;
15763 15782 }
15764 15783
15765 15784 for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
15766 15785 probe != NULL; probe = probe->dtpr_nextmod) {
15767 15786 if (probe->dtpr_ecb != NULL) {
15768 15787 mutex_exit(&dtrace_provider_lock);
15769 15788 mutex_exit(&mod_lock);
15770 15789 mutex_exit(&dtrace_lock);
15771 15790
15772 15791 /*
15773 15792 * This shouldn't _actually_ be possible -- we're
15774 15793 * unloading a module that has an enabled probe in it.
15775 15794 * (It's normally up to the provider to make sure that
15776 15795 * this can't happen.) However, because dtps_enable()
15777 15796 * doesn't have a failure mode, there can be an
15778 15797 * enable/unload race. Upshot: we don't want to
15779 15798 * assert, but we're not going to disable the
15780 15799 * probe, either.
15781 15800 */
15782 15801 if (dtrace_err_verbose) {
15783 15802 cmn_err(CE_WARN, "unloaded module '%s' had "
15784 15803 "enabled probes", ctl->mod_modname);
15785 15804 }
15786 15805
15787 15806 return;
15788 15807 }
15789 15808 }
15790 15809
15791 15810 probe = first;
15792 15811
15793 15812 for (first = NULL; probe != NULL; probe = next) {
15794 15813 ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);
15795 15814
15796 15815 dtrace_probes[probe->dtpr_id - 1] = NULL;
15797 15816
15798 15817 next = probe->dtpr_nextmod;
15799 15818 dtrace_hash_remove(dtrace_bymod, probe);
15800 15819 dtrace_hash_remove(dtrace_byfunc, probe);
15801 15820 dtrace_hash_remove(dtrace_byname, probe);
15802 15821
15803 15822 if (first == NULL) {
15804 15823 first = probe;
15805 15824 probe->dtpr_nextmod = NULL;
15806 15825 } else {
15807 15826 probe->dtpr_nextmod = first;
15808 15827 first = probe;
15809 15828 }
15810 15829 }
15811 15830
15812 15831 /*
15813 15832 * We've removed all of the module's probes from the hash chains and
15814 15833 * from the probe array. Now issue a dtrace_sync() to be sure that
15815 15834 * everyone has cleared out from any probe array processing.
15816 15835 */
15817 15836 dtrace_sync();
15818 15837
15819 15838 for (probe = first; probe != NULL; probe = first) {
15820 15839 first = probe->dtpr_nextmod;
15821 15840 prov = probe->dtpr_provider;
15822 15841 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
15823 15842 probe->dtpr_arg);
15824 15843 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
15825 15844 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
15826 15845 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
15827 15846 vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
15828 15847 kmem_free(probe, sizeof (dtrace_probe_t));
15829 15848 }
15830 15849
15831 15850 mutex_exit(&dtrace_lock);
15832 15851 mutex_exit(&mod_lock);
15833 15852 mutex_exit(&dtrace_provider_lock);
15834 15853 }
15835 15854
15836 15855 void
15837 15856 dtrace_suspend(void)
15838 15857 {
15839 15858 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
15840 15859 }
15841 15860
15842 15861 void
15843 15862 dtrace_resume(void)
15844 15863 {
15845 15864 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
15846 15865 }
15847 15866
15848 15867 static int
15849 15868 dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
15850 15869 {
15851 15870 ASSERT(MUTEX_HELD(&cpu_lock));
15852 15871 mutex_enter(&dtrace_lock);
15853 15872
15854 15873 switch (what) {
15855 15874 case CPU_CONFIG: {
15856 15875 dtrace_state_t *state;
15857 15876 dtrace_optval_t *opt, rs, c;
15858 15877
15859 15878 /*
15860 15879 * For now, we only allocate a new buffer for anonymous state.
15861 15880 */
15862 15881 if ((state = dtrace_anon.dta_state) == NULL)
15863 15882 break;
15864 15883
15865 15884 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
15866 15885 break;
15867 15886
15868 15887 opt = state->dts_options;
15869 15888 c = opt[DTRACEOPT_CPU];
15870 15889
15871 15890 if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
15872 15891 break;
15873 15892
15874 15893 /*
15875 15894 * Regardless of what the actual policy is, we're going to
15876 15895 * temporarily set our resize policy to be manual. We're
15877 15896 * also going to temporarily set our CPU option to denote
15878 15897 * the newly configured CPU.
15879 15898 */
15880 15899 rs = opt[DTRACEOPT_BUFRESIZE];
15881 15900 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
15882 15901 opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;
15883 15902
15884 15903 (void) dtrace_state_buffers(state);
15885 15904
15886 15905 opt[DTRACEOPT_BUFRESIZE] = rs;
15887 15906 opt[DTRACEOPT_CPU] = c;
15888 15907
15889 15908 break;
15890 15909 }
15891 15910
15892 15911 case CPU_UNCONFIG:
15893 15912 /*
15894 15913 * We don't free the buffer in the CPU_UNCONFIG case. (The
15895 15914 * buffer will be freed when the consumer exits.)
15896 15915 */
15897 15916 break;
15898 15917
15899 15918 default:
15900 15919 break;
15901 15920 }
15902 15921
15903 15922 mutex_exit(&dtrace_lock);
15904 15923 return (0);
15905 15924 }
15906 15925
15907 15926 static void
15908 15927 dtrace_cpu_setup_initial(processorid_t cpu)
15909 15928 {
15910 15929 (void) dtrace_cpu_setup(CPU_CONFIG, cpu);
15911 15930 }
15912 15931
15913 15932 static void
15914 15933 dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
15915 15934 {
15916 15935 if (dtrace_toxranges >= dtrace_toxranges_max) {
15917 15936 int osize, nsize;
15918 15937 dtrace_toxrange_t *range;
15919 15938
15920 15939 osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
15921 15940
15922 15941 if (osize == 0) {
15923 15942 ASSERT(dtrace_toxrange == NULL);
15924 15943 ASSERT(dtrace_toxranges_max == 0);
15925 15944 dtrace_toxranges_max = 1;
15926 15945 } else {
15927 15946 dtrace_toxranges_max <<= 1;
15928 15947 }
15929 15948
15930 15949 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
15931 15950 range = kmem_zalloc(nsize, KM_SLEEP);
15932 15951
15933 15952 if (dtrace_toxrange != NULL) {
15934 15953 ASSERT(osize != 0);
15935 15954 bcopy(dtrace_toxrange, range, osize);
15936 15955 kmem_free(dtrace_toxrange, osize);
15937 15956 }
15938 15957
15939 15958 dtrace_toxrange = range;
15940 15959 }
15941 15960
15942 15961 	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0);
15943 15962 	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0);
15944 15963
15945 15964 dtrace_toxrange[dtrace_toxranges].dtt_base = base;
15946 15965 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
15947 15966 dtrace_toxranges++;
15948 15967 }
15949 15968
15950 15969 static void
15951 15970 dtrace_getf_barrier(void)
15952 15971 {
15953 15972 /*
15954 15973 * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
15955 15974 * that contain calls to getf(), this routine will be called on every
15956 15975 * closef() before either the underlying vnode is released or the
15957 15976 * file_t itself is freed. By the time we are here, it is essential
15958 15977 * that the file_t can no longer be accessed from a call to getf()
15959 15978 * in probe context -- that assures that a dtrace_sync() can be used
15960 15979 * to clear out any enablings referring to the old structures.
15961 15980 */
15962 15981 if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
15963 15982 kcred->cr_zone->zone_dtrace_getf != 0)
15964 15983 dtrace_sync();
15965 15984 }
15966 15985
15967 15986 /*
15968 15987 * DTrace Driver Cookbook Functions
15969 15988 */
15970 15989 /*ARGSUSED*/
15971 15990 static int
15972 15991 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
15973 15992 {
15974 15993 dtrace_provider_id_t id;
15975 15994 dtrace_state_t *state = NULL;
15976 15995 dtrace_enabling_t *enab;
15977 15996
15978 15997 mutex_enter(&cpu_lock);
15979 15998 mutex_enter(&dtrace_provider_lock);
15980 15999 mutex_enter(&dtrace_lock);
15981 16000
15982 16001 if (ddi_soft_state_init(&dtrace_softstate,
15983 16002 sizeof (dtrace_state_t), 0) != 0) {
15984 16003 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
15985 16004 mutex_exit(&cpu_lock);
15986 16005 mutex_exit(&dtrace_provider_lock);
15987 16006 mutex_exit(&dtrace_lock);
15988 16007 return (DDI_FAILURE);
15989 16008 }
15990 16009
15991 16010 if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
15992 16011 DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
15993 16012 ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
15994 16013 DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
15995 16014 cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
15996 16015 ddi_remove_minor_node(devi, NULL);
15997 16016 ddi_soft_state_fini(&dtrace_softstate);
15998 16017 mutex_exit(&cpu_lock);
15999 16018 mutex_exit(&dtrace_provider_lock);
16000 16019 mutex_exit(&dtrace_lock);
16001 16020 return (DDI_FAILURE);
16002 16021 }
16003 16022
16004 16023 ddi_report_dev(devi);
16005 16024 dtrace_devi = devi;
16006 16025
16007 16026 dtrace_modload = dtrace_module_loaded;
16008 16027 dtrace_modunload = dtrace_module_unloaded;
16009 16028 dtrace_cpu_init = dtrace_cpu_setup_initial;
16010 16029 dtrace_helpers_cleanup = dtrace_helpers_destroy;
16011 16030 dtrace_helpers_fork = dtrace_helpers_duplicate;
16012 16031 dtrace_cpustart_init = dtrace_suspend;
16013 16032 dtrace_cpustart_fini = dtrace_resume;
16014 16033 dtrace_debugger_init = dtrace_suspend;
16015 16034 dtrace_debugger_fini = dtrace_resume;
16016 16035
16017 16036 register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
16018 16037
16019 16038 ASSERT(MUTEX_HELD(&cpu_lock));
16020 16039
16021 16040 dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
16022 16041 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
16023 16042 dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
16024 16043 UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
16025 16044 VM_SLEEP | VMC_IDENTIFIER);
16026 16045 dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
16027 16046 1, INT_MAX, 0);
16028 16047
16029 16048 dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
16030 16049 sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
16031 16050 NULL, NULL, NULL, NULL, NULL, 0);
16032 16051
16033 16052 ASSERT(MUTEX_HELD(&cpu_lock));
16034 16053 dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
16035 16054 offsetof(dtrace_probe_t, dtpr_nextmod),
16036 16055 offsetof(dtrace_probe_t, dtpr_prevmod));
16037 16056
16038 16057 dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
16039 16058 offsetof(dtrace_probe_t, dtpr_nextfunc),
16040 16059 offsetof(dtrace_probe_t, dtpr_prevfunc));
16041 16060
16042 16061 dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
16043 16062 offsetof(dtrace_probe_t, dtpr_nextname),
16044 16063 offsetof(dtrace_probe_t, dtpr_prevname));
16045 16064
16046 16065 if (dtrace_retain_max < 1) {
16047 16066 cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
16048 16067 "setting to 1", dtrace_retain_max);
16049 16068 dtrace_retain_max = 1;
16050 16069 }
16051 16070
16052 16071 /*
16053 16072 * Now discover our toxic ranges.
16054 16073 */
16055 16074 dtrace_toxic_ranges(dtrace_toxrange_add);
16056 16075
16057 16076 /*
16058 16077 * Before we register ourselves as a provider to our own framework,
16059 16078 * we would like to assert that dtrace_provider is NULL -- but that's
16060 16079 * not true if we were loaded as a dependency of a DTrace provider.
16061 16080 * Once we've registered, we can assert that dtrace_provider is our
16062 16081 * pseudo provider.
16063 16082 */
16064 16083 (void) dtrace_register("dtrace", &dtrace_provider_attr,
16065 16084 DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);
16066 16085
16067 16086 ASSERT(dtrace_provider != NULL);
16068 16087 ASSERT((dtrace_provider_id_t)dtrace_provider == id);
16069 16088
16070 16089 dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
16071 16090 dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
16072 16091 dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
16073 16092 dtrace_provider, NULL, NULL, "END", 0, NULL);
16074 16093 dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
16075 16094 dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
16076 16095
16077 16096 dtrace_anon_property();
16078 16097 mutex_exit(&cpu_lock);
16079 16098
16080 16099 /*
16081 16100 * If there are already providers, we must ask them to provide their
16082 16101 * probes, and then match any anonymous enabling against them. Note
16083 16102 	 * that the only retained enabling at this time should be the
16084 16103 	 * anonymous enabling itself.
16086 16105 */
16087 16106 if (dtrace_anon.dta_enabling != NULL) {
16088 16107 ASSERT(dtrace_retained == dtrace_anon.dta_enabling);
16089 16108
16090 16109 dtrace_enabling_provide(NULL);
16091 16110 state = dtrace_anon.dta_state;
16092 16111
16093 16112 /*
16094 16113 * We couldn't hold cpu_lock across the above call to
16095 16114 * dtrace_enabling_provide(), but we must hold it to actually
16096 16115 * enable the probes. We have to drop all of our locks, pick
16097 16116 * up cpu_lock, and regain our locks before matching the
16098 16117 * retained anonymous enabling.
16099 16118 */
16100 16119 mutex_exit(&dtrace_lock);
16101 16120 mutex_exit(&dtrace_provider_lock);
16102 16121
16103 16122 mutex_enter(&cpu_lock);
16104 16123 mutex_enter(&dtrace_provider_lock);
16105 16124 mutex_enter(&dtrace_lock);
16106 16125
16107 16126 if ((enab = dtrace_anon.dta_enabling) != NULL)
16108 16127 (void) dtrace_enabling_match(enab, NULL);
16109 16128
16110 16129 mutex_exit(&cpu_lock);
16111 16130 }
16112 16131
16113 16132 mutex_exit(&dtrace_lock);
16114 16133 mutex_exit(&dtrace_provider_lock);
16115 16134
16116 16135 if (state != NULL) {
16117 16136 /*
16118 16137 * If we created any anonymous state, set it going now.
16119 16138 */
16120 16139 (void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
16121 16140 }
16122 16141
16123 16142 return (DDI_SUCCESS);
16124 16143 }
16125 16144
16126 16145 /*ARGSUSED*/
16127 16146 static int
16128 16147 dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
16129 16148 {
16130 16149 dtrace_state_t *state;
16131 16150 uint32_t priv;
16132 16151 uid_t uid;
16133 16152 zoneid_t zoneid;
16134 16153
16135 16154 if (getminor(*devp) == DTRACEMNRN_HELPER)
16136 16155 return (0);
16137 16156
16138 16157 /*
16139 16158 * If this wasn't an open with the "helper" minor, then it must be
16140 16159 * the "dtrace" minor.
16141 16160 */
16142 16161 if (getminor(*devp) != DTRACEMNRN_DTRACE)
16143 16162 return (ENXIO);
16144 16163
16145 16164 /*
16146 16165 * If no DTRACE_PRIV_* bits are set in the credential, then the
16147 16166 * caller lacks sufficient permission to do anything with DTrace.
16148 16167 */
16149 16168 dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
16150 16169 if (priv == DTRACE_PRIV_NONE)
16151 16170 return (EACCES);
16152 16171
16153 16172 /*
16154 16173 * Ask all providers to provide all their probes.
16155 16174 */
16156 16175 mutex_enter(&dtrace_provider_lock);
16157 16176 dtrace_probe_provide(NULL, NULL);
16158 16177 mutex_exit(&dtrace_provider_lock);
16159 16178
16160 16179 mutex_enter(&cpu_lock);
16161 16180 mutex_enter(&dtrace_lock);
16162 16181 dtrace_opens++;
16163 16182 dtrace_membar_producer();
16164 16183
16165 16184 /*
16166 16185 * If the kernel debugger is active (that is, if the kernel debugger
16167 16186 * modified text in some way), we won't allow the open.
16168 16187 */
16169 16188 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
16170 16189 dtrace_opens--;
16171 16190 mutex_exit(&cpu_lock);
16172 16191 mutex_exit(&dtrace_lock);
16173 16192 return (EBUSY);
16174 16193 }
16175 16194
16176 16195 if (dtrace_helptrace_enable && dtrace_helptrace_buffer == NULL) {
16177 16196 /*
16178 16197 * If DTrace helper tracing is enabled, we need to allocate the
16179 16198 * trace buffer and initialize the values.
16180 16199 */
16181 16200 dtrace_helptrace_buffer =
16182 16201 kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
16183 16202 dtrace_helptrace_next = 0;
16184 16203 dtrace_helptrace_wrapped = 0;
16185 16204 dtrace_helptrace_enable = 0;
16186 16205 }
16187 16206
16188 16207 state = dtrace_state_create(devp, cred_p);
16189 16208 mutex_exit(&cpu_lock);
16190 16209
16191 16210 if (state == NULL) {
16192 16211 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
16193 16212 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
16194 16213 mutex_exit(&dtrace_lock);
16195 16214 return (EAGAIN);
16196 16215 }
16197 16216
16198 16217 mutex_exit(&dtrace_lock);
16199 16218
16200 16219 return (0);
16201 16220 }
16202 16221
16203 16222 /*ARGSUSED*/
16204 16223 static int
16205 16224 dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
16206 16225 {
16207 16226 minor_t minor = getminor(dev);
16208 16227 dtrace_state_t *state;
16209 16228 dtrace_helptrace_t *buf = NULL;
16210 16229
16211 16230 if (minor == DTRACEMNRN_HELPER)
16212 16231 return (0);
16213 16232
16214 16233 state = ddi_get_soft_state(dtrace_softstate, minor);
16215 16234
16216 16235 mutex_enter(&cpu_lock);
16217 16236 mutex_enter(&dtrace_lock);
16218 16237
16219 16238 if (state->dts_anon) {
16220 16239 /*
16221 16240 * There is anonymous state. Destroy that first.
16222 16241 */
16223 16242 ASSERT(dtrace_anon.dta_state == NULL);
16224 16243 dtrace_state_destroy(state->dts_anon);
16225 16244 }
16226 16245
16227 16246 if (dtrace_helptrace_disable) {
16228 16247 /*
16229 16248 * If we have been told to disable helper tracing, set the
16230 16249 * buffer to NULL before calling into dtrace_state_destroy();
16231 16250 * we take advantage of its dtrace_sync() to know that no
16232 16251 * CPU is in probe context with enabled helper tracing
16233 16252 * after it returns.
16234 16253 */
16235 16254 buf = dtrace_helptrace_buffer;
16236 16255 dtrace_helptrace_buffer = NULL;
16237 16256 }
16238 16257
16239 16258 dtrace_state_destroy(state);
16240 16259 ASSERT(dtrace_opens > 0);
16241 16260
16242 16261 /*
16243 16262 * Only relinquish control of the kernel debugger interface when there
16244 16263 * are no consumers and no anonymous enablings.
16245 16264 */
16246 16265 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
16247 16266 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
16248 16267
16249 16268 if (buf != NULL) {
16250 16269 kmem_free(buf, dtrace_helptrace_bufsize);
16251 16270 dtrace_helptrace_disable = 0;
16252 16271 }
16253 16272
16254 16273 mutex_exit(&dtrace_lock);
16255 16274 mutex_exit(&cpu_lock);
16256 16275
16257 16276 return (0);
16258 16277 }
16259 16278
16260 16279 /*ARGSUSED*/
16261 16280 static int
16262 16281 dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv)
16263 16282 {
16264 16283 int rval;
16265 16284 dof_helper_t help, *dhp = NULL;
16266 16285
16267 16286 switch (cmd) {
16268 16287 case DTRACEHIOC_ADDDOF:
16269 16288 if (copyin((void *)arg, &help, sizeof (help)) != 0) {
16270 16289 dtrace_dof_error(NULL, "failed to copyin DOF helper");
16271 16290 return (EFAULT);
16272 16291 }
16273 16292
16274 16293 dhp = &help;
16275 16294 arg = (intptr_t)help.dofhp_dof;
16276 16295 /*FALLTHROUGH*/
16277 16296
16278 16297 case DTRACEHIOC_ADD: {
16279 16298 dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval);
16280 16299
16281 16300 if (dof == NULL)
16282 16301 return (rval);
16283 16302
16284 16303 mutex_enter(&dtrace_lock);
16285 16304
16286 16305 /*
16287 16306 * dtrace_helper_slurp() takes responsibility for the dof --
16288 16307 * it may free it now or it may save it and free it later.
16289 16308 */
16290 16309 if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) {
16291 16310 *rv = rval;
16292 16311 rval = 0;
16293 16312 } else {
16294 16313 rval = EINVAL;
16295 16314 }
16296 16315
16297 16316 mutex_exit(&dtrace_lock);
16298 16317 return (rval);
16299 16318 }
16300 16319
16301 16320 case DTRACEHIOC_REMOVE: {
16302 16321 mutex_enter(&dtrace_lock);
16303 16322 rval = dtrace_helper_destroygen(arg);
16304 16323 mutex_exit(&dtrace_lock);
16305 16324
16306 16325 return (rval);
16307 16326 }
16308 16327
16309 16328 default:
16310 16329 break;
16311 16330 }
16312 16331
16313 16332 return (ENOTTY);
16314 16333 }
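/*
 * An illustrative user-level sketch (not from this file) of the
 * DTRACEHIOC_ADDDOF path handled above, in the spirit of what a
 * helper-bearing process does at startup.  The DOF itself is assumed
 * to have been built elsewhere, and error handling is elided.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/dtrace.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>

static int
add_helper_dof(void *dof)
{
	dof_helper_t dh;
	int fd, gen;

	if ((fd = open("/dev/dtrace/helper", O_RDWR)) == -1)
		return (-1);

	(void) memset(&dh, 0, sizeof (dh));
	dh.dofhp_dof = (uint64_t)(uintptr_t)dof;	/* user address of DOF */
	dh.dofhp_addr = (uint64_t)(uintptr_t)dof;

	gen = ioctl(fd, DTRACEHIOC_ADDDOF, &dh);
	(void) close(fd);

	return (gen);	/* generation for a later DTRACEHIOC_REMOVE */
}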
16315 16334
16316 16335 /*ARGSUSED*/
16317 16336 static int
16318 16337 dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
16319 16338 {
16320 16339 minor_t minor = getminor(dev);
16321 16340 dtrace_state_t *state;
16322 16341 int rval;
16323 16342
16324 16343 if (minor == DTRACEMNRN_HELPER)
16325 16344 return (dtrace_ioctl_helper(cmd, arg, rv));
16326 16345
16327 16346 state = ddi_get_soft_state(dtrace_softstate, minor);
16328 16347
16329 16348 if (state->dts_anon) {
16330 16349 ASSERT(dtrace_anon.dta_state == NULL);
16331 16350 state = state->dts_anon;
16332 16351 }
16333 16352
16334 16353 switch (cmd) {
16335 16354 case DTRACEIOC_PROVIDER: {
16336 16355 dtrace_providerdesc_t pvd;
16337 16356 dtrace_provider_t *pvp;
16338 16357
16339 16358 if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0)
16340 16359 return (EFAULT);
16341 16360
16342 16361 pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
16343 16362 mutex_enter(&dtrace_provider_lock);
16344 16363
16345 16364 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
16346 16365 if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0)
16347 16366 break;
16348 16367 }
16349 16368
16350 16369 mutex_exit(&dtrace_provider_lock);
16351 16370
16352 16371 if (pvp == NULL)
16353 16372 return (ESRCH);
16354 16373
16355 16374 bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
16356 16375 bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
16357 16376 if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0)
16358 16377 return (EFAULT);
16359 16378
16360 16379 return (0);
16361 16380 }
16362 16381
16363 16382 case DTRACEIOC_EPROBE: {
16364 16383 dtrace_eprobedesc_t epdesc;
16365 16384 dtrace_ecb_t *ecb;
16366 16385 dtrace_action_t *act;
16367 16386 void *buf;
16368 16387 size_t size;
16369 16388 uintptr_t dest;
16370 16389 int nrecs;
16371 16390
16372 16391 if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0)
16373 16392 return (EFAULT);
16374 16393
16375 16394 mutex_enter(&dtrace_lock);
16376 16395
16377 16396 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
16378 16397 mutex_exit(&dtrace_lock);
16379 16398 return (EINVAL);
16380 16399 }
16381 16400
16382 16401 if (ecb->dte_probe == NULL) {
16383 16402 mutex_exit(&dtrace_lock);
16384 16403 return (EINVAL);
16385 16404 }
16386 16405
16387 16406 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
16388 16407 epdesc.dtepd_uarg = ecb->dte_uarg;
16389 16408 epdesc.dtepd_size = ecb->dte_size;
16390 16409
16391 16410 nrecs = epdesc.dtepd_nrecs;
16392 16411 epdesc.dtepd_nrecs = 0;
16393 16412 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
16394 16413 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
16395 16414 continue;
16396 16415
16397 16416 epdesc.dtepd_nrecs++;
16398 16417 }
16399 16418
16400 16419 /*
16401 16420 * Now that we have the size, we need to allocate a temporary
16402 16421 * buffer in which to store the complete description. We need
16403 16422 		 * the temporary buffer to be able to drop dtrace_lock
16404 16423 * across the copyout(), below.
16405 16424 */
16406 16425 size = sizeof (dtrace_eprobedesc_t) +
16407 16426 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));
16408 16427
16409 16428 buf = kmem_alloc(size, KM_SLEEP);
16410 16429 dest = (uintptr_t)buf;
16411 16430
16412 16431 bcopy(&epdesc, (void *)dest, sizeof (epdesc));
16413 16432 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);
16414 16433
16415 16434 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
16416 16435 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
16417 16436 continue;
16418 16437
16419 16438 if (nrecs-- == 0)
16420 16439 break;
16421 16440
16422 16441 bcopy(&act->dta_rec, (void *)dest,
16423 16442 sizeof (dtrace_recdesc_t));
16424 16443 dest += sizeof (dtrace_recdesc_t);
16425 16444 }
16426 16445
16427 16446 mutex_exit(&dtrace_lock);
16428 16447
16429 16448 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
16430 16449 kmem_free(buf, size);
16431 16450 return (EFAULT);
16432 16451 }
16433 16452
16434 16453 kmem_free(buf, size);
16435 16454 return (0);
16436 16455 }
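
The comment above describes a pattern that recurs throughout these ioctl handlers: size the result under dtrace_lock, stage it into a temporary kernel buffer, and drop the lock before copyout() so the lock is never held while faulting on (possibly pageable) user memory. A minimal sketch of the shape; compute_size() and fill_buffer() are hypothetical stand-ins for the per-ioctl logic:

	void *buf;
	size_t size;

	mutex_enter(&dtrace_lock);
	size = compute_size(state);		/* pass 1: size it under the lock */
	buf = kmem_alloc(size, KM_SLEEP);
	fill_buffer(state, buf, size);		/* pass 2: stage the description */
	mutex_exit(&dtrace_lock);		/* never hold the lock across copyout() */

	if (copyout(buf, (void *)arg, size) != 0) {
		kmem_free(buf, size);
		return (EFAULT);
	}

	kmem_free(buf, size);
	return (0);
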
16437 16456
16438 16457 case DTRACEIOC_AGGDESC: {
16439 16458 dtrace_aggdesc_t aggdesc;
16440 16459 dtrace_action_t *act;
16441 16460 dtrace_aggregation_t *agg;
16442 16461 int nrecs;
16443 16462 uint32_t offs;
16444 16463 dtrace_recdesc_t *lrec;
16445 16464 void *buf;
16446 16465 size_t size;
16447 16466 uintptr_t dest;
16448 16467
16449 16468 if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0)
16450 16469 return (EFAULT);
16451 16470
16452 16471 mutex_enter(&dtrace_lock);
16453 16472
16454 16473 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
16455 16474 mutex_exit(&dtrace_lock);
16456 16475 return (EINVAL);
16457 16476 }
16458 16477
16459 16478 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;
16460 16479
16461 16480 nrecs = aggdesc.dtagd_nrecs;
16462 16481 aggdesc.dtagd_nrecs = 0;
16463 16482
16464 16483 offs = agg->dtag_base;
16465 16484 lrec = &agg->dtag_action.dta_rec;
16466 16485 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;
16467 16486
16468 16487 for (act = agg->dtag_first; ; act = act->dta_next) {
16469 16488 ASSERT(act->dta_intuple ||
16470 16489 DTRACEACT_ISAGG(act->dta_kind));
16471 16490
16472 16491 /*
16473 16492 * If this action has a record size of zero, it
16474 16493 * denotes an argument to the aggregating action.
16475 16494 * Because the presence of this record doesn't (or
16476 16495 * shouldn't) affect the way the data is interpreted,
16477 16496 			 * we don't copy it out, to spare user level the
16478 16497 			 * confusion of dealing with a zero-length record.
16479 16498 */
16480 16499 if (act->dta_rec.dtrd_size == 0) {
16481 16500 ASSERT(agg->dtag_hasarg);
16482 16501 continue;
16483 16502 }
16484 16503
16485 16504 aggdesc.dtagd_nrecs++;
16486 16505
16487 16506 if (act == &agg->dtag_action)
16488 16507 break;
16489 16508 }
16490 16509
16491 16510 /*
16492 16511 * Now that we have the size, we need to allocate a temporary
16493 16512 * buffer in which to store the complete description. We need
16494 16513 	 * the temporary buffer to be able to drop dtrace_lock
16495 16514 * across the copyout(), below.
16496 16515 */
16497 16516 size = sizeof (dtrace_aggdesc_t) +
16498 16517 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));
16499 16518
16500 16519 buf = kmem_alloc(size, KM_SLEEP);
16501 16520 dest = (uintptr_t)buf;
16502 16521
16503 16522 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
16504 16523 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);
16505 16524
16506 16525 for (act = agg->dtag_first; ; act = act->dta_next) {
16507 16526 dtrace_recdesc_t rec = act->dta_rec;
16508 16527
16509 16528 /*
16510 16529 * See the comment in the above loop for why we pass
16511 16530 * over zero-length records.
16512 16531 */
16513 16532 if (rec.dtrd_size == 0) {
16514 16533 ASSERT(agg->dtag_hasarg);
16515 16534 continue;
16516 16535 }
16517 16536
16518 16537 if (nrecs-- == 0)
16519 16538 break;
16520 16539
16521 16540 rec.dtrd_offset -= offs;
16522 16541 bcopy(&rec, (void *)dest, sizeof (rec));
16523 16542 dest += sizeof (dtrace_recdesc_t);
16524 16543
16525 16544 if (act == &agg->dtag_action)
16526 16545 break;
16527 16546 }
16528 16547
16529 16548 mutex_exit(&dtrace_lock);
16530 16549
16531 16550 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
16532 16551 kmem_free(buf, size);
16533 16552 return (EFAULT);
16534 16553 }
16535 16554
16536 16555 kmem_free(buf, size);
16537 16556 return (0);
16538 16557 }
16539 16558
16540 16559 case DTRACEIOC_ENABLE: {
16541 16560 dof_hdr_t *dof;
16542 16561 dtrace_enabling_t *enab = NULL;
16543 16562 dtrace_vstate_t *vstate;
16544 16563 int err = 0;
16545 16564
16546 16565 *rv = 0;
16547 16566
16548 16567 /*
16549 16568 * If a NULL argument has been passed, we take this as our
16550 16569 * cue to reevaluate our enablings.
16551 16570 */
16552 16571 if (arg == NULL) {
16553 16572 dtrace_enabling_matchall();
16554 16573
16555 16574 return (0);
16556 16575 }
16557 16576
16558 16577 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
16559 16578 return (rval);
16560 16579
16561 16580 mutex_enter(&cpu_lock);
16562 16581 mutex_enter(&dtrace_lock);
16563 16582 vstate = &state->dts_vstate;
16564 16583
16565 16584 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
16566 16585 mutex_exit(&dtrace_lock);
16567 16586 mutex_exit(&cpu_lock);
16568 16587 dtrace_dof_destroy(dof);
16569 16588 return (EBUSY);
16570 16589 }
16571 16590
16572 16591 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
16573 16592 mutex_exit(&dtrace_lock);
16574 16593 mutex_exit(&cpu_lock);
16575 16594 dtrace_dof_destroy(dof);
16576 16595 return (EINVAL);
16577 16596 }
16578 16597
16579 16598 if ((rval = dtrace_dof_options(dof, state)) != 0) {
16580 16599 dtrace_enabling_destroy(enab);
16581 16600 mutex_exit(&dtrace_lock);
16582 16601 mutex_exit(&cpu_lock);
16583 16602 dtrace_dof_destroy(dof);
16584 16603 return (rval);
16585 16604 }
16586 16605
16587 16606 if ((err = dtrace_enabling_match(enab, rv)) == 0) {
16588 16607 err = dtrace_enabling_retain(enab);
16589 16608 } else {
16590 16609 dtrace_enabling_destroy(enab);
16591 16610 }
16592 16611
16593 16612 mutex_exit(&cpu_lock);
16594 16613 mutex_exit(&dtrace_lock);
16595 16614 dtrace_dof_destroy(dof);
16596 16615
16597 16616 return (err);
16598 16617 }
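
Worth noting above: a NULL argument to DTRACEIOC_ENABLE carries no DOF at all; it is simply a request to re-match every retained enabling via dtrace_enabling_matchall(), for instance after new probes have been published. A minimal user-level sketch of that nudge; it assumes appropriate privilege, and the include set is illustrative rather than authoritative:

	#include <sys/dtrace.h>
	#include <unistd.h>
	#include <fcntl.h>
	#include <stdio.h>

	int
	main(void)
	{
		int fd = open("/dev/dtrace/dtrace", O_RDWR);

		if (fd == -1) {
			perror("open");
			return (1);
		}

		/* A NULL argument is the cue to reevaluate enablings. */
		if (ioctl(fd, DTRACEIOC_ENABLE, NULL) != 0)
			perror("DTRACEIOC_ENABLE");

		(void) close(fd);
		return (0);
	}

The consumer fragments in the annotations below assume fd has been opened the same way.
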
16599 16618
16600 16619 case DTRACEIOC_REPLICATE: {
16601 16620 dtrace_repldesc_t desc;
16602 16621 dtrace_probedesc_t *match = &desc.dtrpd_match;
16603 16622 dtrace_probedesc_t *create = &desc.dtrpd_create;
16604 16623 int err;
16605 16624
16606 16625 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
16607 16626 return (EFAULT);
16608 16627
16609 16628 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
16610 16629 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
16611 16630 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
16612 16631 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
16613 16632
16614 16633 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
16615 16634 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
16616 16635 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
16617 16636 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
16618 16637
16619 16638 mutex_enter(&dtrace_lock);
16620 16639 err = dtrace_enabling_replicate(state, match, create);
16621 16640 mutex_exit(&dtrace_lock);
16622 16641
16623 16642 return (err);
16624 16643 }
16625 16644
16626 16645 case DTRACEIOC_PROBEMATCH:
16627 16646 case DTRACEIOC_PROBES: {
16628 16647 dtrace_probe_t *probe = NULL;
16629 16648 dtrace_probedesc_t desc;
16630 16649 dtrace_probekey_t pkey;
16631 16650 dtrace_id_t i;
16632 16651 int m = 0;
16633 16652 uint32_t priv;
16634 16653 uid_t uid;
16635 16654 zoneid_t zoneid;
16636 16655
16637 16656 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
16638 16657 return (EFAULT);
16639 16658
16640 16659 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
16641 16660 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
16642 16661 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
16643 16662 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';
16644 16663
16645 16664 /*
16646 16665 * Before we attempt to match this probe, we want to give
16647 16666 * all providers the opportunity to provide it.
16648 16667 */
16649 16668 if (desc.dtpd_id == DTRACE_IDNONE) {
16650 16669 mutex_enter(&dtrace_provider_lock);
16651 16670 dtrace_probe_provide(&desc, NULL);
16652 16671 mutex_exit(&dtrace_provider_lock);
16653 16672 desc.dtpd_id++;
16654 16673 }
16655 16674
16656 16675 if (cmd == DTRACEIOC_PROBEMATCH) {
16657 16676 dtrace_probekey(&desc, &pkey);
16658 16677 pkey.dtpk_id = DTRACE_IDNONE;
16659 16678 }
16660 16679
16661 16680 dtrace_cred2priv(cr, &priv, &uid, &zoneid);
16662 16681
16663 16682 mutex_enter(&dtrace_lock);
16664 16683
16665 16684 if (cmd == DTRACEIOC_PROBEMATCH) {
16666 16685 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
16667 16686 if ((probe = dtrace_probes[i - 1]) != NULL &&
16668 16687 (m = dtrace_match_probe(probe, &pkey,
16669 16688 priv, uid, zoneid)) != 0)
16670 16689 break;
16671 16690 }
16672 16691
16673 16692 if (m < 0) {
16674 16693 mutex_exit(&dtrace_lock);
16675 16694 return (EINVAL);
16676 16695 }
16677 16696
16678 16697 } else {
16679 16698 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
16680 16699 if ((probe = dtrace_probes[i - 1]) != NULL &&
16681 16700 dtrace_match_priv(probe, priv, uid, zoneid))
16682 16701 break;
16683 16702 }
16684 16703 }
16685 16704
16686 16705 if (probe == NULL) {
16687 16706 mutex_exit(&dtrace_lock);
16688 16707 return (ESRCH);
16689 16708 }
16690 16709
16691 16710 dtrace_probe_description(probe, &desc);
16692 16711 mutex_exit(&dtrace_lock);
16693 16712
16694 16713 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
16695 16714 return (EFAULT);
16696 16715
16697 16716 return (0);
16698 16717 }
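
The search above starts at the caller-supplied dtpd_id and returns the first visible probe at or beyond it, so user level can walk the whole probe table by bumping dtpd_id past each hit. A hedged fragment (fd as in the earlier sketch; bzero() from <strings.h>, printf() from <stdio.h>):

	dtrace_probedesc_t desc;

	bzero(&desc, sizeof (desc));
	desc.dtpd_id = DTRACE_IDNONE;

	/* ESRCH from the ioctl marks the end of the probe table. */
	while (ioctl(fd, DTRACEIOC_PROBES, &desc) == 0) {
		(void) printf("%5u %s:%s:%s:%s\n", desc.dtpd_id,
		    desc.dtpd_provider, desc.dtpd_mod,
		    desc.dtpd_func, desc.dtpd_name);
		desc.dtpd_id++;		/* resume just past this probe */
	}
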
16699 16718
16700 16719 case DTRACEIOC_PROBEARG: {
16701 16720 dtrace_argdesc_t desc;
16702 16721 dtrace_probe_t *probe;
16703 16722 dtrace_provider_t *prov;
16704 16723
16705 16724 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
16706 16725 return (EFAULT);
16707 16726
16708 16727 if (desc.dtargd_id == DTRACE_IDNONE)
16709 16728 return (EINVAL);
16710 16729
16711 16730 if (desc.dtargd_ndx == DTRACE_ARGNONE)
16712 16731 return (EINVAL);
16713 16732
16714 16733 mutex_enter(&dtrace_provider_lock);
16715 16734 mutex_enter(&mod_lock);
16716 16735 mutex_enter(&dtrace_lock);
16717 16736
16718 16737 if (desc.dtargd_id > dtrace_nprobes) {
16719 16738 mutex_exit(&dtrace_lock);
16720 16739 mutex_exit(&mod_lock);
16721 16740 mutex_exit(&dtrace_provider_lock);
16722 16741 return (EINVAL);
16723 16742 }
16724 16743
16725 16744 if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
16726 16745 mutex_exit(&dtrace_lock);
16727 16746 mutex_exit(&mod_lock);
16728 16747 mutex_exit(&dtrace_provider_lock);
16729 16748 return (EINVAL);
16730 16749 }
16731 16750
16732 16751 mutex_exit(&dtrace_lock);
16733 16752
16734 16753 prov = probe->dtpr_provider;
16735 16754
16736 16755 if (prov->dtpv_pops.dtps_getargdesc == NULL) {
16737 16756 /*
16738 16757 * There isn't any typed information for this probe.
16739 16758 * Set the argument number to DTRACE_ARGNONE.
16740 16759 */
16741 16760 desc.dtargd_ndx = DTRACE_ARGNONE;
16742 16761 } else {
16743 16762 desc.dtargd_native[0] = '\0';
16744 16763 desc.dtargd_xlate[0] = '\0';
16745 16764 desc.dtargd_mapping = desc.dtargd_ndx;
16746 16765
16747 16766 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
16748 16767 probe->dtpr_id, probe->dtpr_arg, &desc);
16749 16768 }
16750 16769
16751 16770 mutex_exit(&mod_lock);
16752 16771 mutex_exit(&dtrace_provider_lock);
16753 16772
16754 16773 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
16755 16774 return (EFAULT);
16756 16775
16757 16776 return (0);
16758 16777 }
16759 16778
16760 16779 case DTRACEIOC_GO: {
16761 16780 processorid_t cpuid;
16762 16781 rval = dtrace_state_go(state, &cpuid);
16763 16782
16764 16783 if (rval != 0)
16765 16784 return (rval);
16766 16785
16767 16786 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
16768 16787 return (EFAULT);
16769 16788
16770 16789 return (0);
16771 16790 }
16772 16791
16773 16792 case DTRACEIOC_STOP: {
16774 16793 processorid_t cpuid;
16775 16794
16776 16795 mutex_enter(&dtrace_lock);
16777 16796 rval = dtrace_state_stop(state, &cpuid);
16778 16797 mutex_exit(&dtrace_lock);
16779 16798
16780 16799 if (rval != 0)
16781 16800 return (rval);
16782 16801
16783 16802 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
16784 16803 return (EFAULT);
16785 16804
16786 16805 return (0);
16787 16806 }
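
DTRACEIOC_GO and DTRACEIOC_STOP bracket a tracing session, and each copies a processorid_t back to the caller. A minimal pairing, assuming fd from the earlier sketch and err() from <err.h>:

	processorid_t cpu;

	if (ioctl(fd, DTRACEIOC_GO, &cpu) != 0)
		err(1, "DTRACEIOC_GO");		/* tracing could not be started */

	/* ... periodically snapshot buffers and status here ... */

	if (ioctl(fd, DTRACEIOC_STOP, &cpu) != 0)
		err(1, "DTRACEIOC_STOP");
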
16788 16807
16789 16808 case DTRACEIOC_DOFGET: {
16790 16809 dof_hdr_t hdr, *dof;
16791 16810 uint64_t len;
16792 16811
16793 16812 if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0)
16794 16813 return (EFAULT);
16795 16814
16796 16815 mutex_enter(&dtrace_lock);
16797 16816 dof = dtrace_dof_create(state);
16798 16817 mutex_exit(&dtrace_lock);
16799 16818
16800 16819 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
16801 16820 rval = copyout(dof, (void *)arg, len);
16802 16821 dtrace_dof_destroy(dof);
16803 16822
16804 16823 return (rval == 0 ? 0 : EFAULT);
16805 16824 }
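
Because the kernel copies out MIN(hdr.dofh_loadsz, dof->dofh_loadsz), DTRACEIOC_DOFGET supports a two-pass fetch: ask for just the header to learn the true load size, then ask again with a buffer that large. A hedged fragment (malloc() failure checking elided):

	dof_hdr_t hdr, *dof;

	bzero(&hdr, sizeof (hdr));
	hdr.dofh_loadsz = sizeof (hdr);		/* pass 1: header only */

	if (ioctl(fd, DTRACEIOC_DOFGET, &hdr) != 0)
		return (NULL);

	/* hdr is now the real DOF header; it carries the full load size. */
	dof = malloc(hdr.dofh_loadsz);
	*dof = hdr;				/* the kernel reads dofh_loadsz back in */

	if (ioctl(fd, DTRACEIOC_DOFGET, dof) != 0) {	/* pass 2: whole DOF */
		free(dof);
		return (NULL);
	}

	return (dof);
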
16806 16825
16807 16826 case DTRACEIOC_AGGSNAP:
16808 16827 case DTRACEIOC_BUFSNAP: {
16809 16828 dtrace_bufdesc_t desc;
16810 16829 caddr_t cached;
16811 16830 dtrace_buffer_t *buf;
16812 16831
16813 16832 if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
16814 16833 return (EFAULT);
16815 16834
16816 16835 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
16817 16836 return (EINVAL);
16818 16837
16819 16838 mutex_enter(&dtrace_lock);
16820 16839
16821 16840 if (cmd == DTRACEIOC_BUFSNAP) {
16822 16841 buf = &state->dts_buffer[desc.dtbd_cpu];
16823 16842 } else {
16824 16843 buf = &state->dts_aggbuffer[desc.dtbd_cpu];
16825 16844 }
16826 16845
16827 16846 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
16828 16847 size_t sz = buf->dtb_offset;
16829 16848
16830 16849 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
16831 16850 mutex_exit(&dtrace_lock);
16832 16851 return (EBUSY);
16833 16852 }
16834 16853
16835 16854 /*
16836 16855 * If this buffer has already been consumed, we're
16837 16856 * going to indicate that there's nothing left here
16838 16857 * to consume.
16839 16858 */
16840 16859 if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
16841 16860 mutex_exit(&dtrace_lock);
16842 16861
16843 16862 desc.dtbd_size = 0;
16844 16863 desc.dtbd_drops = 0;
16845 16864 desc.dtbd_errors = 0;
16846 16865 desc.dtbd_oldest = 0;
16847 16866 sz = sizeof (desc);
16848 16867
16849 16868 if (copyout(&desc, (void *)arg, sz) != 0)
16850 16869 return (EFAULT);
16851 16870
16852 16871 return (0);
16853 16872 }
16854 16873
16855 16874 /*
16856 16875 * If this is a ring buffer that has wrapped, we want
16857 16876 * to copy the whole thing out.
16858 16877 */
16859 16878 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
16860 16879 dtrace_buffer_polish(buf);
16861 16880 sz = buf->dtb_size;
16862 16881 }
16863 16882
16864 16883 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
16865 16884 mutex_exit(&dtrace_lock);
16866 16885 return (EFAULT);
16867 16886 }
16868 16887
16869 16888 desc.dtbd_size = sz;
16870 16889 desc.dtbd_drops = buf->dtb_drops;
16871 16890 desc.dtbd_errors = buf->dtb_errors;
16872 16891 desc.dtbd_oldest = buf->dtb_xamot_offset;
16873 16892 desc.dtbd_timestamp = dtrace_gethrtime();
16874 16893
16875 16894 mutex_exit(&dtrace_lock);
16876 16895
16877 16896 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
16878 16897 return (EFAULT);
16879 16898
16880 16899 buf->dtb_flags |= DTRACEBUF_CONSUMED;
16881 16900
16882 16901 return (0);
16883 16902 }
16884 16903
16885 16904 if (buf->dtb_tomax == NULL) {
16886 16905 ASSERT(buf->dtb_xamot == NULL);
16887 16906 mutex_exit(&dtrace_lock);
16888 16907 return (ENOENT);
16889 16908 }
16890 16909
16891 16910 cached = buf->dtb_tomax;
16892 16911 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
16893 16912
16894 16913 dtrace_xcall(desc.dtbd_cpu,
16895 16914 (dtrace_xcall_t)dtrace_buffer_switch, buf);
16896 16915
16897 16916 state->dts_errors += buf->dtb_xamot_errors;
16898 16917
16899 16918 /*
16900 16919 * If the buffers did not actually switch, then the cross call
16901 16920 * did not take place -- presumably because the given CPU is
16902 16921 * not in the ready set. If this is the case, we'll return
16903 16922 * ENOENT.
16904 16923 */
16905 16924 if (buf->dtb_tomax == cached) {
16906 16925 ASSERT(buf->dtb_xamot != cached);
16907 16926 mutex_exit(&dtrace_lock);
16908 16927 return (ENOENT);
16909 16928 }
16910 16929
16911 16930 ASSERT(cached == buf->dtb_xamot);
16912 16931
16913 16932 /*
16914 16933 * We have our snapshot; now copy it out.
16915 16934 */
16916 16935 if (copyout(buf->dtb_xamot, desc.dtbd_data,
16917 16936 buf->dtb_xamot_offset) != 0) {
16918 16937 mutex_exit(&dtrace_lock);
16919 16938 return (EFAULT);
16920 16939 }
16921 16940
16922 16941 desc.dtbd_size = buf->dtb_xamot_offset;
16923 16942 desc.dtbd_drops = buf->dtb_xamot_drops;
16924 16943 desc.dtbd_errors = buf->dtb_xamot_errors;
16925 16944 desc.dtbd_oldest = 0;
16926 16945 desc.dtbd_timestamp = buf->dtb_switched;
16927 16946
16928 16947 mutex_exit(&dtrace_lock);
16929 16948
16930 16949 /*
16931 16950 * Finally, copy out the buffer description.
16932 16951 */
16933 16952 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
16934 16953 return (EFAULT);
16935 16954
16936 16955 return (0);
16937 16956 }
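
A consumer drains principal buffers by snapshotting them per CPU: point dtbd_data at a buffer at least as large as the in-kernel one, set dtbd_cpu, and issue DTRACEIOC_BUFSNAP; dtbd_size on return gives the number of valid bytes. A hedged fragment in which ncpus, bufsize and process_records() are assumptions of the sketch:

	dtrace_bufdesc_t desc;
	char *data = malloc(bufsize);	/* must cover the in-kernel buffer */
	processorid_t cpu;

	for (cpu = 0; cpu < ncpus; cpu++) {
		bzero(&desc, sizeof (desc));
		desc.dtbd_cpu = cpu;
		desc.dtbd_data = data;

		/* ENOENT: no buffer on this CPU (e.g. it is offline). */
		if (ioctl(fd, DTRACEIOC_BUFSNAP, &desc) != 0)
			continue;

		process_records(data, desc.dtbd_size);
	}
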
16938 16957
16939 16958 case DTRACEIOC_CONF: {
16940 16959 dtrace_conf_t conf;
16941 16960
16942 16961 bzero(&conf, sizeof (conf));
16943 16962 conf.dtc_difversion = DIF_VERSION;
16944 16963 conf.dtc_difintregs = DIF_DIR_NREGS;
16945 16964 conf.dtc_diftupregs = DIF_DTR_NREGS;
16946 16965 conf.dtc_ctfmodel = CTF_MODEL_NATIVE;
16947 16966
16948 16967 if (copyout(&conf, (void *)arg, sizeof (conf)) != 0)
16949 16968 return (EFAULT);
16950 16969
16951 16970 return (0);
16952 16971 }
16953 16972
16954 16973 case DTRACEIOC_STATUS: {
16955 16974 dtrace_status_t stat;
16956 16975 dtrace_dstate_t *dstate;
16957 16976 int i, j;
16958 16977 uint64_t nerrs;
16959 16978
16960 16979 /*
16961 16980 * See the comment in dtrace_state_deadman() for the reason
16962 16981 * for setting dts_laststatus to INT64_MAX before setting
16963 16982 * it to the correct value.
16964 16983 */
16965 16984 state->dts_laststatus = INT64_MAX;
16966 16985 dtrace_membar_producer();
16967 16986 state->dts_laststatus = dtrace_gethrtime();
16968 16987
16969 16988 bzero(&stat, sizeof (stat));
16970 16989
16971 16990 mutex_enter(&dtrace_lock);
16972 16991
16973 16992 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
16974 16993 mutex_exit(&dtrace_lock);
16975 16994 return (ENOENT);
16976 16995 }
16977 16996
16978 16997 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
16979 16998 stat.dtst_exiting = 1;
16980 16999
16981 17000 nerrs = state->dts_errors;
16982 17001 dstate = &state->dts_vstate.dtvs_dynvars;
16983 17002
16984 17003 for (i = 0; i < NCPU; i++) {
16985 17004 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];
16986 17005
16987 17006 stat.dtst_dyndrops += dcpu->dtdsc_drops;
16988 17007 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
16989 17008 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;
16990 17009
16991 17010 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
16992 17011 stat.dtst_filled++;
16993 17012
16994 17013 nerrs += state->dts_buffer[i].dtb_errors;
16995 17014
16996 17015 for (j = 0; j < state->dts_nspeculations; j++) {
16997 17016 dtrace_speculation_t *spec;
16998 17017 dtrace_buffer_t *buf;
16999 17018
17000 17019 spec = &state->dts_speculations[j];
17001 17020 buf = &spec->dtsp_buffer[i];
17002 17021 stat.dtst_specdrops += buf->dtb_xamot_drops;
17003 17022 }
17004 17023 }
17005 17024
17006 17025 stat.dtst_specdrops_busy = state->dts_speculations_busy;
17007 17026 stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
17008 17027 stat.dtst_stkstroverflows = state->dts_stkstroverflows;
17009 17028 stat.dtst_dblerrors = state->dts_dblerrors;
17010 17029 stat.dtst_killed =
17011 17030 (state->dts_activity == DTRACE_ACTIVITY_KILLED);
17012 17031 stat.dtst_errors = nerrs;
17013 17032
17014 17033 mutex_exit(&dtrace_lock);
17015 17034
17016 17035 if (copyout(&stat, (void *)arg, sizeof (stat)) != 0)
17017 17036 return (EFAULT);
17018 17037
17019 17038 return (0);
17020 17039 }
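
Because DTRACEIOC_STATUS refreshes dts_laststatus, consumers are expected to poll it; a consumer that stops polling for longer than the deadman interval is presumed wedged and its state is killed. A hedged polling loop:

	dtrace_status_t stat;

	for (;;) {
		/* ENOENT here means the session is not active. */
		if (ioctl(fd, DTRACEIOC_STATUS, &stat) != 0)
			break;

		if (stat.dtst_exiting || stat.dtst_killed)
			break;

		(void) sleep(1);	/* comfortably inside the deadman interval */
	}
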
17021 17040
17022 17041 case DTRACEIOC_FORMAT: {
17023 17042 dtrace_fmtdesc_t fmt;
17024 17043 char *str;
17025 17044 int len;
17026 17045
17027 17046 if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0)
17028 17047 return (EFAULT);
17029 17048
17030 17049 mutex_enter(&dtrace_lock);
17031 17050
17032 17051 if (fmt.dtfd_format == 0 ||
17033 17052 fmt.dtfd_format > state->dts_nformats) {
17034 17053 mutex_exit(&dtrace_lock);
17035 17054 return (EINVAL);
17036 17055 }
17037 17056
17038 17057 /*
17039 17058 * Format strings are allocated contiguously and they are
17040 17059 * never freed; if a format index is less than the number
17041 17060 * of formats, we can assert that the format map is non-NULL
17042 17061 * and that the format for the specified index is non-NULL.
17043 17062 */
17044 17063 ASSERT(state->dts_formats != NULL);
17045 17064 str = state->dts_formats[fmt.dtfd_format - 1];
17046 17065 ASSERT(str != NULL);
17047 17066
17048 17067 len = strlen(str) + 1;
17049 17068
17050 17069 if (len > fmt.dtfd_length) {
17051 17070 fmt.dtfd_length = len;
17052 17071
17053 17072 if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) {
17054 17073 mutex_exit(&dtrace_lock);
17055 17074 return (EINVAL);
17056 17075 }
17057 17076 } else {
17058 17077 if (copyout(str, fmt.dtfd_string, len) != 0) {
17059 17078 mutex_exit(&dtrace_lock);
17060 17079 return (EINVAL);
17061 17080 }
17062 17081 }
17063 17082
17064 17083 mutex_exit(&dtrace_lock);
17065 17084 return (0);
17066 17085 }
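
The handler above implements the usual two-call protocol: a first call whose dtfd_length is too small gets the required length written back in place of the string, and a second call with an adequate buffer gets the string itself. A hedged lookup routine built on that protocol (format_lookup() is a name invented for this sketch):

	static char *
	format_lookup(int fd, uint16_t format)
	{
		dtrace_fmtdesc_t fmt;

		bzero(&fmt, sizeof (fmt));
		fmt.dtfd_format = format;	/* format indices are 1-based */

		/* Pass 1: a dtfd_length of zero forces the length reply. */
		if (ioctl(fd, DTRACEIOC_FORMAT, &fmt) != 0)
			return (NULL);

		if ((fmt.dtfd_string = malloc(fmt.dtfd_length)) == NULL)
			return (NULL);

		/* Pass 2: the buffer is now large enough for the string. */
		if (ioctl(fd, DTRACEIOC_FORMAT, &fmt) != 0) {
			free(fmt.dtfd_string);
			return (NULL);
		}

		return (fmt.dtfd_string);
	}
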
17067 17086
17068 17087 default:
17069 17088 break;
17070 17089 }
17071 17090
17072 17091 return (ENOTTY);
17073 17092 }
17074 17093
17075 17094 /*ARGSUSED*/
17076 17095 static int
17077 17096 dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
17078 17097 {
17079 17098 dtrace_state_t *state;
17080 17099
17081 17100 switch (cmd) {
17082 17101 case DDI_DETACH:
17083 17102 break;
17084 17103
17085 17104 case DDI_SUSPEND:
17086 17105 return (DDI_SUCCESS);
17087 17106
17088 17107 default:
17089 17108 return (DDI_FAILURE);
17090 17109 }
17091 17110
17092 17111 mutex_enter(&cpu_lock);
17093 17112 mutex_enter(&dtrace_provider_lock);
17094 17113 mutex_enter(&dtrace_lock);
17095 17114
17096 17115 ASSERT(dtrace_opens == 0);
17097 17116
17098 17117 if (dtrace_helpers > 0) {
17099 17118 mutex_exit(&dtrace_provider_lock);
17100 17119 mutex_exit(&dtrace_lock);
17101 17120 mutex_exit(&cpu_lock);
17102 17121 return (DDI_FAILURE);
17103 17122 }
17104 17123
17105 17124 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
17106 17125 mutex_exit(&dtrace_provider_lock);
17107 17126 mutex_exit(&dtrace_lock);
17108 17127 mutex_exit(&cpu_lock);
17109 17128 return (DDI_FAILURE);
17110 17129 }
17111 17130
17112 17131 dtrace_provider = NULL;
17113 17132
17114 17133 if ((state = dtrace_anon_grab()) != NULL) {
17115 17134 /*
17116 17135 * If there were ECBs on this state, the provider should
17117 17136 		 * not have been allowed to detach; assert that there are
17118 17137 		 * none.
17119 17138 */
17120 17139 ASSERT(state->dts_necbs == 0);
17121 17140 dtrace_state_destroy(state);
17122 17141
17123 17142 /*
17124 17143 * If we're being detached with anonymous state, we need to
17125 17144 * indicate to the kernel debugger that DTrace is now inactive.
17126 17145 */
17127 17146 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
17128 17147 }
17129 17148
17130 17149 bzero(&dtrace_anon, sizeof (dtrace_anon_t));
17131 17150 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
17132 17151 dtrace_cpu_init = NULL;
17133 17152 dtrace_helpers_cleanup = NULL;
17134 17153 dtrace_helpers_fork = NULL;
17135 17154 dtrace_cpustart_init = NULL;
17136 17155 dtrace_cpustart_fini = NULL;
17137 17156 dtrace_debugger_init = NULL;
17138 17157 dtrace_debugger_fini = NULL;
17139 17158 dtrace_modload = NULL;
17140 17159 dtrace_modunload = NULL;
17141 17160
17142 17161 ASSERT(dtrace_getf == 0);
17143 17162 ASSERT(dtrace_closef == NULL);
17144 17163
17145 17164 mutex_exit(&cpu_lock);
17146 17165
17147 17166 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
17148 17167 dtrace_probes = NULL;
17149 17168 dtrace_nprobes = 0;
17150 17169
17151 17170 dtrace_hash_destroy(dtrace_bymod);
17152 17171 dtrace_hash_destroy(dtrace_byfunc);
17153 17172 dtrace_hash_destroy(dtrace_byname);
17154 17173 dtrace_bymod = NULL;
17155 17174 dtrace_byfunc = NULL;
17156 17175 dtrace_byname = NULL;
17157 17176
17158 17177 kmem_cache_destroy(dtrace_state_cache);
17159 17178 vmem_destroy(dtrace_minor);
17160 17179 vmem_destroy(dtrace_arena);
17161 17180
17162 17181 if (dtrace_toxrange != NULL) {
17163 17182 kmem_free(dtrace_toxrange,
17164 17183 dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
17165 17184 dtrace_toxrange = NULL;
17166 17185 dtrace_toxranges = 0;
17167 17186 dtrace_toxranges_max = 0;
17168 17187 }
17169 17188
17170 17189 ddi_remove_minor_node(dtrace_devi, NULL);
17171 17190 dtrace_devi = NULL;
17172 17191
17173 17192 ddi_soft_state_fini(&dtrace_softstate);
17174 17193
17175 17194 ASSERT(dtrace_vtime_references == 0);
17176 17195 ASSERT(dtrace_opens == 0);
17177 17196 ASSERT(dtrace_retained == NULL);
17178 17197
17179 17198 mutex_exit(&dtrace_lock);
17180 17199 mutex_exit(&dtrace_provider_lock);
17181 17200
17182 17201 /*
17183 17202 * We don't destroy the task queue until after we have dropped our
17184 17203 * locks (taskq_destroy() may block on running tasks). To prevent
17185 17204 * attempting to do work after we have effectively detached but before
17186 17205 * the task queue has been destroyed, all tasks dispatched via the
17187 17206 * task queue must check that DTrace is still attached before
17188 17207 * performing any operation.
17189 17208 */
17190 17209 taskq_destroy(dtrace_taskq);
17191 17210 dtrace_taskq = NULL;
17192 17211
17193 17212 return (DDI_SUCCESS);
17194 17213 }
17195 17214
17196 17215 /*ARGSUSED*/
17197 17216 static int
17198 17217 dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
17199 17218 {
17200 17219 int error;
17201 17220
17202 17221 switch (infocmd) {
17203 17222 case DDI_INFO_DEVT2DEVINFO:
17204 17223 *result = (void *)dtrace_devi;
17205 17224 error = DDI_SUCCESS;
17206 17225 break;
17207 17226 case DDI_INFO_DEVT2INSTANCE:
17208 17227 *result = (void *)0;
17209 17228 error = DDI_SUCCESS;
17210 17229 break;
17211 17230 default:
17212 17231 error = DDI_FAILURE;
17213 17232 }
17214 17233 return (error);
17215 17234 }
17216 17235
17217 17236 static struct cb_ops dtrace_cb_ops = {
17218 17237 dtrace_open, /* open */
17219 17238 dtrace_close, /* close */
17220 17239 nulldev, /* strategy */
17221 17240 nulldev, /* print */
17222 17241 nodev, /* dump */
17223 17242 nodev, /* read */
17224 17243 nodev, /* write */
17225 17244 dtrace_ioctl, /* ioctl */
17226 17245 nodev, /* devmap */
17227 17246 nodev, /* mmap */
17228 17247 nodev, /* segmap */
17229 17248 nochpoll, /* poll */
17230 17249 ddi_prop_op, /* cb_prop_op */
17231 17250 0, /* streamtab */
17232 17251 D_NEW | D_MP /* Driver compatibility flag */
17233 17252 };
17234 17253
17235 17254 static struct dev_ops dtrace_ops = {
17236 17255 DEVO_REV, /* devo_rev */
17237 17256 0, /* refcnt */
17238 17257 dtrace_info, /* get_dev_info */
17239 17258 nulldev, /* identify */
17240 17259 nulldev, /* probe */
17241 17260 dtrace_attach, /* attach */
17242 17261 dtrace_detach, /* detach */
17243 17262 nodev, /* reset */
17244 17263 &dtrace_cb_ops, /* driver operations */
17245 17264 NULL, /* bus operations */
17246 17265 nodev, /* dev power */
17247 17266 ddi_quiesce_not_needed, /* quiesce */
17248 17267 };
17249 17268
17250 17269 static struct modldrv modldrv = {
17251 17270 &mod_driverops, /* module type (this is a pseudo driver) */
17252 17271 "Dynamic Tracing", /* name of module */
17253 17272 &dtrace_ops, /* driver ops */
17254 17273 };
17255 17274
17256 17275 static struct modlinkage modlinkage = {
17257 17276 MODREV_1,
17258 17277 (void *)&modldrv,
17259 17278 NULL
17260 17279 };
17261 17280
17262 17281 int
17263 17282 _init(void)
17264 17283 {
17265 17284 return (mod_install(&modlinkage));
17266 17285 }
17267 17286
17268 17287 int
17269 17288 _info(struct modinfo *modinfop)
17270 17289 {
17271 17290 return (mod_info(&modlinkage, modinfop));
17272 17291 }
17273 17292
17274 17293 int
17275 17294 _fini(void)
17276 17295 {
17277 17296 return (mod_remove(&modlinkage));
17278 17297 }
(11518 lines elided)