153 * available to DTrace consumers via the backtick (`) syntax. One of these,
154 * dtrace_zero, is made deliberately so: it is provided as a source of
155 * well-known, zero-filled memory. While this variable is not documented,
156 * it is used by some translators as an implementation detail.
157 */
158 const char dtrace_zero[256] = { 0 }; /* zero-filled memory */
159
160 /*
161 * DTrace Internal Variables
162 */
163 static dev_info_t *dtrace_devi; /* device info */
164 static vmem_t *dtrace_arena; /* probe ID arena */
165 static vmem_t *dtrace_minor; /* minor number arena */
166 static taskq_t *dtrace_taskq; /* task queue */
167 static dtrace_probe_t **dtrace_probes; /* array of all probes */
168 static int dtrace_nprobes; /* number of probes */
169 static dtrace_provider_t *dtrace_provider; /* provider list */
170 static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
171 static int dtrace_opens; /* number of opens */
172 static int dtrace_helpers; /* number of helpers */
173 static void *dtrace_softstate; /* softstate pointer */
174 static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
175 static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
176 static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
177 static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */
178 static int dtrace_toxranges; /* number of toxic ranges */
179 static int dtrace_toxranges_max; /* size of toxic range array */
180 static dtrace_anon_t dtrace_anon; /* anonymous enabling */
181 static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */
182 static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */
183 static kthread_t *dtrace_panicked; /* panicking thread */
184 static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
185 static dtrace_genid_t dtrace_probegen; /* current probe generation */
186 static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
187 static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
188 static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
189 static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
190 static int dtrace_dynvar_failclean; /* dynvars failed to clean */
191
192 /*
458
459 static size_t dtrace_strlen(const char *, size_t);
460 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
461 static void dtrace_enabling_provide(dtrace_provider_t *);
462 static int dtrace_enabling_match(dtrace_enabling_t *, int *);
463 static void dtrace_enabling_matchall(void);
464 static void dtrace_enabling_reap(void);
465 static dtrace_state_t *dtrace_anon_grab(void);
466 static uint64_t dtrace_helper(int, dtrace_mstate_t *,
467 dtrace_state_t *, uint64_t, uint64_t);
468 static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
469 static void dtrace_buffer_drop(dtrace_buffer_t *);
470 static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
471 static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
472 dtrace_state_t *, dtrace_mstate_t *);
473 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
474 dtrace_optval_t);
475 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
476 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
477 static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
478
479 /*
480 * DTrace Probe Context Functions
481 *
482 * These functions are called from probe context. Because probe context is
483 * any context in which C may be called, arbitrary locks may be held,
484 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
485 * As a result, functions called from probe context may only call other DTrace
486 * support functions -- they may not interact at all with the system at large.
487 * (Note that the ASSERT macro is made probe-context safe by redefining it in
488 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
489 * loads are to be performed from probe context, they _must_ be in terms of
490 * the safe dtrace_load*() variants.
491 *
492 * Some functions in this block are not actually called from probe context;
493 * for these functions, there will be a comment above the function reading
494 * "Note: not called from probe context."
495 */
496 void
497 dtrace_panic(const char *format, ...)
669 return (1);
670
671 return (0);
672 }
673
674
675 /*
676 * Convenience routine to check to see if the address is within a memory
677 * region in which a load may be issued given the user's privilege level;
678 * if not, it sets the appropriate error flags and loads 'addr' into the
679 * illegal value slot.
680 *
681 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
682 * appropriate memory access protection.
683 */
684 static int
685 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
686 dtrace_vstate_t *vstate)
687 {
688 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
689
690 /*
691 * If we hold the privilege to read from kernel memory, then
692 * everything is readable.
693 */
694 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
695 return (1);
696
697 /*
698 * You can obviously read that which you can store.
699 */
700 if (dtrace_canstore(addr, sz, mstate, vstate))
701 return (1);
702
703 /*
704 * We're allowed to read from our own string table.
705 */
706 if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
707 mstate->dtms_difo->dtdo_strlen))
708 return (1);
734 sz, curthread->t_procp, sizeof (proc_t))) {
735 return (1);
736 }
737
738 if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
739 curthread->t_cred, sizeof (cred_t))) {
740 return (1);
741 }
742
743 if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
744 &(p->p_pidp->pid_id), sizeof (pid_t))) {
745 return (1);
746 }
747
748 if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
749 curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
750 return (1);
751 }
752 }
753
754 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
755 *illval = addr;
756 return (0);
757 }
758
759 /*
760 * Convenience routine to check to see if a given string is within a memory
761 * region in which a load may be issued given the user's privilege level;
762 * this exists so that we don't need to issue unnecessary dtrace_strlen()
763 * calls in the event that the user has all privileges.
764 */
765 static int
766 dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
767 dtrace_vstate_t *vstate)
768 {
769 size_t strsz;
770
771 /*
772 * If we hold the privilege to read from kernel memory, then
773 * everything is readable.
1112
1113 return (0);
1114 }
1115
1116 /*
1117 * This privilege check should be used by actions and subroutines to
1118 * verify that the zone of the process that enabled the invoking ECB
1119 * matches the target credentials
1120 */
1121 static int
1122 dtrace_priv_proc_common_zone(dtrace_state_t *state)
1123 {
1124 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1125
1126 /*
1127 * We should always have a non-NULL state cred here, since if cred
1128 * is null (anonymous tracing), we fast-path bypass this routine.
1129 */
1130 ASSERT(s_cr != NULL);
1131
1132 if ((cr = CRED()) != NULL &&
1133 s_cr->cr_zone == cr->cr_zone)
1134 return (1);
1135
1136 return (0);
1137 }
1138
1139 /*
1140 * This privilege check should be used by actions and subroutines to
1141 * verify that the process has not setuid or changed credentials.
1142 */
1143 static int
1144 dtrace_priv_proc_common_nocd()
1145 {
1146 proc_t *proc;
1147
1148 if ((proc = ttoproc(curthread)) != NULL &&
1149 !(proc->p_flag & SNOCD))
1150 return (1);
1151
1152 return (0);
1153 }
4448 }
4449
4450 start = 0;
4451 end = lastdir;
4452 } else {
4453 ASSERT(subr == DIF_SUBR_BASENAME);
4454 ASSERT(firstbase != -1 && lastbase != -1);
4455 start = firstbase;
4456 end = lastbase;
4457 }
4458
4459 for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
4460 dest[j] = dtrace_load8(src + i);
4461
4462 dest[j] = '\0';
4463 regs[rd] = (uintptr_t)dest;
4464 mstate->dtms_scratch_ptr += size;
4465 break;
4466 }
4467
4468 case DIF_SUBR_CLEANPATH: {
4469 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4470 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4471 uintptr_t src = tupregs[0].dttk_value;
4472 int i = 0, j = 0;
4473
4474 if (!dtrace_strcanload(src, size, mstate, vstate)) {
4475 regs[rd] = NULL;
4476 break;
4477 }
4478
4479 if (!DTRACE_INSCRATCH(mstate, size)) {
4480 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4481 regs[rd] = NULL;
4482 break;
4483 }
4484
4485 /*
4486 * Move forward, loading each character.
4487 */
4488 do {
4489 c = dtrace_load8(src + i++);
4490 next:
4491 if (j + 5 >= size) /* 5 = strlen("/..c\0") */
4492 break;
4551 dest[j++] = '/';
4552 dest[j++] = '.';
4553 dest[j++] = '.';
4554 dest[j++] = c;
4555 continue;
4556 }
4557
4558 /*
4559 * This is "/../" or "/..\0". We need to back up
4560 * our destination pointer until we find a "/".
4561 */
4562 i--;
4563 while (j != 0 && dest[--j] != '/')
4564 continue;
4565
4566 if (c == '\0')
4567 dest[++j] = '/';
4568 } while (c != '\0');
4569
4570 dest[j] = '\0';
4571 regs[rd] = (uintptr_t)dest;
4572 mstate->dtms_scratch_ptr += size;
4573 break;
4574 }
4575
4576 case DIF_SUBR_INET_NTOA:
4577 case DIF_SUBR_INET_NTOA6:
4578 case DIF_SUBR_INET_NTOP: {
4579 size_t size;
4580 int af, argi, i;
4581 char *base, *end;
4582
4583 if (subr == DIF_SUBR_INET_NTOP) {
4584 af = (int)tupregs[0].dttk_value;
4585 argi = 1;
4586 } else {
4587 af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
4588 argi = 0;
4589 }
4590
5915 caddr_t tomax;
5916
5917 /*
5918 * A little subtlety with the following (seemingly innocuous)
5919 * declaration of the automatic 'val': by looking at the
5920 * code, you might think that it could be declared in the
5921 * action processing loop, below. (That is, it's only used in
5922 * the action processing loop.) However, it must be declared
5923 * out of that scope because in the case of DIF expression
5924 * arguments to aggregating actions, one iteration of the
5925 * action loop will use the last iteration's value.
5926 */
5927 #ifdef lint
5928 uint64_t val = 0;
5929 #else
5930 uint64_t val;
5931 #endif
5932
5933 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
5934 mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
5935 *flags &= ~CPU_DTRACE_ERROR;
5936
5937 if (prov == dtrace_provider) {
5938 /*
5939 * If dtrace itself is the provider of this probe,
5940 * we're only going to continue processing the ECB if
5941 * arg0 (the dtrace_state_t) is equal to the ECB's
5942 * creating state. (This prevents disjoint consumers
5943 * from seeing one another's metaprobes.)
5944 */
5945 if (arg0 != (uint64_t)(uintptr_t)state)
5946 continue;
5947 }
5948
5949 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
5950 /*
5951 * We're not currently active. If our provider isn't
5952 * the dtrace pseudo provider, we're not interested.
5953 */
5954 if (prov != dtrace_provider)
8393 case DIF_OP_STLS:
8394 case DIF_OP_STGAA:
8395 case DIF_OP_STTAA:
8396 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
8397 err += efunc(pc, "invalid variable %u\n", v);
8398 if (rs >= nregs)
8399 err += efunc(pc, "invalid register %u\n", rd);
8400 break;
8401 case DIF_OP_CALL:
8402 if (subr > DIF_SUBR_MAX)
8403 err += efunc(pc, "invalid subr %u\n", subr);
8404 if (rd >= nregs)
8405 err += efunc(pc, "invalid register %u\n", rd);
8406 if (rd == 0)
8407 err += efunc(pc, "cannot write to %r0\n");
8408
8409 if (subr == DIF_SUBR_COPYOUT ||
8410 subr == DIF_SUBR_COPYOUTSTR) {
8411 dp->dtdo_destructive = 1;
8412 }
8413 break;
8414 case DIF_OP_PUSHTR:
8415 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
8416 err += efunc(pc, "invalid ref type %u\n", type);
8417 if (r2 >= nregs)
8418 err += efunc(pc, "invalid register %u\n", r2);
8419 if (rs >= nregs)
8420 err += efunc(pc, "invalid register %u\n", rs);
8421 break;
8422 case DIF_OP_PUSHTV:
8423 if (type != DIF_TYPE_CTF)
8424 err += efunc(pc, "invalid val type %u\n", type);
8425 if (r2 >= nregs)
8426 err += efunc(pc, "invalid register %u\n", r2);
8427 if (rs >= nregs)
8428 err += efunc(pc, "invalid register %u\n", rs);
8429 break;
8430 default:
8431 err += efunc(pc, "invalid opcode %u\n",
8432 DIF_INSTR_OP(instr));
13067 hdlr.cyh_arg = state;
13068 hdlr.cyh_level = CY_LOW_LEVEL;
13069
13070 when.cyt_when = 0;
13071 when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
13072
13073 state->dts_cleaner = cyclic_add(&hdlr, &when);
13074
13075 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
13076 hdlr.cyh_arg = state;
13077 hdlr.cyh_level = CY_LOW_LEVEL;
13078
13079 when.cyt_when = 0;
13080 when.cyt_interval = dtrace_deadman_interval;
13081
13082 state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
13083 state->dts_deadman = cyclic_add(&hdlr, &when);
13084
13085 state->dts_activity = DTRACE_ACTIVITY_WARMUP;
13086
13087 /*
13088 * Now it's time to actually fire the BEGIN probe. We need to disable
13089 * interrupts here both to record the CPU on which we fired the BEGIN
13090 * probe (the data from this CPU will be processed first at user
13091 * level) and to manually activate the buffer for this CPU.
13092 */
13093 cookie = dtrace_interrupt_disable();
13094 *cpu = CPU->cpu_id;
13095 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
13096 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
13097
13098 dtrace_probe(dtrace_probeid_begin,
13099 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13100 dtrace_interrupt_enable(cookie);
13101 /*
13102 * We may have had an exit action from a BEGIN probe; only change our
13103 * state to ACTIVE if we're still in WARMUP.
13104 */
13105 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
13106 state->dts_activity == DTRACE_ACTIVITY_DRAINING);
13107
13183 ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
13184
13185 /*
13186 * Finally, we can release the reserve and call the END probe. We
13187 * disable interrupts across calling the END probe to allow us to
13188 * return the CPU on which we actually called the END probe. This
13189 * allows user-land to be sure that this CPU's principal buffer is
13190 * processed last.
13191 */
13192 state->dts_reserve = 0;
13193
13194 cookie = dtrace_interrupt_disable();
13195 *cpu = CPU->cpu_id;
13196 dtrace_probe(dtrace_probeid_end,
13197 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13198 dtrace_interrupt_enable(cookie);
13199
13200 state->dts_activity = DTRACE_ACTIVITY_STOPPED;
13201 dtrace_sync();
13202
13203 return (0);
13204 }
13205
13206 static int
13207 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
13208 dtrace_optval_t val)
13209 {
13210 ASSERT(MUTEX_HELD(&dtrace_lock));
13211
13212 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13213 return (EBUSY);
13214
13215 if (option >= DTRACEOPT_MAX)
13216 return (EINVAL);
13217
13218 if (option != DTRACEOPT_CPU && val < 0)
13219 return (EINVAL);
13220
13221 switch (option) {
13222 case DTRACEOPT_DESTRUCTIVE:
14743 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
14744 range = kmem_zalloc(nsize, KM_SLEEP);
14745
14746 if (dtrace_toxrange != NULL) {
14747 ASSERT(osize != 0);
14748 bcopy(dtrace_toxrange, range, osize);
14749 kmem_free(dtrace_toxrange, osize);
14750 }
14751
14752 dtrace_toxrange = range;
14753 }
14754
14755 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
14756 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);
14757
14758 dtrace_toxrange[dtrace_toxranges].dtt_base = base;
14759 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
14760 dtrace_toxranges++;
14761 }
14762
14763 /*
14764 * DTrace Driver Cookbook Functions
14765 */
14766 /*ARGSUSED*/
14767 static int
14768 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
14769 {
14770 dtrace_provider_id_t id;
14771 dtrace_state_t *state = NULL;
14772 dtrace_enabling_t *enab;
14773
14774 mutex_enter(&cpu_lock);
14775 mutex_enter(&dtrace_provider_lock);
14776 mutex_enter(&dtrace_lock);
14777
14778 if (ddi_soft_state_init(&dtrace_softstate,
14779 sizeof (dtrace_state_t), 0) != 0) {
14780 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
14781 mutex_exit(&cpu_lock);
14782 mutex_exit(&dtrace_provider_lock);
15897
15898 /*
15899 * If we're being detached with anonymous state, we need to
15900 * indicate to the kernel debugger that DTrace is now inactive.
15901 */
15902 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
15903 }
15904
15905 bzero(&dtrace_anon, sizeof (dtrace_anon_t));
15906 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
15907 dtrace_cpu_init = NULL;
15908 dtrace_helpers_cleanup = NULL;
15909 dtrace_helpers_fork = NULL;
15910 dtrace_cpustart_init = NULL;
15911 dtrace_cpustart_fini = NULL;
15912 dtrace_debugger_init = NULL;
15913 dtrace_debugger_fini = NULL;
15914 dtrace_modload = NULL;
15915 dtrace_modunload = NULL;
15916
15917 mutex_exit(&cpu_lock);
15918
15919 if (dtrace_helptrace_enabled) {
15920 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
15921 dtrace_helptrace_buffer = NULL;
15922 }
15923
15924 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
15925 dtrace_probes = NULL;
15926 dtrace_nprobes = 0;
15927
15928 dtrace_hash_destroy(dtrace_bymod);
15929 dtrace_hash_destroy(dtrace_byfunc);
15930 dtrace_hash_destroy(dtrace_byname);
15931 dtrace_bymod = NULL;
15932 dtrace_byfunc = NULL;
15933 dtrace_byname = NULL;
15934
15935 kmem_cache_destroy(dtrace_state_cache);
15936 vmem_destroy(dtrace_minor);
|
153 * available to DTrace consumers via the backtick (`) syntax. One of these,
154 * dtrace_zero, is made deliberately so: it is provided as a source of
155 * well-known, zero-filled memory. While this variable is not documented,
156 * it is used by some translators as an implementation detail.
157 */
158 const char dtrace_zero[256] = { 0 }; /* zero-filled memory */
159
160 /*
161 * DTrace Internal Variables
162 */
163 static dev_info_t *dtrace_devi; /* device info */
164 static vmem_t *dtrace_arena; /* probe ID arena */
165 static vmem_t *dtrace_minor; /* minor number arena */
166 static taskq_t *dtrace_taskq; /* task queue */
167 static dtrace_probe_t **dtrace_probes; /* array of all probes */
168 static int dtrace_nprobes; /* number of probes */
169 static dtrace_provider_t *dtrace_provider; /* provider list */
170 static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
171 static int dtrace_opens; /* number of opens */
172 static int dtrace_helpers; /* number of helpers */
173 static int dtrace_getf; /* number of unpriv getf()s */
174 static void *dtrace_softstate; /* softstate pointer */
175 static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
176 static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
177 static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
178 static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */
179 static int dtrace_toxranges; /* number of toxic ranges */
180 static int dtrace_toxranges_max; /* size of toxic range array */
181 static dtrace_anon_t dtrace_anon; /* anonymous enabling */
182 static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */
183 static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */
184 static kthread_t *dtrace_panicked; /* panicking thread */
185 static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
186 static dtrace_genid_t dtrace_probegen; /* current probe generation */
187 static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
188 static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
189 static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
190 static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
191 static int dtrace_dynvar_failclean; /* dynvars failed to clean */
192
193 /*
459
460 static size_t dtrace_strlen(const char *, size_t);
461 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
462 static void dtrace_enabling_provide(dtrace_provider_t *);
463 static int dtrace_enabling_match(dtrace_enabling_t *, int *);
464 static void dtrace_enabling_matchall(void);
465 static void dtrace_enabling_reap(void);
466 static dtrace_state_t *dtrace_anon_grab(void);
467 static uint64_t dtrace_helper(int, dtrace_mstate_t *,
468 dtrace_state_t *, uint64_t, uint64_t);
469 static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
470 static void dtrace_buffer_drop(dtrace_buffer_t *);
471 static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
472 static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
473 dtrace_state_t *, dtrace_mstate_t *);
474 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
475 dtrace_optval_t);
476 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
477 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
478 static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
479 static void dtrace_getf_barrier(void);
480
481 /*
482 * DTrace Probe Context Functions
483 *
484 * These functions are called from probe context. Because probe context is
485 * any context in which C may be called, arbitrary locks may be held,
486 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
487 * As a result, functions called from probe context may only call other DTrace
488 * support functions -- they may not interact at all with the system at large.
489 * (Note that the ASSERT macro is made probe-context safe by redefining it in
490 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
491 * loads are to be performed from probe context, they _must_ be in terms of
492 * the safe dtrace_load*() variants.
493 *
494 * Some functions in this block are not actually called from probe context;
495 * for these functions, there will be a comment above the function reading
496 * "Note: not called from probe context."
497 */
498 void
499 dtrace_panic(const char *format, ...)
671 return (1);
672
673 return (0);
674 }
675
676
677 /*
678 * Convenience routine to check to see if the address is within a memory
679 * region in which a load may be issued given the user's privilege level;
680 * if not, it sets the appropriate error flags and loads 'addr' into the
681 * illegal value slot.
682 *
683 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
684 * appropriate memory access protection.
685 */
686 static int
687 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
688 dtrace_vstate_t *vstate)
689 {
690 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
691 file_t *fp;
692
693 /*
694 * If we hold the privilege to read from kernel memory, then
695 * everything is readable.
696 */
697 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
698 return (1);
699
700 /*
701 * You can obviously read that which you can store.
702 */
703 if (dtrace_canstore(addr, sz, mstate, vstate))
704 return (1);
705
706 /*
707 * We're allowed to read from our own string table.
708 */
709 if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
710 mstate->dtms_difo->dtdo_strlen))
711 return (1);
737 sz, curthread->t_procp, sizeof (proc_t))) {
738 return (1);
739 }
740
741 if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
742 curthread->t_cred, sizeof (cred_t))) {
743 return (1);
744 }
745
746 if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
747 &(p->p_pidp->pid_id), sizeof (pid_t))) {
748 return (1);
749 }
750
751 if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
752 curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
753 return (1);
754 }
755 }
756
757 if ((fp = mstate->dtms_getf) != NULL) {
758 uintptr_t psz = sizeof (void *);
759 vnode_t *vp;
760 vnodeops_t *op;
761
762 /*
763 * When getf() returns a file_t, the enabling is implicitly
764 * granted the (transient) right to read the returned file_t
765 * as well as the v_path and v_op->vnop_name of the underlying
766 * vnode. These accesses are allowed after a successful
767 * getf() because the members that they refer to cannot change
768 * once set -- and the barrier logic in the kernel's closef()
769 * path assures that the file_t and its referenced vnode_t
770 * cannot themselves be stale (that is, it is impossible for
771 * either dtms_getf itself or its f_vnode member to reference
772 * freed memory).
773 */
774 if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
775 return (1);
776
777 if ((vp = fp->f_vnode) != NULL) {
778 if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
779 return (1);
780
781 if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
782 vp->v_path, strlen(vp->v_path) + 1)) {
783 return (1);
784 }
785
786 if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
787 return (1);
788
789 if ((op = vp->v_op) != NULL &&
790 DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
791 return (1);
792 }
793
794 if (op != NULL && op->vnop_name != NULL &&
795 DTRACE_INRANGE(addr, sz, op->vnop_name,
796 strlen(op->vnop_name) + 1)) {
797 return (1);
798 }
799 }
800 }
801
802 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
803 *illval = addr;
804 return (0);
805 }
806
807 /*
808 * Convenience routine to check to see if a given string is within a memory
809 * region in which a load may be issued given the user's privilege level;
810 * this exists so that we don't need to issue unnecessary dtrace_strlen()
811 * calls in the event that the user has all privileges.
812 */
813 static int
814 dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
815 dtrace_vstate_t *vstate)
816 {
817 size_t strsz;
818
819 /*
820 * If we hold the privilege to read from kernel memory, then
821 * everything is readable.
1160
1161 return (0);
1162 }
1163
1164 /*
1165 * This privilege check should be used by actions and subroutines to
1166 * verify that the zone of the process that enabled the invoking ECB
1167 * matches the target credentials
1168 */
1169 static int
1170 dtrace_priv_proc_common_zone(dtrace_state_t *state)
1171 {
1172 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1173
1174 /*
1175 * We should always have a non-NULL state cred here, since if cred
1176 * is null (anonymous tracing), we fast-path bypass this routine.
1177 */
1178 ASSERT(s_cr != NULL);
1179
1180 if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
1181 return (1);
1182
1183 return (0);
1184 }
1185
1186 /*
1187 * This privilege check should be used by actions and subroutines to
1188 * verify that the process has not setuid or changed credentials.
1189 */
1190 static int
1191 dtrace_priv_proc_common_nocd()
1192 {
1193 proc_t *proc;
1194
1195 if ((proc = ttoproc(curthread)) != NULL &&
1196 !(proc->p_flag & SNOCD))
1197 return (1);
1198
1199 return (0);
1200 }
4495 }
4496
4497 start = 0;
4498 end = lastdir;
4499 } else {
4500 ASSERT(subr == DIF_SUBR_BASENAME);
4501 ASSERT(firstbase != -1 && lastbase != -1);
4502 start = firstbase;
4503 end = lastbase;
4504 }
4505
4506 for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
4507 dest[j] = dtrace_load8(src + i);
4508
4509 dest[j] = '\0';
4510 regs[rd] = (uintptr_t)dest;
4511 mstate->dtms_scratch_ptr += size;
4512 break;
4513 }
4514
4515 case DIF_SUBR_GETF: {
4516 uintptr_t fd = tupregs[0].dttk_value;
4517 uf_info_t *finfo = &curthread->t_procp->p_user.u_finfo;
4518 file_t *fp;
4519
4520 if (!dtrace_priv_proc(state, mstate)) {
4521 regs[rd] = NULL;
4522 break;
4523 }
4524
4525 /*
4526 * This is safe because fi_nfiles only increases, and the
4527 * fi_list array is not freed when the array size doubles.
4528 * (See the comment in flist_grow() for details on the
4529 * management of the u_finfo structure.)
4530 */
4531 fp = fd < finfo->fi_nfiles ? finfo->fi_list[fd].uf_file : NULL;
4532
4533 mstate->dtms_getf = fp;
4534 regs[rd] = (uintptr_t)fp;
4535 break;
4536 }
4537
4538 case DIF_SUBR_CLEANPATH: {
4539 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4540 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4541 uintptr_t src = tupregs[0].dttk_value;
4542 int i = 0, j = 0;
4543 zone_t *z;
4544
4545 if (!dtrace_strcanload(src, size, mstate, vstate)) {
4546 regs[rd] = NULL;
4547 break;
4548 }
4549
4550 if (!DTRACE_INSCRATCH(mstate, size)) {
4551 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4552 regs[rd] = NULL;
4553 break;
4554 }
4555
4556 /*
4557 * Move forward, loading each character.
4558 */
4559 do {
4560 c = dtrace_load8(src + i++);
4561 next:
4562 if (j + 5 >= size) /* 5 = strlen("/..c\0") */
4563 break;
4622 dest[j++] = '/';
4623 dest[j++] = '.';
4624 dest[j++] = '.';
4625 dest[j++] = c;
4626 continue;
4627 }
4628
4629 /*
4630 * This is "/../" or "/..\0". We need to back up
4631 * our destination pointer until we find a "/".
4632 */
4633 i--;
4634 while (j != 0 && dest[--j] != '/')
4635 continue;
4636
4637 if (c == '\0')
4638 dest[++j] = '/';
4639 } while (c != '\0');
4640
4641 dest[j] = '\0';
4642
4643 if (mstate->dtms_getf != NULL &&
4644 !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
4645 (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
4646 /*
4647 * If we've done a getf() as a part of this ECB and we
4648 * don't have kernel access (and we're not in the global
4649 * zone), check if the path we cleaned up begins with
4650 * the zone's root path, and trim it off if so. Note
4651 * that this is an output cleanliness issue, not a
4652 * security issue: knowing one's zone root path does
4653 * not enable privilege escalation.
4654 */
4655 if (strstr(dest, z->zone_rootpath) == dest)
4656 dest += strlen(z->zone_rootpath) - 1;
4657 }
4658
4659 regs[rd] = (uintptr_t)dest;
4660 mstate->dtms_scratch_ptr += size;
4661 break;
4662 }
4663
4664 case DIF_SUBR_INET_NTOA:
4665 case DIF_SUBR_INET_NTOA6:
4666 case DIF_SUBR_INET_NTOP: {
4667 size_t size;
4668 int af, argi, i;
4669 char *base, *end;
4670
4671 if (subr == DIF_SUBR_INET_NTOP) {
4672 af = (int)tupregs[0].dttk_value;
4673 argi = 1;
4674 } else {
4675 af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
4676 argi = 0;
4677 }
4678
6003 caddr_t tomax;
6004
6005 /*
6006 * A little subtlety with the following (seemingly innocuous)
6007 * declaration of the automatic 'val': by looking at the
6008 * code, you might think that it could be declared in the
6009 * action processing loop, below. (That is, it's only used in
6010 * the action processing loop.) However, it must be declared
6011 * out of that scope because in the case of DIF expression
6012 * arguments to aggregating actions, one iteration of the
6013 * action loop will use the last iteration's value.
6014 */
6015 #ifdef lint
6016 uint64_t val = 0;
6017 #else
6018 uint64_t val;
6019 #endif
6020
6021 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
6022 mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
6023 mstate.dtms_getf = NULL;
6024
6025 *flags &= ~CPU_DTRACE_ERROR;
6026
6027 if (prov == dtrace_provider) {
6028 /*
6029 * If dtrace itself is the provider of this probe,
6030 * we're only going to continue processing the ECB if
6031 * arg0 (the dtrace_state_t) is equal to the ECB's
6032 * creating state. (This prevents disjoint consumers
6033 * from seeing one another's metaprobes.)
6034 */
6035 if (arg0 != (uint64_t)(uintptr_t)state)
6036 continue;
6037 }
6038
6039 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
6040 /*
6041 * We're not currently active. If our provider isn't
6042 * the dtrace pseudo provider, we're not interested.
6043 */
6044 if (prov != dtrace_provider)
8483 case DIF_OP_STLS:
8484 case DIF_OP_STGAA:
8485 case DIF_OP_STTAA:
8486 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
8487 err += efunc(pc, "invalid variable %u\n", v);
8488 if (rs >= nregs)
8489 err += efunc(pc, "invalid register %u\n", rd);
8490 break;
8491 case DIF_OP_CALL:
8492 if (subr > DIF_SUBR_MAX)
8493 err += efunc(pc, "invalid subr %u\n", subr);
8494 if (rd >= nregs)
8495 err += efunc(pc, "invalid register %u\n", rd);
8496 if (rd == 0)
8497 err += efunc(pc, "cannot write to %r0\n");
8498
8499 if (subr == DIF_SUBR_COPYOUT ||
8500 subr == DIF_SUBR_COPYOUTSTR) {
8501 dp->dtdo_destructive = 1;
8502 }
8503
8504 if (subr == DIF_SUBR_GETF) {
8505 /*
8506 * If we have a getf() we need to record that
8507 * in our state. Note that our state can be
8508 * NULL if this is a helper -- but in that
8509 * case, the call to getf() is itself illegal,
8510 * and will be caught (slightly later) when
8511 * the helper is validated.
8512 */
8513 if (vstate->dtvs_state != NULL)
8514 vstate->dtvs_state->dts_getf++;
8515 }
8516
8517 break;
8518 case DIF_OP_PUSHTR:
8519 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
8520 err += efunc(pc, "invalid ref type %u\n", type);
8521 if (r2 >= nregs)
8522 err += efunc(pc, "invalid register %u\n", r2);
8523 if (rs >= nregs)
8524 err += efunc(pc, "invalid register %u\n", rs);
8525 break;
8526 case DIF_OP_PUSHTV:
8527 if (type != DIF_TYPE_CTF)
8528 err += efunc(pc, "invalid val type %u\n", type);
8529 if (r2 >= nregs)
8530 err += efunc(pc, "invalid register %u\n", r2);
8531 if (rs >= nregs)
8532 err += efunc(pc, "invalid register %u\n", rs);
8533 break;
8534 default:
8535 err += efunc(pc, "invalid opcode %u\n",
8536 DIF_INSTR_OP(instr));
13171 hdlr.cyh_arg = state;
13172 hdlr.cyh_level = CY_LOW_LEVEL;
13173
13174 when.cyt_when = 0;
13175 when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
13176
13177 state->dts_cleaner = cyclic_add(&hdlr, &when);
13178
13179 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
13180 hdlr.cyh_arg = state;
13181 hdlr.cyh_level = CY_LOW_LEVEL;
13182
13183 when.cyt_when = 0;
13184 when.cyt_interval = dtrace_deadman_interval;
13185
13186 state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
13187 state->dts_deadman = cyclic_add(&hdlr, &when);
13188
13189 state->dts_activity = DTRACE_ACTIVITY_WARMUP;
13190
13191 if (state->dts_getf != 0 &&
13192 !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
13193 /*
13194 * We don't have kernel privs but we have at least one call
13195 * to getf(); we need to bump our zone's count, and (if
13196 * this is the first enabling to have an unprivileged call
13197 * to getf()) we need to hook into closef().
13198 */
13199 state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
13200
13201 if (dtrace_getf++ == 0) {
13202 ASSERT(dtrace_closef == NULL);
13203 dtrace_closef = dtrace_getf_barrier;
13204 }
13205 }
13206
13207 /*
13208 * Now it's time to actually fire the BEGIN probe. We need to disable
13209 * interrupts here both to record the CPU on which we fired the BEGIN
13210 * probe (the data from this CPU will be processed first at user
13211 * level) and to manually activate the buffer for this CPU.
13212 */
13213 cookie = dtrace_interrupt_disable();
13214 *cpu = CPU->cpu_id;
13215 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
13216 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
13217
13218 dtrace_probe(dtrace_probeid_begin,
13219 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13220 dtrace_interrupt_enable(cookie);
13221 /*
13222 * We may have had an exit action from a BEGIN probe; only change our
13223 * state to ACTIVE if we're still in WARMUP.
13224 */
13225 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
13226 state->dts_activity == DTRACE_ACTIVITY_DRAINING);
13227
13303 ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
13304
13305 /*
13306 * Finally, we can release the reserve and call the END probe. We
13307 * disable interrupts across calling the END probe to allow us to
13308 * return the CPU on which we actually called the END probe. This
13309 * allows user-land to be sure that this CPU's principal buffer is
13310 * processed last.
13311 */
13312 state->dts_reserve = 0;
13313
13314 cookie = dtrace_interrupt_disable();
13315 *cpu = CPU->cpu_id;
13316 dtrace_probe(dtrace_probeid_end,
13317 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13318 dtrace_interrupt_enable(cookie);
13319
13320 state->dts_activity = DTRACE_ACTIVITY_STOPPED;
13321 dtrace_sync();
13322
13323 if (state->dts_getf != 0 &&
13324 !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
13325 /*
13326 * We don't have kernel privs but we have at least one call
13327 * to getf(); we need to lower our zone's count, and (if
13328 * this is the last enabling to have an unprivileged call
13329 * to getf()) we need to clear the closef() hook.
13330 */
13331 ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
13332 ASSERT(dtrace_closef == dtrace_getf_barrier);
13333 ASSERT(dtrace_getf > 0);
13334
13335 state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
13336
13337 if (--dtrace_getf == 0)
13338 dtrace_closef = NULL;
13339 }
13340
13341 return (0);
13342 }
13343
13344 static int
13345 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
13346 dtrace_optval_t val)
13347 {
13348 ASSERT(MUTEX_HELD(&dtrace_lock));
13349
13350 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13351 return (EBUSY);
13352
13353 if (option >= DTRACEOPT_MAX)
13354 return (EINVAL);
13355
13356 if (option != DTRACEOPT_CPU && val < 0)
13357 return (EINVAL);
13358
13359 switch (option) {
13360 case DTRACEOPT_DESTRUCTIVE:
14881 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
14882 range = kmem_zalloc(nsize, KM_SLEEP);
14883
14884 if (dtrace_toxrange != NULL) {
14885 ASSERT(osize != 0);
14886 bcopy(dtrace_toxrange, range, osize);
14887 kmem_free(dtrace_toxrange, osize);
14888 }
14889
14890 dtrace_toxrange = range;
14891 }
14892
14893 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
14894 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);
14895
14896 dtrace_toxrange[dtrace_toxranges].dtt_base = base;
14897 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
14898 dtrace_toxranges++;
14899 }
14900
14901 static void
14902 dtrace_getf_barrier()
14903 {
14904 /*
14905 * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
14906 * that contain calls to getf(), this routine will be called on every
14907 * closef() before either the underlying vnode is released or the
14908 * file_t itself is freed. By the time we are here, it is essential
14909 * that the file_t can no longer be accessed from a call to getf()
14910 * in probe context -- that assures that a dtrace_sync() can be used
14911 * to clear out any enablings referring to the old structures.
14912 */
14913 if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
14914 kcred->cr_zone->zone_dtrace_getf != 0)
14915 dtrace_sync();
14916 }
14917
14918 /*
14919 * DTrace Driver Cookbook Functions
14920 */
14921 /*ARGSUSED*/
14922 static int
14923 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
14924 {
14925 dtrace_provider_id_t id;
14926 dtrace_state_t *state = NULL;
14927 dtrace_enabling_t *enab;
14928
14929 mutex_enter(&cpu_lock);
14930 mutex_enter(&dtrace_provider_lock);
14931 mutex_enter(&dtrace_lock);
14932
14933 if (ddi_soft_state_init(&dtrace_softstate,
14934 sizeof (dtrace_state_t), 0) != 0) {
14935 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
14936 mutex_exit(&cpu_lock);
14937 mutex_exit(&dtrace_provider_lock);
16052
16053 /*
16054 * If we're being detached with anonymous state, we need to
16055 * indicate to the kernel debugger that DTrace is now inactive.
16056 */
16057 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
16058 }
16059
16060 bzero(&dtrace_anon, sizeof (dtrace_anon_t));
16061 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
16062 dtrace_cpu_init = NULL;
16063 dtrace_helpers_cleanup = NULL;
16064 dtrace_helpers_fork = NULL;
16065 dtrace_cpustart_init = NULL;
16066 dtrace_cpustart_fini = NULL;
16067 dtrace_debugger_init = NULL;
16068 dtrace_debugger_fini = NULL;
16069 dtrace_modload = NULL;
16070 dtrace_modunload = NULL;
16071
16072 ASSERT(dtrace_getf == 0);
16073 ASSERT(dtrace_closef == NULL);
16074
16075 mutex_exit(&cpu_lock);
16076
16077 if (dtrace_helptrace_enabled) {
16078 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
16079 dtrace_helptrace_buffer = NULL;
16080 }
16081
16082 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
16083 dtrace_probes = NULL;
16084 dtrace_nprobes = 0;
16085
16086 dtrace_hash_destroy(dtrace_bymod);
16087 dtrace_hash_destroy(dtrace_byfunc);
16088 dtrace_hash_destroy(dtrace_byname);
16089 dtrace_bymod = NULL;
16090 dtrace_byfunc = NULL;
16091 dtrace_byname = NULL;
16092
16093 kmem_cache_destroy(dtrace_state_cache);
16094 vmem_destroy(dtrace_minor);
|