Print this page
2916 DTrace in a zone should be able to access fds[]
@@ -168,10 +168,11 @@
static int dtrace_nprobes; /* number of probes */
static dtrace_provider_t *dtrace_provider; /* provider list */
static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
static int dtrace_opens; /* number of opens */
static int dtrace_helpers; /* number of helpers */
+static int dtrace_getf; /* number of unpriv getf()s */
static void *dtrace_softstate; /* softstate pointer */
static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */
@@ -473,10 +474,11 @@
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
+static void dtrace_getf_barrier(void);
/*
* DTrace Probe Context Functions
*
* These functions are called from probe context. Because probe context is
@@ -684,10 +686,11 @@
static int
dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
dtrace_vstate_t *vstate)
{
volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
+ file_t *fp;
/*
* If we hold the privilege to read from kernel memory, then
* everything is readable.
*/
@@ -749,10 +752,55 @@
curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
return (1);
}
}
+ if ((fp = mstate->dtms_getf) != NULL) {
+ uintptr_t psz = sizeof (void *);
+ vnode_t *vp;
+ vnodeops_t *op;
+
+ /*
+ * When getf() returns a file_t, the enabling is implicitly
+ * granted the (transient) right to read the returned file_t
+ * as well as the v_path and v_op->vnop_name of the underlying
+ * vnode. These accesses are allowed after a successful
+ * getf() because the members that they refer to cannot change
+ * once set -- and the barrier logic in the kernel's closef()
+ * path assures that the file_t and its referenced vode_t
+ * cannot themselves be stale (that is, it impossible for
+ * either dtms_getf itself or its f_vnode member to reference
+ * freed memory).
+ */
+ if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
+ return (1);
+
+ if ((vp = fp->f_vnode) != NULL) {
+ if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
+ return (1);
+
+ if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
+ vp->v_path, strlen(vp->v_path) + 1)) {
+ return (1);
+ }
+
+ if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
+ return (1);
+
+ if ((op = vp->v_op) != NULL &&
+ DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
+ return (1);
+ }
+
+ if (op != NULL && op->vnop_name != NULL &&
+ DTRACE_INRANGE(addr, sz, op->vnop_name,
+ strlen(op->vnop_name) + 1)) {
+ return (1);
+ }
+ }
+ }
+
DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
*illval = addr;
return (0);
}
@@ -1127,12 +1175,11 @@
* We should always have a non-NULL state cred here, since if cred
* is null (anonymous tracing), we fast-path bypass this routine.
*/
ASSERT(s_cr != NULL);
- if ((cr = CRED()) != NULL &&
- s_cr->cr_zone == cr->cr_zone)
+ if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
return (1);
return (0);
}
@@ -4463,15 +4510,39 @@
regs[rd] = (uintptr_t)dest;
mstate->dtms_scratch_ptr += size;
break;
}
+ case DIF_SUBR_GETF: {
+ uintptr_t fd = tupregs[0].dttk_value;
+ uf_info_t *finfo = &curthread->t_procp->p_user.u_finfo;
+ file_t *fp;
+
+ if (!dtrace_priv_proc(state, mstate)) {
+ regs[rd] = NULL;
+ break;
+ }
+
+ /*
+ * This is safe because fi_nfiles only increases, and the
+ * fi_list array is not freed when the array size doubles.
+ * (See the comment in flist_grow() for details on the
+ * management of the u_finfo structure.)
+ */
+ fp = fd < finfo->fi_nfiles ? finfo->fi_list[fd].uf_file : NULL;
+
+ mstate->dtms_getf = fp;
+ regs[rd] = (uintptr_t)fp;
+ break;
+ }
+
case DIF_SUBR_CLEANPATH: {
char *dest = (char *)mstate->dtms_scratch_ptr, c;
uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
uintptr_t src = tupregs[0].dttk_value;
int i = 0, j = 0;
+ zone_t *z;
if (!dtrace_strcanload(src, size, mstate, vstate)) {
regs[rd] = NULL;
break;
}
@@ -4566,10 +4637,27 @@
if (c == '\0')
dest[++j] = '/';
} while (c != '\0');
dest[j] = '\0';
+
+ if (mstate->dtms_getf != NULL &&
+ !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
+ (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
+ /*
+ * If we've done a getf() as a part of this ECB and we
+ * don't have kernel access (and we're not in the global
+ * zone), check if the path we cleaned up begins with
+ * the zone's root path, and trim it off if so. Note
+ * that this is an output cleanliness issue, not a
+ * security issue: knowing one's zone root path does
+ * not enable privilege escalation.
+ */
+ if (strstr(dest, z->zone_rootpath) == dest)
+ dest += strlen(z->zone_rootpath) - 1;
+ }
+
regs[rd] = (uintptr_t)dest;
mstate->dtms_scratch_ptr += size;
break;
}
@@ -5930,10 +6018,12 @@
uint64_t val;
#endif
mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
+ mstate.dtms_getf = NULL;
+
*flags &= ~CPU_DTRACE_ERROR;
if (prov == dtrace_provider) {
/*
* If dtrace itself is the provider of this probe,
@@ -8408,10 +8498,24 @@
if (subr == DIF_SUBR_COPYOUT ||
subr == DIF_SUBR_COPYOUTSTR) {
dp->dtdo_destructive = 1;
}
+
+ if (subr == DIF_SUBR_GETF) {
+ /*
+ * If we have a getf() we need to record that
+ * in our state. Note that our state can be
+ * NULL if this is a helper -- but in that
+ * case, the call to getf() is itself illegal,
+ * and will be caught (slightly later) when
+ * the helper is validated.
+ */
+ if (vstate->dtvs_state != NULL)
+ vstate->dtvs_state->dts_getf++;
+ }
+
break;
case DIF_OP_PUSHTR:
if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
err += efunc(pc, "invalid ref type %u\n", type);
if (r2 >= nregs)
@@ -13082,11 +13186,27 @@
state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
state->dts_deadman = cyclic_add(&hdlr, &when);
state->dts_activity = DTRACE_ACTIVITY_WARMUP;
+ if (state->dts_getf != 0 &&
+ !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
/*
+ * We don't have kernel privs but we have at least one call
+ * to getf(); we need to bump our zone's count, and (if
+ * this is the first enabling to have an unprivileged call
+ * to getf()) we need to hook into closef().
+ */
+ state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
+
+ if (dtrace_getf++ == 0) {
+ ASSERT(dtrace_closef == NULL);
+ dtrace_closef = dtrace_getf_barrier;
+ }
+ }
+
+ /*
* Now it's time to actually fire the BEGIN probe. We need to disable
* interrupts here both to record the CPU on which we fired the BEGIN
* probe (the data from this CPU will be processed first at user
* level) and to manually activate the buffer for this CPU.
*/
@@ -13198,10 +13318,28 @@
dtrace_interrupt_enable(cookie);
state->dts_activity = DTRACE_ACTIVITY_STOPPED;
dtrace_sync();
+ if (state->dts_getf != 0 &&
+ !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
+ /*
+ * We don't have kernel privs but we have at least one call
+ * to getf(); we need to lower our zone's count, and (if
+ * this is the last enabling to have an unprivileged call
+ * to getf()) we need to clear the closef() hook.
+ */
+ ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
+ ASSERT(dtrace_closef == dtrace_getf_barrier);
+ ASSERT(dtrace_getf > 0);
+
+ state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
+
+ if (--dtrace_getf == 0)
+ dtrace_closef = NULL;
+ }
+
return (0);
}
static int
dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
@@ -14758,10 +14896,27 @@
dtrace_toxrange[dtrace_toxranges].dtt_base = base;
dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
dtrace_toxranges++;
}
+static void
+dtrace_getf_barrier()
+{
+ /*
+ * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
+ * that contain calls to getf(), this routine will be called on every
+ * closef() before either the underlying vnode is released or the
+ * file_t itself is freed. By the time we are here, it is essential
+ * that the file_t can no longer be accessed from a call to getf()
+ * in probe context -- that assures that a dtrace_sync() can be used
+ * to clear out any enablings referring to the old structures.
+ */
+ if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
+ kcred->cr_zone->zone_dtrace_getf != 0)
+ dtrace_sync();
+}
+
/*
* DTrace Driver Cookbook Functions
*/
/*ARGSUSED*/
static int
@@ -15912,10 +16067,13 @@
dtrace_debugger_init = NULL;
dtrace_debugger_fini = NULL;
dtrace_modload = NULL;
dtrace_modunload = NULL;
+ ASSERT(dtrace_getf == 0);
+ ASSERT(dtrace_closef == NULL);
+
mutex_exit(&cpu_lock);
if (dtrace_helptrace_enabled) {
kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
dtrace_helptrace_buffer = NULL;