2915 DTrace in a zone should see "cpu", "curpsinfo", et al
2916 DTrace in a zone should be able to access fds[]
2917 DTrace in a zone should have limited provider access
Reviewed by: Joshua M. Clulow <josh@sysmgr.org>
Reviewed by: Adam Leventhal <ahl@delphix.com>


 154  * available to DTrace consumers via the backtick (`) syntax.  One of these,
 155  * dtrace_zero, is made deliberately so:  it is provided as a source of
 156  * well-known, zero-filled memory.  While this variable is not documented,
 157  * it is used by some translators as an implementation detail.
 158  */
 159 const char      dtrace_zero[256] = { 0 };       /* zero-filled memory */
 160 
 161 /*
 162  * DTrace Internal Variables
 163  */
 164 static dev_info_t       *dtrace_devi;           /* device info */
 165 static vmem_t           *dtrace_arena;          /* probe ID arena */
 166 static vmem_t           *dtrace_minor;          /* minor number arena */
 167 static taskq_t          *dtrace_taskq;          /* task queue */
 168 static dtrace_probe_t   **dtrace_probes;        /* array of all probes */
 169 static int              dtrace_nprobes;         /* number of probes */
 170 static dtrace_provider_t *dtrace_provider;      /* provider list */
 171 static dtrace_meta_t    *dtrace_meta_pid;       /* user-land meta provider */
 172 static int              dtrace_opens;           /* number of opens */
 173 static int              dtrace_helpers;         /* number of helpers */

 174 static void             *dtrace_softstate;      /* softstate pointer */
 175 static dtrace_hash_t    *dtrace_bymod;          /* probes hashed by module */
 176 static dtrace_hash_t    *dtrace_byfunc;         /* probes hashed by function */
 177 static dtrace_hash_t    *dtrace_byname;         /* probes hashed by name */
 178 static dtrace_toxrange_t *dtrace_toxrange;      /* toxic range array */
 179 static int              dtrace_toxranges;       /* number of toxic ranges */
 180 static int              dtrace_toxranges_max;   /* size of toxic range array */
 181 static dtrace_anon_t    dtrace_anon;            /* anonymous enabling */
 182 static kmem_cache_t     *dtrace_state_cache;    /* cache for dynamic state */
 183 static uint64_t         dtrace_vtime_references; /* number of vtimestamp refs */
 184 static kthread_t        *dtrace_panicked;       /* panicking thread */
 185 static dtrace_ecb_t     *dtrace_ecb_create_cache; /* cached created ECB */
 186 static dtrace_genid_t   dtrace_probegen;        /* current probe generation */
 187 static dtrace_helpers_t *dtrace_deferred_pid;   /* deferred helper list */
 188 static dtrace_enabling_t *dtrace_retained;      /* list of retained enablings */
 189 static dtrace_genid_t   dtrace_retained_gen;    /* current retained enab gen */
 190 static dtrace_dynvar_t  dtrace_dynhash_sink;    /* end of dynamic hash chains */
 191 static int              dtrace_dynvar_failclean; /* dynvars failed to clean */
 192 
 193 /*


 356         *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
 357 
 358 #ifndef __x86
 359 #define DTRACE_ALIGNCHECK(addr, size, flags)                            \
 360         if (addr & (size - 1)) {                                    \
 361                 *flags |= CPU_DTRACE_BADALIGN;                          \
 362                 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;     \
 363                 return (0);                                             \
 364         }
 365 #else
 366 #define DTRACE_ALIGNCHECK(addr, size, flags)
 367 #endif
 368 
 369 /*
 370  * Test whether a range of memory starting at testaddr of size testsz falls
 371  * within the range of memory described by baseaddr, basesz.  We take care to avoid
 372  * problems with overflow and underflow of the unsigned quantities, and
 373  * disallow all negative sizes.  Ranges of size 0 are allowed.
 374  */
 375 #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
 376         ((testaddr) - (baseaddr) < (basesz) && \
 377         (testaddr) + (testsz) - (baseaddr) <= (basesz) && \
 378         (testaddr) + (testsz) >= (testaddr))
 379 
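As an illustration of why all three clauses above are needed, here is a minimal stand-alone sketch (the inrange() harness and the sample values are hypothetical, not part of this file): a naive test of the form testaddr >= baseaddr && testaddr + testsz <= baseaddr + basesz can be fooled when testaddr + testsz wraps around, while the form used above rejects such ranges.

	#include <stdint.h>
	#include <stdio.h>

	static int
	inrange(uintptr_t testaddr, size_t testsz, uintptr_t baseaddr, size_t basesz)
	{
		return (testaddr - baseaddr < basesz &&
		    testaddr + testsz - baseaddr <= basesz &&
		    testaddr + testsz >= testaddr);
	}

	int
	main(void)
	{
		/* [0x1000, 0x1010) lies within [0x1000, 0x1100): accepted. */
		printf("%d\n", inrange(0x1000, 0x10, 0x1000, 0x100));

		/* A size so large that testaddr + testsz wraps around: rejected. */
		printf("%d\n", inrange(0x1080, (size_t)0 - 0x40, 0x1000, 0x100));

		return (0);
	}
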
 380 /*
 381  * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 382  * alloc_sz on the righthand side of the comparison in order to avoid overflow
 383  * or underflow in the comparison with it.  This is simpler than the INRANGE
 384  * check above, because we know that the dtms_scratch_ptr is valid in the
 385  * range.  Allocations of size zero are allowed.
 386  */
 387 #define DTRACE_INSCRATCH(mstate, alloc_sz) \
 388         ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
 389         (mstate)->dtms_scratch_ptr >= (alloc_sz))
 390 
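Callers of DTRACE_INSCRATCH follow a common idiom (see, for example, the DIF_SUBR_CLEANPATH case later in this file): verify that the request fits, set CPU_DTRACE_NOSCRATCH and fail if it does not, and advance dtms_scratch_ptr once the space is claimed. A sketch of that idiom as a helper follows; the function name is hypothetical.

	static char *
	example_scratch_alloc(dtrace_mstate_t *mstate, size_t size)
	{
		char *dest = (char *)mstate->dtms_scratch_ptr;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			return (NULL);
		}

		mstate->dtms_scratch_ptr += size;
		return (dest);
	}
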
 391 #define DTRACE_LOADFUNC(bits)                                           \
 392 /*CSTYLED*/                                                             \
 393 uint##bits##_t                                                          \
 394 dtrace_load##bits(uintptr_t addr)                                       \
 395 {                                                                       \
 396         size_t size = bits / NBBY;                                      \
 397         /*CSTYLED*/                                                     \


 458         (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
 459 
 460 static size_t dtrace_strlen(const char *, size_t);
 461 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
 462 static void dtrace_enabling_provide(dtrace_provider_t *);
 463 static int dtrace_enabling_match(dtrace_enabling_t *, int *);
 464 static void dtrace_enabling_matchall(void);
 465 static void dtrace_enabling_reap(void);
 466 static dtrace_state_t *dtrace_anon_grab(void);
 467 static uint64_t dtrace_helper(int, dtrace_mstate_t *,
 468     dtrace_state_t *, uint64_t, uint64_t);
 469 static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
 470 static void dtrace_buffer_drop(dtrace_buffer_t *);
 471 static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
 472 static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
 473     dtrace_state_t *, dtrace_mstate_t *);
 474 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
 475     dtrace_optval_t);
 476 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
 477 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);


 478 
 479 /*
 480  * DTrace Probe Context Functions
 481  *
 482  * These functions are called from probe context.  Because probe context is
 483  * any context in which C may be called, arbitrary locks may be held,
 484  * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 485  * As a result, functions called from probe context may only call other DTrace
 486  * support functions -- they may not interact at all with the system at large.
 487  * (Note that the ASSERT macro is made probe-context safe by redefining it in
 488  * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
 489  * loads are to be performed from probe context, they _must_ be in terms of
 490  * the safe dtrace_load*() variants.
 491  *
 492  * Some functions in this block are not actually called from probe context;
 493  * for these functions, there will be a comment above the function reading
 494  * "Note:  not called from probe context."
 495  */
 496 void
 497 dtrace_panic(const char *format, ...)


 602  * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 603  * region.  The caller of dtrace_canstore() is responsible for performing any
 604  * alignment checks that are needed before stores are actually executed.
 605  */
 606 static int
 607 dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
 608     dtrace_vstate_t *vstate)
 609 {
 610         /*
 611          * First, check to see if the address is in scratch space...
 612          */
 613         if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
 614             mstate->dtms_scratch_size))
 615                 return (1);
 616 
 617         /*
 618          * Now check to see if it's a dynamic variable.  This check will pick
 619          * up both thread-local variables and any global dynamically-allocated
 620          * variables.
 621          */
 622         if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
 623             vstate->dtvs_dynvars.dtds_size)) {
 624                 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
 625                 uintptr_t base = (uintptr_t)dstate->dtds_base +
 626                     (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
 627                 uintptr_t chunkoffs;
 628 
 629                 /*
 630                  * Before we assume that we can store here, we need to make
 631                  * sure that it isn't in our metadata -- storing to our
 632                  * dynamic variable metadata would corrupt our state.  For
 633                  * the range to not include any dynamic variable metadata,
 634                  * it must:
 635                  *
 636                  *      (1) Start above the hash table that is at the base of
 637                  *      the dynamic variable space
 638                  *
 639                  *      (2) Have a starting chunk offset that is beyond the
 640                  *      dtrace_dynvar_t that is at the base of every chunk
 641                  *
 642                  *      (3) Not span a chunk boundary


 669                 return (1);
 670 
 671         return (0);
 672 }
 673 
 674 
 675 /*
 676  * Convenience routine to check to see if the address is within a memory
 677  * region in which a load may be issued given the user's privilege level;
 678  * if not, it sets the appropriate error flags and loads 'addr' into the
 679  * illegal value slot.
 680  *
 681  * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 682  * appropriate memory access protection.
 683  */
 684 static int
 685 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
 686     dtrace_vstate_t *vstate)
 687 {
 688         volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

 689 
 690         /*
 691          * If we hold the privilege to read from kernel memory, then
 692          * everything is readable.
 693          */
 694         if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
 695                 return (1);
 696 
 697         /*
 698          * You can obviously read that which you can store.
 699          */
 700         if (dtrace_canstore(addr, sz, mstate, vstate))
 701                 return (1);
 702 
 703         /*
 704          * We're allowed to read from our own string table.
 705          */
 706         if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
 707             mstate->dtms_difo->dtdo_strlen))
 708                 return (1);
 709 
 710         DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
 711         *illval = addr;
 712         return (0);
 713 }
 714 
 715 /*
 716  * Convenience routine to check to see if a given string is within a memory
 717  * region in which a load may be issued given the user's privilege level;
 718  * this exists so that we don't need to issue unnecessary dtrace_strlen()
 719  * calls in the event that the user has all privileges.
 720  */
 721 static int
 722 dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
 723     dtrace_vstate_t *vstate)
 724 {
 725         size_t strsz;
 726 
 727         /*
 728          * If we hold the privilege to read from kernel memory, then
 729          * everything is readable.


1068 
1069         return (0);
1070 }
1071 
1072 /*
1073  * This privilege check should be used by actions and subroutines to
1074  * verify that the zone of the process that enabled the invoking ECB
1075  * matches the target credentials
1076  */
1077 static int
1078 dtrace_priv_proc_common_zone(dtrace_state_t *state)
1079 {
1080         cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1081 
1082         /*
1083          * We should always have a non-NULL state cred here, since if cred
1084          * is null (anonymous tracing), we fast-path bypass this routine.
1085          */
1086         ASSERT(s_cr != NULL);
1087 
1088         if ((cr = CRED()) != NULL &&
1089             s_cr->cr_zone == cr->cr_zone)
1090                 return (1);
1091 
1092         return (0);
1093 }
1094 
1095 /*
1096  * This privilege check should be used by actions and subroutines to
1097  * verify that the process has not setuid or changed credentials.
1098  */
1099 static int
1100 dtrace_priv_proc_common_nocd()
1101 {
1102         proc_t *proc;
1103 
1104         if ((proc = ttoproc(curthread)) != NULL &&
1105             !(proc->p_flag & SNOCD))
1106                 return (1);
1107 
1108         return (0);
1109 }


1192  * Determine if the dte_cond of the specified ECB allows for processing of
1193  * the current probe to continue.  Note that this routine may allow continued
1194  * processing, but with access(es) stripped from the mstate's dtms_access
1195  * field.
1196  */
1197 static int
1198 dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
1199     dtrace_ecb_t *ecb)
1200 {
1201         dtrace_probe_t *probe = ecb->dte_probe;
1202         dtrace_provider_t *prov = probe->dtpr_provider;
1203         dtrace_pops_t *pops = &prov->dtpv_pops;
1204         int mode = DTRACE_MODE_NOPRIV_DROP;
1205 
1206         ASSERT(ecb->dte_cond);
1207 
1208         if (pops->dtps_mode != NULL) {
1209                 mode = pops->dtps_mode(prov->dtpv_arg,
1210                     probe->dtpr_id, probe->dtpr_arg);
1211 
1212                 ASSERT((mode & DTRACE_MODE_USER) ||
1213                     (mode & DTRACE_MODE_KERNEL));
1214                 ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
1215                     (mode & DTRACE_MODE_NOPRIV_DROP));
1216         }
1217 
1218         /*
1219          * If the dte_cond bits indicate that this consumer is only allowed to
1220          * see user-mode firings of this probe, call the provider's dtps_mode()
1221          * entry point to check that the probe was fired while in a user
1222          * context.  If that's not the case, use the policy specified by the
1223          * provider to determine if we drop the probe or merely restrict
1224          * operation.
1225          */
1226         if (ecb->dte_cond & DTRACE_COND_USERMODE) {
1227                 ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
1228 
1229                 if (!(mode & DTRACE_MODE_USER)) {
1230                         if (mode & DTRACE_MODE_NOPRIV_DROP)
1231                                 return (0);
1232 
1233                         mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1234                 }
1235         }
1236 
1237         /*
1238          * This is more subtle than it looks. We have to be absolutely certain
1239          * that CRED() isn't going to change out from under us so it's only
1240          * legit to examine that structure if we're in constrained situations.
 1241          * Currently, the only time we'll perform this check is if a non-super-user
1242          * has enabled the profile or syscall providers -- providers that
1243          * allow visibility of all processes. For the profile case, the check
1244          * above will ensure that we're examining a user context.


1271          * in our zone, check to see if our mode policy is to restrict rather
1272          * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
1273          * and DTRACE_ACCESS_ARGS
1274          */
1275         if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
1276                 cred_t *cr;
1277                 cred_t *s_cr = state->dts_cred.dcr_cred;
1278 
1279                 ASSERT(s_cr != NULL);
1280 
1281                 if ((cr = CRED()) == NULL ||
1282                     s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
1283                         if (mode & DTRACE_MODE_NOPRIV_DROP)
1284                                 return (0);
1285 
1286                         mstate->dtms_access &=
1287                             ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
1288                 }
1289         }
1290 
1291         return (1);
1292 }
1293 
1294 /*
1295  * Note:  not called from probe context.  This function is called
1296  * asynchronously (and at a regular interval) from outside of probe context to
1297  * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
1298  * cleaning is explained in detail in <sys/dtrace_impl.h>.
1299  */
1300 void
1301 dtrace_dynvar_clean(dtrace_dstate_t *dstate)
1302 {
1303         dtrace_dynvar_t *dirty;
1304         dtrace_dstate_percpu_t *dcpu;
1305         dtrace_dynvar_t **rinsep;
1306         int i, j, work = 0;
1307 
1308         for (i = 0; i < NCPU; i++) {
1309                 dcpu = &dstate->dtds_percpu[i];
1310                 rinsep = &dcpu->dtdsc_rinsing;


2907                 return (dtrace_getreg(lwp->lwp_regs, ndx));
2908         }
2909 
2910         case DIF_VAR_VMREGS: {
2911                 uint64_t rval;
2912 
2913                 if (!dtrace_priv_kernel(state))
2914                         return (0);
2915 
2916                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
2917 
2918                 rval = dtrace_getvmreg(ndx,
2919                     &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);
2920 
2921                 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
2922 
2923                 return (rval);
2924         }
2925 
2926         case DIF_VAR_CURTHREAD:
2927                 if (!dtrace_priv_kernel(state))
2928                         return (0);
2929                 return ((uint64_t)(uintptr_t)curthread);
2930 
2931         case DIF_VAR_TIMESTAMP:
2932                 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
2933                         mstate->dtms_timestamp = dtrace_gethrtime();
2934                         mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
2935                 }
2936                 return (mstate->dtms_timestamp);
2937 
2938         case DIF_VAR_VTIMESTAMP:
2939                 ASSERT(dtrace_vtime_references != 0);
2940                 return (curthread->t_dtrace_vtime);
2941 
2942         case DIF_VAR_WALLTIMESTAMP:
2943                 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
2944                         mstate->dtms_walltimestamp = dtrace_gethrestime();
2945                         mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
2946                 }
2947                 return (mstate->dtms_walltimestamp);


4435                         }
4436 
4437                         start = 0;
4438                         end = lastdir;
4439                 } else {
4440                         ASSERT(subr == DIF_SUBR_BASENAME);
4441                         ASSERT(firstbase != -1 && lastbase != -1);
4442                         start = firstbase;
4443                         end = lastbase;
4444                 }
4445 
4446                 for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
4447                         dest[j] = dtrace_load8(src + i);
4448 
4449                 dest[j] = '\0';
4450                 regs[rd] = (uintptr_t)dest;
4451                 mstate->dtms_scratch_ptr += size;
4452                 break;
4453         }
4454 
4455         case DIF_SUBR_CLEANPATH: {
4456                 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4457                 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4458                 uintptr_t src = tupregs[0].dttk_value;
4459                 int i = 0, j = 0;

4460 
4461                 if (!dtrace_strcanload(src, size, mstate, vstate)) {
4462                         regs[rd] = NULL;
4463                         break;
4464                 }
4465 
4466                 if (!DTRACE_INSCRATCH(mstate, size)) {
4467                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4468                         regs[rd] = NULL;
4469                         break;
4470                 }
4471 
4472                 /*
4473                  * Move forward, loading each character.
4474                  */
4475                 do {
4476                         c = dtrace_load8(src + i++);
4477 next:
4478                         if (j + 5 >= size)   /* 5 = strlen("/..c\0") */
4479                                 break;


4538                                 dest[j++] = '/';
4539                                 dest[j++] = '.';
4540                                 dest[j++] = '.';
4541                                 dest[j++] = c;
4542                                 continue;
4543                         }
4544 
4545                         /*
4546                          * This is "/../" or "/..\0".  We need to back up
4547                          * our destination pointer until we find a "/".
4548                          */
4549                         i--;
4550                         while (j != 0 && dest[--j] != '/')
4551                                 continue;
4552 
4553                         if (c == '\0')
4554                                 dest[++j] = '/';
4555                 } while (c != '\0');
4556 
4557                 dest[j] = '\0';
4558                 regs[rd] = (uintptr_t)dest;
4559                 mstate->dtms_scratch_ptr += size;
4560                 break;
4561         }
4562 
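For reference, an illustration of the transformation this case performs (the sample path is made up, not taken from this file): the subroutine drops empty and "." components and backs up over ".." components without consulting the filesystem.

	/*
	 * cleanpath("/foo/./bar/../baz//qux") would be expected to yield
	 * "/foo/baz/qux":  "/./" and "//" are dropped, and "/../" removes
	 * the preceding "bar" component.
	 */
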
4563         case DIF_SUBR_INET_NTOA:
4564         case DIF_SUBR_INET_NTOA6:
4565         case DIF_SUBR_INET_NTOP: {
4566                 size_t size;
4567                 int af, argi, i;
4568                 char *base, *end;
4569 
4570                 if (subr == DIF_SUBR_INET_NTOP) {
4571                         af = (int)tupregs[0].dttk_value;
4572                         argi = 1;
4573                 } else {
4574                         af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
4575                         argi = 0;
4576                 }
4577 


4922                         if (cc_c == 0)
4923                                 pc = DIF_INSTR_LABEL(instr);
4924                         break;
4925                 case DIF_OP_BL:
4926                         if (cc_n ^ cc_v)
4927                                 pc = DIF_INSTR_LABEL(instr);
4928                         break;
4929                 case DIF_OP_BLU:
4930                         if (cc_c)
4931                                 pc = DIF_INSTR_LABEL(instr);
4932                         break;
4933                 case DIF_OP_BLE:
4934                         if (cc_z | (cc_n ^ cc_v))
4935                                 pc = DIF_INSTR_LABEL(instr);
4936                         break;
4937                 case DIF_OP_BLEU:
4938                         if (cc_c | cc_z)
4939                                 pc = DIF_INSTR_LABEL(instr);
4940                         break;
4941                 case DIF_OP_RLDSB:
4942                         if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
4943                                 *flags |= CPU_DTRACE_KPRIV;
4944                                 *illval = regs[r1];
4945                                 break;
4946                         }
4947                         /*FALLTHROUGH*/
4948                 case DIF_OP_LDSB:
4949                         regs[rd] = (int8_t)dtrace_load8(regs[r1]);
4950                         break;
4951                 case DIF_OP_RLDSH:
4952                         if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
4953                                 *flags |= CPU_DTRACE_KPRIV;
4954                                 *illval = regs[r1];
4955                                 break;
4956                         }
4957                         /*FALLTHROUGH*/
4958                 case DIF_OP_LDSH:
4959                         regs[rd] = (int16_t)dtrace_load16(regs[r1]);
4960                         break;
4961                 case DIF_OP_RLDSW:
4962                         if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
4963                                 *flags |= CPU_DTRACE_KPRIV;
4964                                 *illval = regs[r1];
4965                                 break;
4966                         }
4967                         /*FALLTHROUGH*/
4968                 case DIF_OP_LDSW:
4969                         regs[rd] = (int32_t)dtrace_load32(regs[r1]);
4970                         break;
4971                 case DIF_OP_RLDUB:
4972                         if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
4973                                 *flags |= CPU_DTRACE_KPRIV;
4974                                 *illval = regs[r1];
4975                                 break;
4976                         }
4977                         /*FALLTHROUGH*/
4978                 case DIF_OP_LDUB:
4979                         regs[rd] = dtrace_load8(regs[r1]);
4980                         break;
4981                 case DIF_OP_RLDUH:
4982                         if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
4983                                 *flags |= CPU_DTRACE_KPRIV;
4984                                 *illval = regs[r1];
4985                                 break;
4986                         }
4987                         /*FALLTHROUGH*/
4988                 case DIF_OP_LDUH:
4989                         regs[rd] = dtrace_load16(regs[r1]);
4990                         break;
4991                 case DIF_OP_RLDUW:
4992                         if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
4993                                 *flags |= CPU_DTRACE_KPRIV;
4994                                 *illval = regs[r1];
4995                                 break;
4996                         }
4997                         /*FALLTHROUGH*/
4998                 case DIF_OP_LDUW:
4999                         regs[rd] = dtrace_load32(regs[r1]);
5000                         break;
5001                 case DIF_OP_RLDX:
5002                         if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
5003                                 *flags |= CPU_DTRACE_KPRIV;
5004                                 *illval = regs[r1];
5005                                 break;
5006                         }
5007                         /*FALLTHROUGH*/
5008                 case DIF_OP_LDX:
5009                         regs[rd] = dtrace_load64(regs[r1]);
5010                         break;
5011                 case DIF_OP_ULDSB:
5012                         regs[rd] = (int8_t)
5013                             dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5014                         break;
5015                 case DIF_OP_ULDSH:
5016                         regs[rd] = (int16_t)
5017                             dtrace_fuword16((void *)(uintptr_t)regs[r1]);
5018                         break;
5019                 case DIF_OP_ULDSW:
5020                         regs[rd] = (int32_t)
5021                             dtrace_fuword32((void *)(uintptr_t)regs[r1]);
5022                         break;
5023                 case DIF_OP_ULDUB:
5024                         regs[rd] =
5025                             dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5026                         break;


5925                 caddr_t tomax;
5926 
5927                 /*
5928                  * A little subtlety with the following (seemingly innocuous)
5929                  * declaration of the automatic 'val':  by looking at the
5930                  * code, you might think that it could be declared in the
5931                  * action processing loop, below.  (That is, it's only used in
5932                  * the action processing loop.)  However, it must be declared
5933                  * out of that scope because in the case of DIF expression
5934                  * arguments to aggregating actions, one iteration of the
5935                  * action loop will use the last iteration's value.
5936                  */
5937 #ifdef lint
5938                 uint64_t val = 0;
5939 #else
5940                 uint64_t val;
5941 #endif
5942 
5943                 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
5944                 mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;


5945                 *flags &= ~CPU_DTRACE_ERROR;
5946 
5947                 if (prov == dtrace_provider) {
5948                         /*
5949                          * If dtrace itself is the provider of this probe,
5950                          * we're only going to continue processing the ECB if
5951                          * arg0 (the dtrace_state_t) is equal to the ECB's
5952                          * creating state.  (This prevents disjoint consumers
5953                          * from seeing one another's metaprobes.)
5954                          */
5955                         if (arg0 != (uint64_t)(uintptr_t)state)
5956                                 continue;
5957                 }
5958 
5959                 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
5960                         /*
5961                          * We're not currently active.  If our provider isn't
5962                          * the dtrace pseudo provider, we're not interested.
5963                          */
5964                         if (prov != dtrace_provider)


8431                 case DIF_OP_STLS:
8432                 case DIF_OP_STGAA:
8433                 case DIF_OP_STTAA:
8434                         if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
8435                                 err += efunc(pc, "invalid variable %u\n", v);
8436                         if (rs >= nregs)
 8437                                 err += efunc(pc, "invalid register %u\n", rs);
8438                         break;
8439                 case DIF_OP_CALL:
8440                         if (subr > DIF_SUBR_MAX)
8441                                 err += efunc(pc, "invalid subr %u\n", subr);
8442                         if (rd >= nregs)
8443                                 err += efunc(pc, "invalid register %u\n", rd);
8444                         if (rd == 0)
8445                                 err += efunc(pc, "cannot write to %r0\n");
8446 
8447                         if (subr == DIF_SUBR_COPYOUT ||
8448                             subr == DIF_SUBR_COPYOUTSTR) {
8449                                 dp->dtdo_destructive = 1;
8450                         }
8451                         break;
8452                 case DIF_OP_PUSHTR:
8453                         if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
8454                                 err += efunc(pc, "invalid ref type %u\n", type);
8455                         if (r2 >= nregs)
8456                                 err += efunc(pc, "invalid register %u\n", r2);
8457                         if (rs >= nregs)
8458                                 err += efunc(pc, "invalid register %u\n", rs);
8459                         break;
8460                 case DIF_OP_PUSHTV:
8461                         if (type != DIF_TYPE_CTF)
8462                                 err += efunc(pc, "invalid val type %u\n", type);
8463                         if (r2 >= nregs)
8464                                 err += efunc(pc, "invalid register %u\n", r2);
8465                         if (rs >= nregs)
8466                                 err += efunc(pc, "invalid register %u\n", rs);
8467                         break;
8468                 default:
8469                         err += efunc(pc, "invalid opcode %u\n",
8470                             DIF_INSTR_OP(instr));


13073         hdlr.cyh_arg = state;
13074         hdlr.cyh_level = CY_LOW_LEVEL;
13075 
13076         when.cyt_when = 0;
13077         when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
13078 
13079         state->dts_cleaner = cyclic_add(&hdlr, &when);
13080 
13081         hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
13082         hdlr.cyh_arg = state;
13083         hdlr.cyh_level = CY_LOW_LEVEL;
13084 
13085         when.cyt_when = 0;
13086         when.cyt_interval = dtrace_deadman_interval;
13087 
13088         state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
13089         state->dts_deadman = cyclic_add(&hdlr, &when);
13090 
13091         state->dts_activity = DTRACE_ACTIVITY_WARMUP;
13092 
13093         /*
13094          * Now it's time to actually fire the BEGIN probe.  We need to disable
13095          * interrupts here both to record the CPU on which we fired the BEGIN
13096          * probe (the data from this CPU will be processed first at user
13097          * level) and to manually activate the buffer for this CPU.
13098          */
13099         cookie = dtrace_interrupt_disable();
13100         *cpu = CPU->cpu_id;
13101         ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
13102         state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
13103 
13104         dtrace_probe(dtrace_probeid_begin,
13105             (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13106         dtrace_interrupt_enable(cookie);
13107         /*
13108          * We may have had an exit action from a BEGIN probe; only change our
13109          * state to ACTIVE if we're still in WARMUP.
13110          */
13111         ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
13112             state->dts_activity == DTRACE_ACTIVITY_DRAINING);


13189         ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
13190 
13191         /*
13192          * Finally, we can release the reserve and call the END probe.  We
13193          * disable interrupts across calling the END probe to allow us to
13194          * return the CPU on which we actually called the END probe.  This
13195          * allows user-land to be sure that this CPU's principal buffer is
13196          * processed last.
13197          */
13198         state->dts_reserve = 0;
13199 
13200         cookie = dtrace_interrupt_disable();
13201         *cpu = CPU->cpu_id;
13202         dtrace_probe(dtrace_probeid_end,
13203             (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13204         dtrace_interrupt_enable(cookie);
13205 
13206         state->dts_activity = DTRACE_ACTIVITY_STOPPED;
13207         dtrace_sync();
13208 
13209         return (0);
13210 }
13211 
13212 static int
13213 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
13214     dtrace_optval_t val)
13215 {
13216         ASSERT(MUTEX_HELD(&dtrace_lock));
13217 
13218         if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13219                 return (EBUSY);
13220 
13221         if (option >= DTRACEOPT_MAX)
13222                 return (EINVAL);
13223 
13224         if (option != DTRACEOPT_CPU && val < 0)
13225                 return (EINVAL);
13226 
13227         switch (option) {
13228         case DTRACEOPT_DESTRUCTIVE:


14749                 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
14750                 range = kmem_zalloc(nsize, KM_SLEEP);
14751 
14752                 if (dtrace_toxrange != NULL) {
14753                         ASSERT(osize != 0);
14754                         bcopy(dtrace_toxrange, range, osize);
14755                         kmem_free(dtrace_toxrange, osize);
14756                 }
14757 
14758                 dtrace_toxrange = range;
14759         }
14760 
14761         ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
14762         ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);
14763 
14764         dtrace_toxrange[dtrace_toxranges].dtt_base = base;
14765         dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
14766         dtrace_toxranges++;
14767 }
14768 
14769 /*
14770  * DTrace Driver Cookbook Functions
14771  */
14772 /*ARGSUSED*/
14773 static int
14774 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
14775 {
14776         dtrace_provider_id_t id;
14777         dtrace_state_t *state = NULL;
14778         dtrace_enabling_t *enab;
14779 
14780         mutex_enter(&cpu_lock);
14781         mutex_enter(&dtrace_provider_lock);
14782         mutex_enter(&dtrace_lock);
14783 
14784         if (ddi_soft_state_init(&dtrace_softstate,
14785             sizeof (dtrace_state_t), 0) != 0) {
14786                 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
14787                 mutex_exit(&cpu_lock);
14788                 mutex_exit(&dtrace_provider_lock);


15905 
15906                 /*
15907                  * If we're being detached with anonymous state, we need to
15908                  * indicate to the kernel debugger that DTrace is now inactive.
15909                  */
15910                 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
15911         }
15912 
15913         bzero(&dtrace_anon, sizeof (dtrace_anon_t));
15914         unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
15915         dtrace_cpu_init = NULL;
15916         dtrace_helpers_cleanup = NULL;
15917         dtrace_helpers_fork = NULL;
15918         dtrace_cpustart_init = NULL;
15919         dtrace_cpustart_fini = NULL;
15920         dtrace_debugger_init = NULL;
15921         dtrace_debugger_fini = NULL;
15922         dtrace_modload = NULL;
15923         dtrace_modunload = NULL;
15924 



15925         mutex_exit(&cpu_lock);
15926 
15927         if (dtrace_helptrace_enabled) {
15928                 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
15929                 dtrace_helptrace_buffer = NULL;
15930         }
15931 
15932         kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
15933         dtrace_probes = NULL;
15934         dtrace_nprobes = 0;
15935 
15936         dtrace_hash_destroy(dtrace_bymod);
15937         dtrace_hash_destroy(dtrace_byfunc);
15938         dtrace_hash_destroy(dtrace_byname);
15939         dtrace_bymod = NULL;
15940         dtrace_byfunc = NULL;
15941         dtrace_byname = NULL;
15942 
15943         kmem_cache_destroy(dtrace_state_cache);
15944         vmem_destroy(dtrace_minor);




 154  * available to DTrace consumers via the backtick (`) syntax.  One of these,
 155  * dtrace_zero, is made deliberately so:  it is provided as a source of
 156  * well-known, zero-filled memory.  While this variable is not documented,
 157  * it is used by some translators as an implementation detail.
 158  */
 159 const char      dtrace_zero[256] = { 0 };       /* zero-filled memory */
 160 
 161 /*
 162  * DTrace Internal Variables
 163  */
 164 static dev_info_t       *dtrace_devi;           /* device info */
 165 static vmem_t           *dtrace_arena;          /* probe ID arena */
 166 static vmem_t           *dtrace_minor;          /* minor number arena */
 167 static taskq_t          *dtrace_taskq;          /* task queue */
 168 static dtrace_probe_t   **dtrace_probes;        /* array of all probes */
 169 static int              dtrace_nprobes;         /* number of probes */
 170 static dtrace_provider_t *dtrace_provider;      /* provider list */
 171 static dtrace_meta_t    *dtrace_meta_pid;       /* user-land meta provider */
 172 static int              dtrace_opens;           /* number of opens */
 173 static int              dtrace_helpers;         /* number of helpers */
 174 static int              dtrace_getf;            /* number of unpriv getf()s */
 175 static void             *dtrace_softstate;      /* softstate pointer */
 176 static dtrace_hash_t    *dtrace_bymod;          /* probes hashed by module */
 177 static dtrace_hash_t    *dtrace_byfunc;         /* probes hashed by function */
 178 static dtrace_hash_t    *dtrace_byname;         /* probes hashed by name */
 179 static dtrace_toxrange_t *dtrace_toxrange;      /* toxic range array */
 180 static int              dtrace_toxranges;       /* number of toxic ranges */
 181 static int              dtrace_toxranges_max;   /* size of toxic range array */
 182 static dtrace_anon_t    dtrace_anon;            /* anonymous enabling */
 183 static kmem_cache_t     *dtrace_state_cache;    /* cache for dynamic state */
 184 static uint64_t         dtrace_vtime_references; /* number of vtimestamp refs */
 185 static kthread_t        *dtrace_panicked;       /* panicking thread */
 186 static dtrace_ecb_t     *dtrace_ecb_create_cache; /* cached created ECB */
 187 static dtrace_genid_t   dtrace_probegen;        /* current probe generation */
 188 static dtrace_helpers_t *dtrace_deferred_pid;   /* deferred helper list */
 189 static dtrace_enabling_t *dtrace_retained;      /* list of retained enablings */
 190 static dtrace_genid_t   dtrace_retained_gen;    /* current retained enab gen */
 191 static dtrace_dynvar_t  dtrace_dynhash_sink;    /* end of dynamic hash chains */
 192 static int              dtrace_dynvar_failclean; /* dynvars failed to clean */
 193 
 194 /*


 357         *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
 358 
 359 #ifndef __x86
 360 #define DTRACE_ALIGNCHECK(addr, size, flags)                            \
 361         if (addr & (size - 1)) {                                    \
 362                 *flags |= CPU_DTRACE_BADALIGN;                          \
 363                 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;     \
 364                 return (0);                                             \
 365         }
 366 #else
 367 #define DTRACE_ALIGNCHECK(addr, size, flags)
 368 #endif
 369 
 370 /*
 371  * Test whether a range of memory starting at testaddr of size testsz falls
 372  * within the range of memory described by baseaddr, basesz.  We take care to avoid
 373  * problems with overflow and underflow of the unsigned quantities, and
 374  * disallow all negative sizes.  Ranges of size 0 are allowed.
 375  */
 376 #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
 377         ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
 378         (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
 379         (testaddr) + (testsz) >= (testaddr))
 380 
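Relative to the earlier definition, the (uintptr_t) casts on baseaddr let call sites pass pointer-typed base addresses directly; compare the two call-site shapes below, both drawn from dtrace_canload() in the two versions of this file.

	/*
	 * Previously:      DTRACE_INRANGE(addr, sz,
	 *                      (uintptr_t)mstate->dtms_difo->dtdo_strtab, ...)
	 * With the casts:  DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t))
	 */
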
 381 /*
 382  * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 383  * alloc_sz on the righthand side of the comparison in order to avoid overflow
 384  * or underflow in the comparison with it.  This is simpler than the INRANGE
 385  * check above, because we know that the dtms_scratch_ptr is valid in the
 386  * range.  Allocations of size zero are allowed.
 387  */
 388 #define DTRACE_INSCRATCH(mstate, alloc_sz) \
 389         ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
 390         (mstate)->dtms_scratch_ptr >= (alloc_sz))
 391 
 392 #define DTRACE_LOADFUNC(bits)                                           \
 393 /*CSTYLED*/                                                             \
 394 uint##bits##_t                                                          \
 395 dtrace_load##bits(uintptr_t addr)                                       \
 396 {                                                                       \
 397         size_t size = bits / NBBY;                                      \
 398         /*CSTYLED*/                                                     \


 459         (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
 460 
 461 static size_t dtrace_strlen(const char *, size_t);
 462 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
 463 static void dtrace_enabling_provide(dtrace_provider_t *);
 464 static int dtrace_enabling_match(dtrace_enabling_t *, int *);
 465 static void dtrace_enabling_matchall(void);
 466 static void dtrace_enabling_reap(void);
 467 static dtrace_state_t *dtrace_anon_grab(void);
 468 static uint64_t dtrace_helper(int, dtrace_mstate_t *,
 469     dtrace_state_t *, uint64_t, uint64_t);
 470 static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
 471 static void dtrace_buffer_drop(dtrace_buffer_t *);
 472 static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
 473 static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
 474     dtrace_state_t *, dtrace_mstate_t *);
 475 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
 476     dtrace_optval_t);
 477 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
 478 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
 479 static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
 480 static void dtrace_getf_barrier(void);
 481 
 482 /*
 483  * DTrace Probe Context Functions
 484  *
 485  * These functions are called from probe context.  Because probe context is
 486  * any context in which C may be called, arbitrary locks may be held,
 487  * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 488  * As a result, functions called from probe context may only call other DTrace
 489  * support functions -- they may not interact at all with the system at large.
 490  * (Note that the ASSERT macro is made probe-context safe by redefining it in
 491  * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
 492  * loads are to be performed from probe context, they _must_ be in terms of
 493  * the safe dtrace_load*() variants.
 494  *
 495  * Some functions in this block are not actually called from probe context;
 496  * for these functions, there will be a comment above the function reading
 497  * "Note:  not called from probe context."
 498  */
 499 void
 500 dtrace_panic(const char *format, ...)


 605  * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 606  * region.  The caller of dtrace_canstore() is responsible for performing any
 607  * alignment checks that are needed before stores are actually executed.
 608  */
 609 static int
 610 dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
 611     dtrace_vstate_t *vstate)
 612 {
 613         /*
 614          * First, check to see if the address is in scratch space...
 615          */
 616         if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
 617             mstate->dtms_scratch_size))
 618                 return (1);
 619 
 620         /*
 621          * Now check to see if it's a dynamic variable.  This check will pick
 622          * up both thread-local variables and any global dynamically-allocated
 623          * variables.
 624          */
 625         if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
 626             vstate->dtvs_dynvars.dtds_size)) {
 627                 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
 628                 uintptr_t base = (uintptr_t)dstate->dtds_base +
 629                     (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
 630                 uintptr_t chunkoffs;
 631 
 632                 /*
 633                  * Before we assume that we can store here, we need to make
 634                  * sure that it isn't in our metadata -- storing to our
 635                  * dynamic variable metadata would corrupt our state.  For
 636                  * the range to not include any dynamic variable metadata,
 637                  * it must:
 638                  *
 639                  *      (1) Start above the hash table that is at the base of
 640                  *      the dynamic variable space
 641                  *
 642                  *      (2) Have a starting chunk offset that is beyond the
 643                  *      dtrace_dynvar_t that is at the base of every chunk
 644                  *
 645                  *      (3) Not span a chunk boundary


 672                 return (1);
 673 
 674         return (0);
 675 }
 676 
 677 
 678 /*
 679  * Convenience routine to check to see if the address is within a memory
 680  * region in which a load may be issued given the user's privilege level;
 681  * if not, it sets the appropriate error flags and loads 'addr' into the
 682  * illegal value slot.
 683  *
 684  * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 685  * appropriate memory access protection.
 686  */
 687 static int
 688 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
 689     dtrace_vstate_t *vstate)
 690 {
 691         volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
 692         file_t *fp;
 693 
 694         /*
 695          * If we hold the privilege to read from kernel memory, then
 696          * everything is readable.
 697          */
 698         if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
 699                 return (1);
 700 
 701         /*
 702          * You can obviously read that which you can store.
 703          */
 704         if (dtrace_canstore(addr, sz, mstate, vstate))
 705                 return (1);
 706 
 707         /*
 708          * We're allowed to read from our own string table.
 709          */
 710         if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
 711             mstate->dtms_difo->dtdo_strlen))
 712                 return (1);
 713 
 714         if (vstate->dtvs_state != NULL &&
 715             dtrace_priv_proc(vstate->dtvs_state, mstate)) {
 716                 proc_t *p;
 717 
 718                 /*
 719                  * When we have privileges to the current process, there are
 720                  * several context-related kernel structures that are safe to
 721                  * read, even absent the privilege to read from kernel memory.
 722                  * These reads are safe because these structures contain only
 723                  * state that (1) we're permitted to read, (2) is harmless or
 724                  * (3) contains pointers to additional kernel state that we're
 725                  * not permitted to read (and as such, do not present an
 726                  * opportunity for privilege escalation).  Finally (and
 727                  * critically), because of the nature of their relation with
 728                  * the current thread context, the memory associated with these
 729                  * structures cannot change over the duration of probe context,
 730                  * and it is therefore impossible for this memory to be
 731                  * deallocated and reallocated as something else while it's
 732                  * being operated upon.
 733                  */
 734                 if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
 735                         return (1);
 736 
 737                 if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
 738                     sz, curthread->t_procp, sizeof (proc_t))) {
 739                         return (1);
 740                 }
 741 
 742                 if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
 743                     curthread->t_cred, sizeof (cred_t))) {
 744                         return (1);
 745                 }
 746 
 747                 if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
 748                     &(p->p_pidp->pid_id), sizeof (pid_t))) {
 749                         return (1);
 750                 }
 751 
 752                 if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
 753                     curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
 754                         return (1);
 755                 }
 756         }
 757 
 758         if ((fp = mstate->dtms_getf) != NULL) {
 759                 uintptr_t psz = sizeof (void *);
 760                 vnode_t *vp;
 761                 vnodeops_t *op;
 762 
 763                 /*
 764                  * When getf() returns a file_t, the enabling is implicitly
 765                  * granted the (transient) right to read the returned file_t
 766                  * as well as the v_path and v_op->vnop_name of the underlying
 767                  * vnode.  These accesses are allowed after a successful
 768                  * getf() because the members that they refer to cannot change
 769                  * once set -- and the barrier logic in the kernel's closef()
 770                  * path assures that the file_t and its referenced vnode_t
 771                  * cannot themselves be stale (that is, it is impossible for
 772                  * either dtms_getf itself or its f_vnode member to reference
 773                  * freed memory).
 774                  */
 775                 if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
 776                         return (1);
 777 
 778                 if ((vp = fp->f_vnode) != NULL) {
 779                         if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
 780                                 return (1);
 781 
 782                         if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
 783                             vp->v_path, strlen(vp->v_path) + 1)) {
 784                                 return (1);
 785                         }
 786 
 787                         if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
 788                                 return (1);
 789 
 790                         if ((op = vp->v_op) != NULL &&
 791                             DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
 792                                 return (1);
 793                         }
 794 
 795                         if (op != NULL && op->vnop_name != NULL &&
 796                             DTRACE_INRANGE(addr, sz, op->vnop_name,
 797                             strlen(op->vnop_name) + 1)) {
 798                                 return (1);
 799                         }
 800                 }
 801         }
 802 
 803         DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
 804         *illval = addr;
 805         return (0);
 806 }
 807 
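A minimal sketch of the calling convention described in the comment above dtrace_canload() (the subroutine name below is hypothetical): validate the range first, then use the safe dtrace_load*() accessors; on failure, the fault flag and illegal value have already been recorded by dtrace_canload().

	static uint64_t
	example_subr_load64(uint64_t addr, dtrace_mstate_t *mstate,
	    dtrace_vstate_t *vstate)
	{
		if (!dtrace_canload(addr, sizeof (uint64_t), mstate, vstate))
			return (0);

		return (dtrace_load64((uintptr_t)addr));
	}
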
 808 /*
 809  * Convenience routine to check to see if a given string is within a memory
 810  * region in which a load may be issued given the user's privilege level;
 811  * this exists so that we don't need to issue unnecessary dtrace_strlen()
 812  * calls in the event that the user has all privileges.
 813  */
 814 static int
 815 dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
 816     dtrace_vstate_t *vstate)
 817 {
 818         size_t strsz;
 819 
 820         /*
 821          * If we hold the privilege to read from kernel memory, then
 822          * everything is readable.


1161 
1162         return (0);
1163 }
1164 
1165 /*
1166  * This privilege check should be used by actions and subroutines to
1167  * verify that the zone of the process that enabled the invoking ECB
1168  * matches the target credentials
1169  */
1170 static int
1171 dtrace_priv_proc_common_zone(dtrace_state_t *state)
1172 {
1173         cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1174 
1175         /*
1176          * We should always have a non-NULL state cred here, since if cred
1177          * is null (anonymous tracing), we fast-path bypass this routine.
1178          */
1179         ASSERT(s_cr != NULL);
1180 
1181         if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)

1182                 return (1);
1183 
1184         return (0);
1185 }
1186 
1187 /*
1188  * This privilege check should be used by actions and subroutines to
1189  * verify that the process has not setuid or changed credentials.
1190  */
1191 static int
1192 dtrace_priv_proc_common_nocd()
1193 {
1194         proc_t *proc;
1195 
1196         if ((proc = ttoproc(curthread)) != NULL &&
1197             !(proc->p_flag & SNOCD))
1198                 return (1);
1199 
1200         return (0);
1201 }


1284  * Determine if the dte_cond of the specified ECB allows for processing of
1285  * the current probe to continue.  Note that this routine may allow continued
1286  * processing, but with access(es) stripped from the mstate's dtms_access
1287  * field.
1288  */
1289 static int
1290 dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
1291     dtrace_ecb_t *ecb)
1292 {
1293         dtrace_probe_t *probe = ecb->dte_probe;
1294         dtrace_provider_t *prov = probe->dtpr_provider;
1295         dtrace_pops_t *pops = &prov->dtpv_pops;
1296         int mode = DTRACE_MODE_NOPRIV_DROP;
1297 
1298         ASSERT(ecb->dte_cond);
1299 
1300         if (pops->dtps_mode != NULL) {
1301                 mode = pops->dtps_mode(prov->dtpv_arg,
1302                     probe->dtpr_id, probe->dtpr_arg);
1303 
1304                 ASSERT(mode & (DTRACE_MODE_USER | DTRACE_MODE_KERNEL));
1305                 ASSERT(mode & (DTRACE_MODE_NOPRIV_RESTRICT |
1306                     DTRACE_MODE_NOPRIV_DROP));

1307         }
1308 
1309         /*
1310          * If the dte_cond bits indicate that this consumer is only allowed to
1311          * see user-mode firings of this probe, check that the probe was fired
1312          * while in a user context.  If that's not the case, use the policy
1313          * specified by the provider to determine if we drop the probe or
1314          * merely restrict operation.
1315          */
1316         if (ecb->dte_cond & DTRACE_COND_USERMODE) {
1317                 ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
1318 
1319                 if (!(mode & DTRACE_MODE_USER)) {
1320                         if (mode & DTRACE_MODE_NOPRIV_DROP)
1321                                 return (0);
1322 
1323                         mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1324                 }
1325         }
1326 
1327         /*
1328          * This is more subtle than it looks. We have to be absolutely certain
1329          * that CRED() isn't going to change out from under us so it's only
1330          * legit to examine that structure if we're in constrained situations.
1331          * Currently, the only time we'll do this check is if a non-super-user
1332          * has enabled the profile or syscall providers -- providers that
1333          * allow visibility of all processes. For the profile case, the check
1334          * above will ensure that we're examining a user context.


1361          * in our zone, check to see if our mode policy is to restrict rather
1362          * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
1363          * and DTRACE_ACCESS_ARGS.
1364          */
1365         if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
1366                 cred_t *cr;
1367                 cred_t *s_cr = state->dts_cred.dcr_cred;
1368 
1369                 ASSERT(s_cr != NULL);
1370 
1371                 if ((cr = CRED()) == NULL ||
1372                     s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
1373                         if (mode & DTRACE_MODE_NOPRIV_DROP)
1374                                 return (0);
1375 
1376                         mstate->dtms_access &=
1377                             ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
1378                 }
1379         }
1380 
1381         /*
1382          * By merits of being in this code path at all, we have limited
1383          * privileges.  If the provider has indicated that limited privileges
1384          * are to denote restricted operation, strip off the ability to access
1385          * arguments.
1386          */
1387         if (mode & DTRACE_MODE_LIMITEDPRIV_RESTRICT)
1388                 mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1389 
1390         return (1);
1391 }
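The dte_cond handling above leans on the provider's optional dtps_mode() entry point both to classify the firing (user versus kernel) and to choose between dropping and restricting unprivileged consumers. A sketch of such an entry point, assuming a hypothetical example_usermode() context test:

	static int
	example_dtps_mode(void *arg, dtrace_id_t id, void *parg)
	{
		/* prefer restriction over dropping for unprivileged consumers */
		int mode = DTRACE_MODE_NOPRIV_RESTRICT;

		if (example_usermode())
			mode |= DTRACE_MODE_USER;
		else
			mode |= DTRACE_MODE_KERNEL;

		return (mode);
	}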
1392 
1393 /*
1394  * Note:  not called from probe context.  This function is called
1395  * asynchronously (and at a regular interval) from outside of probe context to
1396  * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
1397  * cleaning is explained in detail in <sys/dtrace_impl.h>.
1398  */
1399 void
1400 dtrace_dynvar_clean(dtrace_dstate_t *dstate)
1401 {
1402         dtrace_dynvar_t *dirty;
1403         dtrace_dstate_percpu_t *dcpu;
1404         dtrace_dynvar_t **rinsep;
1405         int i, j, work = 0;
1406 
1407         for (i = 0; i < NCPU; i++) {
1408                 dcpu = &dstate->dtds_percpu[i];
1409                 rinsep = &dcpu->dtdsc_rinsing;


3006                 return (dtrace_getreg(lwp->lwp_regs, ndx));
3007         }
3008 
3009         case DIF_VAR_VMREGS: {
3010                 uint64_t rval;
3011 
3012                 if (!dtrace_priv_kernel(state))
3013                         return (0);
3014 
3015                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3016 
3017                 rval = dtrace_getvmreg(ndx,
3018                     &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);
3019 
3020                 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3021 
3022                 return (rval);
3023         }
3024 
3025         case DIF_VAR_CURTHREAD:
3026                 if (!dtrace_priv_proc(state, mstate))
3027                         return (0);
3028                 return ((uint64_t)(uintptr_t)curthread);
3029 
3030         case DIF_VAR_TIMESTAMP:
3031                 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
3032                         mstate->dtms_timestamp = dtrace_gethrtime();
3033                         mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
3034                 }
3035                 return (mstate->dtms_timestamp);
3036 
3037         case DIF_VAR_VTIMESTAMP:
3038                 ASSERT(dtrace_vtime_references != 0);
3039                 return (curthread->t_dtrace_vtime);
3040 
3041         case DIF_VAR_WALLTIMESTAMP:
3042                 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
3043                         mstate->dtms_walltimestamp = dtrace_gethrestime();
3044                         mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
3045                 }
3046                 return (mstate->dtms_walltimestamp);
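The TIMESTAMP and WALLTIMESTAMP cases show the mstate caching idiom: sample the value at most once per probe firing, record a presence bit in dtms_present, and return the cached copy on later references within the same firing. Any additional cached member would take the same shape; a sketch with assumed names (DIF_VAR_EXAMPLE, DTRACE_MSTATE_EXAMPLE and dtms_example are illustrative, not real definitions):

	case DIF_VAR_EXAMPLE:
		if (!(mstate->dtms_present & DTRACE_MSTATE_EXAMPLE)) {
			mstate->dtms_example = example_sample();
			mstate->dtms_present |= DTRACE_MSTATE_EXAMPLE;
		}
		return (mstate->dtms_example);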


4534                         }
4535 
4536                         start = 0;
4537                         end = lastdir;
4538                 } else {
4539                         ASSERT(subr == DIF_SUBR_BASENAME);
4540                         ASSERT(firstbase != -1 && lastbase != -1);
4541                         start = firstbase;
4542                         end = lastbase;
4543                 }
4544 
4545                 for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
4546                         dest[j] = dtrace_load8(src + i);
4547 
4548                 dest[j] = '\0';
4549                 regs[rd] = (uintptr_t)dest;
4550                 mstate->dtms_scratch_ptr += size;
4551                 break;
4552         }
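Because dirname() and basename() differ only in the [start, end] window selected above, they share this copy loop. Illustrative expected results, assuming the dirname(1)/basename(1) semantics these subroutines are meant to mirror:

	/*
	 *	input			dirname()	basename()
	 *	"/foo/bar/baz"		"/foo/bar"	"baz"
	 *	"/foo/bar///"		"/foo"		"bar"
	 *	"/"			"/"		"/"
	 */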
4553 
4554         case DIF_SUBR_GETF: {
4555                 uintptr_t fd = tupregs[0].dttk_value;
4556                 uf_info_t *finfo = &curthread->t_procp->p_user.u_finfo;
4557                 file_t *fp;
4558 
4559                 if (!dtrace_priv_proc(state, mstate)) {
4560                         regs[rd] = NULL;
4561                         break;
4562                 }
4563 
4564                 /*
4565                  * This is safe because fi_nfiles only increases, and the
4566                  * fi_list array is not freed when the array size doubles.
4567                  * (See the comment in flist_grow() for details on the
4568                  * management of the u_finfo structure.)
4569                  */
4570                 fp = fd < finfo->fi_nfiles ? finfo->fi_list[fd].uf_file : NULL;
4571 
4572                 mstate->dtms_getf = fp;
4573                 regs[rd] = (uintptr_t)fp;
4574                 break;
4575         }
4576 
4577         case DIF_SUBR_CLEANPATH: {
4578                 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4579                 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4580                 uintptr_t src = tupregs[0].dttk_value;
4581                 int i = 0, j = 0;
4582                 zone_t *z;
4583 
4584                 if (!dtrace_strcanload(src, size, mstate, vstate)) {
4585                         regs[rd] = NULL;
4586                         break;
4587                 }
4588 
4589                 if (!DTRACE_INSCRATCH(mstate, size)) {
4590                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4591                         regs[rd] = NULL;
4592                         break;
4593                 }
4594 
4595                 /*
4596                  * Move forward, loading each character.
4597                  */
4598                 do {
4599                         c = dtrace_load8(src + i++);
4600 next:
4601                         if (j + 5 >= size)   /* 5 = strlen("/..c") + 1 */
4602                                 break;


4661                                 dest[j++] = '/';
4662                                 dest[j++] = '.';
4663                                 dest[j++] = '.';
4664                                 dest[j++] = c;
4665                                 continue;
4666                         }
4667 
4668                         /*
4669                          * This is "/../" or "/..\0".  We need to back up
4670                          * our destination pointer until we find a "/".
4671                          */
4672                         i--;
4673                         while (j != 0 && dest[--j] != '/')
4674                                 continue;
4675 
4676                         if (c == '\0')
4677                                 dest[++j] = '/';
4678                 } while (c != '\0');
4679 
4680                 dest[j] = '\0';
4681 
4682                 if (mstate->dtms_getf != NULL &&
4683                     !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
4684                     (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
4685                         /*
4686                          * If we've done a getf() as a part of this ECB and we
4687                          * don't have kernel access (and we're not in the global
4688                          * zone), check if the path we cleaned up begins with
4689                          * the zone's root path, and trim it off if so.  Note
4690                          * that this is an output cleanliness issue, not a
4691                          * security issue: knowing one's zone root path does
4692                          * not enable privilege escalation.
4693                          */
4694                         if (strstr(dest, z->zone_rootpath) == dest)
4695                                 dest += strlen(z->zone_rootpath) - 1;
4696                 }
4697 
4698                 regs[rd] = (uintptr_t)dest;
4699                 mstate->dtms_scratch_ptr += size;
4700                 break;
4701         }
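The zone-root trimming at the end of this case is purely an output nicety for non-global-zone consumers that have used getf(); a worked example with assumed paths:

	/*
	 * zone_rootpath:  "/zones/web01/root/"
	 * cleaned path:   "/zones/web01/root/etc/passwd"
	 * reported path:  "/etc/passwd"
	 *
	 * (The "- 1" preserves the root path's trailing '/', so the
	 * trimmed result still begins with a '/'.)
	 */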
4702 
4703         case DIF_SUBR_INET_NTOA:
4704         case DIF_SUBR_INET_NTOA6:
4705         case DIF_SUBR_INET_NTOP: {
4706                 size_t size;
4707                 int af, argi, i;
4708                 char *base, *end;
4709 
4710                 if (subr == DIF_SUBR_INET_NTOP) {
4711                         af = (int)tupregs[0].dttk_value;
4712                         argi = 1;
4713                 } else {
4714                         af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
4715                         argi = 0;
4716                 }
4717 


5062                         if (cc_c == 0)
5063                                 pc = DIF_INSTR_LABEL(instr);
5064                         break;
5065                 case DIF_OP_BL:
5066                         if (cc_n ^ cc_v)
5067                                 pc = DIF_INSTR_LABEL(instr);
5068                         break;
5069                 case DIF_OP_BLU:
5070                         if (cc_c)
5071                                 pc = DIF_INSTR_LABEL(instr);
5072                         break;
5073                 case DIF_OP_BLE:
5074                         if (cc_z | (cc_n ^ cc_v))
5075                                 pc = DIF_INSTR_LABEL(instr);
5076                         break;
5077                 case DIF_OP_BLEU:
5078                         if (cc_c | cc_z)
5079                                 pc = DIF_INSTR_LABEL(instr);
5080                         break;
5081                 case DIF_OP_RLDSB:
5082                         if (!dtrace_canload(regs[r1], 1, mstate, vstate))
5083                                 break;
5084                         /*FALLTHROUGH*/
5085                 case DIF_OP_LDSB:
5086                         regs[rd] = (int8_t)dtrace_load8(regs[r1]);
5087                         break;
5088                 case DIF_OP_RLDSH:
5089                         if (!dtrace_canload(regs[r1], 2, mstate, vstate))
5090                                 break;
5091                         /*FALLTHROUGH*/
5092                 case DIF_OP_LDSH:
5093                         regs[rd] = (int16_t)dtrace_load16(regs[r1]);
5094                         break;
5095                 case DIF_OP_RLDSW:
5096                         if (!dtrace_canload(regs[r1], 4, mstate, vstate))
5097                                 break;
5098                         /*FALLTHROUGH*/
5099                 case DIF_OP_LDSW:
5100                         regs[rd] = (int32_t)dtrace_load32(regs[r1]);
5101                         break;
5102                 case DIF_OP_RLDUB:
5103                         if (!dtrace_canload(regs[r1], 1, mstate, vstate))
5104                                 break;
5105                         /*FALLTHROUGH*/
5106                 case DIF_OP_LDUB:
5107                         regs[rd] = dtrace_load8(regs[r1]);
5108                         break;
5109                 case DIF_OP_RLDUH:
5110                         if (!dtrace_canload(regs[r1], 2, mstate, vstate))
5111                                 break;
5112                         /*FALLTHROUGH*/
5113                 case DIF_OP_LDUH:
5114                         regs[rd] = dtrace_load16(regs[r1]);
5115                         break;
5116                 case DIF_OP_RLDUW:
5117                         if (!dtrace_canload(regs[r1], 4, mstate, vstate))
5118                                 break;
5119                         /*FALLTHROUGH*/
5120                 case DIF_OP_LDUW:
5121                         regs[rd] = dtrace_load32(regs[r1]);
5122                         break;
5123                 case DIF_OP_RLDX:
5124                         if (!dtrace_canload(regs[r1], 8, mstate, vstate))
5125                                 break;
5126                         /*FALLTHROUGH*/
5127                 case DIF_OP_LDX:
5128                         regs[rd] = dtrace_load64(regs[r1]);
5129                         break;
5130                 case DIF_OP_ULDSB:
5131                         regs[rd] = (int8_t)
5132                             dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5133                         break;
5134                 case DIF_OP_ULDSH:
5135                         regs[rd] = (int16_t)
5136                             dtrace_fuword16((void *)(uintptr_t)regs[r1]);
5137                         break;
5138                 case DIF_OP_ULDSW:
5139                         regs[rd] = (int32_t)
5140                             dtrace_fuword32((void *)(uintptr_t)regs[r1]);
5141                         break;
5142                 case DIF_OP_ULDUB:
5143                         regs[rd] =
5144                             dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5145                         break;


6044                 caddr_t tomax;
6045 
6046                 /*
6047                  * A little subtlety with the following (seemingly innocuous)
6048                  * declaration of the automatic 'val':  by looking at the
6049                  * code, you might think that it could be declared in the
6050                  * action processing loop, below.  (That is, it's only used in
6051                  * the action processing loop.)  However, it must be declared
6052                  * out of that scope because in the case of DIF expression
6053                  * arguments to aggregating actions, one iteration of the
6054                  * action loop will use the last iteration's value.
6055                  */
6056 #ifdef lint
6057                 uint64_t val = 0;
6058 #else
6059                 uint64_t val;
6060 #endif
6061 
6062                 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
6063                 mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
6064                 mstate.dtms_getf = NULL;
6065 
6066                 *flags &= ~CPU_DTRACE_ERROR;
6067 
6068                 if (prov == dtrace_provider) {
6069                         /*
6070                          * If dtrace itself is the provider of this probe,
6071                          * we're only going to continue processing the ECB if
6072                          * arg0 (the dtrace_state_t) is equal to the ECB's
6073                          * creating state.  (This prevents disjoint consumers
6074                          * from seeing one another's metaprobes.)
6075                          */
6076                         if (arg0 != (uint64_t)(uintptr_t)state)
6077                                 continue;
6078                 }
6079 
6080                 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
6081                         /*
6082                          * We're not currently active.  If our provider isn't
6083                          * the dtrace pseudo provider, we're not interested.
6084                          */
6085                         if (prov != dtrace_provider)


8552                 case DIF_OP_STLS:
8553                 case DIF_OP_STGAA:
8554                 case DIF_OP_STTAA:
8555                         if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
8556                                 err += efunc(pc, "invalid variable %u\n", v);
8557                         if (rs >= nregs)
8558                                 err += efunc(pc, "invalid register %u\n", rs);
8559                         break;
8560                 case DIF_OP_CALL:
8561                         if (subr > DIF_SUBR_MAX)
8562                                 err += efunc(pc, "invalid subr %u\n", subr);
8563                         if (rd >= nregs)
8564                                 err += efunc(pc, "invalid register %u\n", rd);
8565                         if (rd == 0)
8566                         err += efunc(pc, "cannot write to %%r0\n");
8567 
8568                         if (subr == DIF_SUBR_COPYOUT ||
8569                             subr == DIF_SUBR_COPYOUTSTR) {
8570                                 dp->dtdo_destructive = 1;
8571                         }
8572 
8573                         if (subr == DIF_SUBR_GETF) {
8574                                 /*
8575                                  * If we have a getf() we need to record that
8576                                  * in our state.  Note that our state can be
8577                                  * NULL if this is a helper -- but in that
8578                                  * case, the call to getf() is itself illegal,
8579                                  * and will be caught (slightly later) when
8580                                  * the helper is validated.
8581                                  */
8582                                 if (vstate->dtvs_state != NULL)
8583                                         vstate->dtvs_state->dts_getf++;
8584                         }
8585 
8586                         break;
8587                 case DIF_OP_PUSHTR:
8588                         if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
8589                                 err += efunc(pc, "invalid ref type %u\n", type);
8590                         if (r2 >= nregs)
8591                                 err += efunc(pc, "invalid register %u\n", r2);
8592                         if (rs >= nregs)
8593                                 err += efunc(pc, "invalid register %u\n", rs);
8594                         break;
8595                 case DIF_OP_PUSHTV:
8596                         if (type != DIF_TYPE_CTF)
8597                                 err += efunc(pc, "invalid val type %u\n", type);
8598                         if (r2 >= nregs)
8599                                 err += efunc(pc, "invalid register %u\n", r2);
8600                         if (rs >= nregs)
8601                                 err += efunc(pc, "invalid register %u\n", rs);
8602                         break;
8603                 default:
8604                         err += efunc(pc, "invalid opcode %u\n",
8605                             DIF_INSTR_OP(instr));


13208         hdlr.cyh_arg = state;
13209         hdlr.cyh_level = CY_LOW_LEVEL;
13210 
13211         when.cyt_when = 0;
13212         when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
13213 
13214         state->dts_cleaner = cyclic_add(&hdlr, &when);
13215 
13216         hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
13217         hdlr.cyh_arg = state;
13218         hdlr.cyh_level = CY_LOW_LEVEL;
13219 
13220         when.cyt_when = 0;
13221         when.cyt_interval = dtrace_deadman_interval;
13222 
13223         state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
13224         state->dts_deadman = cyclic_add(&hdlr, &when);
13225 
13226         state->dts_activity = DTRACE_ACTIVITY_WARMUP;
13227 
13228         if (state->dts_getf != 0 &&
13229             !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
13230                 /*
13231                  * We don't have kernel privs but we have at least one call
13232                  * to getf(); we need to bump our zone's count, and (if
13233                  * this is the first enabling to have an unprivileged call
13234                  * to getf()) we need to hook into closef().
13235                  */
13236                 state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
13237 
13238                 if (dtrace_getf++ == 0) {
13239                         ASSERT(dtrace_closef == NULL);
13240                         dtrace_closef = dtrace_getf_barrier;
13241                 }
13242         }
13243 
13244         /*
13245          * Now it's time to actually fire the BEGIN probe.  We need to disable
13246          * interrupts here both to record the CPU on which we fired the BEGIN
13247          * probe (the data from this CPU will be processed first at user
13248          * level) and to manually activate the buffer for this CPU.
13249          */
13250         cookie = dtrace_interrupt_disable();
13251         *cpu = CPU->cpu_id;
13252         ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
13253         state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
13254 
13255         dtrace_probe(dtrace_probeid_begin,
13256             (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13257         dtrace_interrupt_enable(cookie);
13258         /*
13259          * We may have had an exit action from a BEGIN probe; only change our
13260          * state to ACTIVE if we're still in WARMUP.
13261          */
13262         ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
13263             state->dts_activity == DTRACE_ACTIVITY_DRAINING);


13340         ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
13341 
13342         /*
13343          * Finally, we can release the reserve and call the END probe.  We
13344          * disable interrupts across calling the END probe to allow us to
13345          * return the CPU on which we actually called the END probe.  This
13346          * allows user-land to be sure that this CPU's principal buffer is
13347          * processed last.
13348          */
13349         state->dts_reserve = 0;
13350 
13351         cookie = dtrace_interrupt_disable();
13352         *cpu = CPU->cpu_id;
13353         dtrace_probe(dtrace_probeid_end,
13354             (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13355         dtrace_interrupt_enable(cookie);
13356 
13357         state->dts_activity = DTRACE_ACTIVITY_STOPPED;
13358         dtrace_sync();
13359 
13360         if (state->dts_getf != 0 &&
13361             !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
13362                 /*
13363                  * We don't have kernel privs but we have at least one call
13364                  * to getf(); we need to lower our zone's count, and (if
13365                  * this is the last enabling to have an unprivileged call
13366                  * to getf()) we need to clear the closef() hook.
13367                  */
13368                 ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
13369                 ASSERT(dtrace_closef == dtrace_getf_barrier);
13370                 ASSERT(dtrace_getf > 0);
13371 
13372                 state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
13373 
13374                 if (--dtrace_getf == 0)
13375                         dtrace_closef = NULL;
13376         }
13377 
13378         return (0);
13379 }
13380 
13381 static int
13382 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
13383     dtrace_optval_t val)
13384 {
13385         ASSERT(MUTEX_HELD(&dtrace_lock));
13386 
13387         if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13388                 return (EBUSY);
13389 
13390         if (option >= DTRACEOPT_MAX)
13391                 return (EINVAL);
13392 
13393         if (option != DTRACEOPT_CPU && val < 0)
13394                 return (EINVAL);
13395 
13396         switch (option) {
13397         case DTRACEOPT_DESTRUCTIVE:


14918                 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
14919                 range = kmem_zalloc(nsize, KM_SLEEP);
14920 
14921                 if (dtrace_toxrange != NULL) {
14922                         ASSERT(osize != 0);
14923                         bcopy(dtrace_toxrange, range, osize);
14924                         kmem_free(dtrace_toxrange, osize);
14925                 }
14926 
14927                 dtrace_toxrange = range;
14928         }
14929 
14930         ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
14931         ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);
14932 
14933         dtrace_toxrange[dtrace_toxranges].dtt_base = base;
14934         dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
14935         dtrace_toxranges++;
14936 }
14937 
14938 static void
14939 dtrace_getf_barrier()
14940 {
14941         /*
14942          * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
14943          * that contain calls to getf(), this routine will be called on every
14944          * closef() before either the underlying vnode is released or the
14945          * file_t itself is freed.  By the time we are here, it is essential
14946          * that the file_t can no longer be accessed from a call to getf()
14947          * in probe context -- that assures that a dtrace_sync() can be used
14948          * to clear out any enablings referring to the old structures.
14949          */
14950         if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
14951             kcred->cr_zone->zone_dtrace_getf != 0)
14952                 dtrace_sync();
14953 }
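dtrace_getf_barrier() only matters if file-close processing actually calls it; that is the job of the dtrace_closef hook installed above when the first enabling with an unprivileged getf() goes live. A sketch of the call site that closef() would need, under the assumption that this is how the hook is consumed:

	/* in closef(), before the vnode is released or the file_t is freed */
	if (dtrace_closef != NULL)
		(*dtrace_closef)();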
14954 
14955 /*
14956  * DTrace Driver Cookbook Functions
14957  */
14958 /*ARGSUSED*/
14959 static int
14960 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
14961 {
14962         dtrace_provider_id_t id;
14963         dtrace_state_t *state = NULL;
14964         dtrace_enabling_t *enab;
14965 
14966         mutex_enter(&cpu_lock);
14967         mutex_enter(&dtrace_provider_lock);
14968         mutex_enter(&dtrace_lock);
14969 
14970         if (ddi_soft_state_init(&dtrace_softstate,
14971             sizeof (dtrace_state_t), 0) != 0) {
14972                 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
14973                 mutex_exit(&cpu_lock);
14974                 mutex_exit(&dtrace_provider_lock);


16091 
16092                 /*
16093                  * If we're being detached with anonymous state, we need to
16094                  * indicate to the kernel debugger that DTrace is now inactive.
16095                  */
16096                 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
16097         }
16098 
16099         bzero(&dtrace_anon, sizeof (dtrace_anon_t));
16100         unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
16101         dtrace_cpu_init = NULL;
16102         dtrace_helpers_cleanup = NULL;
16103         dtrace_helpers_fork = NULL;
16104         dtrace_cpustart_init = NULL;
16105         dtrace_cpustart_fini = NULL;
16106         dtrace_debugger_init = NULL;
16107         dtrace_debugger_fini = NULL;
16108         dtrace_modload = NULL;
16109         dtrace_modunload = NULL;
16110 
16111         ASSERT(dtrace_getf == 0);
16112         ASSERT(dtrace_closef == NULL);
16113 
16114         mutex_exit(&cpu_lock);
16115 
16116         if (dtrace_helptrace_enabled) {
16117                 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
16118                 dtrace_helptrace_buffer = NULL;
16119         }
16120 
16121         kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
16122         dtrace_probes = NULL;
16123         dtrace_nprobes = 0;
16124 
16125         dtrace_hash_destroy(dtrace_bymod);
16126         dtrace_hash_destroy(dtrace_byfunc);
16127         dtrace_hash_destroy(dtrace_byname);
16128         dtrace_bymod = NULL;
16129         dtrace_byfunc = NULL;
16130         dtrace_byname = NULL;
16131 
16132         kmem_cache_destroy(dtrace_state_cache);
16133         vmem_destroy(dtrace_minor);