Print this page
2915 DTrace in a zone should see "cpu", "curpsinfo", et al
2916 DTrace in a zone should be able to access fds[]
2917 DTrace in a zone should have limited provider access
Reviewed by: Joshua M. Clulow <josh@sysmgr.org>
Reviewed by: Adam Leventhal <ahl@delphix.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/dtrace/dtrace.c
          +++ new/usr/src/uts/common/dtrace/dtrace.c
↓ open down ↓ 163 lines elided ↑ open up ↑
 164  164  static dev_info_t       *dtrace_devi;           /* device info */
 165  165  static vmem_t           *dtrace_arena;          /* probe ID arena */
 166  166  static vmem_t           *dtrace_minor;          /* minor number arena */
 167  167  static taskq_t          *dtrace_taskq;          /* task queue */
 168  168  static dtrace_probe_t   **dtrace_probes;        /* array of all probes */
 169  169  static int              dtrace_nprobes;         /* number of probes */
 170  170  static dtrace_provider_t *dtrace_provider;      /* provider list */
 171  171  static dtrace_meta_t    *dtrace_meta_pid;       /* user-land meta provider */
 172  172  static int              dtrace_opens;           /* number of opens */
 173  173  static int              dtrace_helpers;         /* number of helpers */
      174 +static int              dtrace_getf;            /* number of unpriv getf()s */
 174  175  static void             *dtrace_softstate;      /* softstate pointer */
 175  176  static dtrace_hash_t    *dtrace_bymod;          /* probes hashed by module */
 176  177  static dtrace_hash_t    *dtrace_byfunc;         /* probes hashed by function */
 177  178  static dtrace_hash_t    *dtrace_byname;         /* probes hashed by name */
 178  179  static dtrace_toxrange_t *dtrace_toxrange;      /* toxic range array */
 179  180  static int              dtrace_toxranges;       /* number of toxic ranges */
 180  181  static int              dtrace_toxranges_max;   /* size of toxic range array */
 181  182  static dtrace_anon_t    dtrace_anon;            /* anonymous enabling */
 182  183  static kmem_cache_t     *dtrace_state_cache;    /* cache for dynamic state */
 183  184  static uint64_t         dtrace_vtime_references; /* number of vtimestamp refs */
↓ open down ↓ 182 lines elided ↑ open up ↑
 366  367  #define DTRACE_ALIGNCHECK(addr, size, flags)
 367  368  #endif
 368  369  
 369  370  /*
 370  371   * Test whether a range of memory starting at testaddr of size testsz falls
 371  372   * within the range of memory described by addr, sz.  We take care to avoid
 372  373   * problems with overflow and underflow of the unsigned quantities, and
 373  374   * disallow all negative sizes.  Ranges of size 0 are allowed.
 374  375   */
 375  376  #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
 376      -        ((testaddr) - (baseaddr) < (basesz) && \
 377      -        (testaddr) + (testsz) - (baseaddr) <= (basesz) && \
      377 +        ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
      378 +        (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
 378  379          (testaddr) + (testsz) >= (testaddr))
 379  380  
 380  381  /*
 381  382   * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 382  383   * alloc_sz on the righthand side of the comparison in order to avoid overflow
 383  384   * or underflow in the comparison with it.  This is simpler than the INRANGE
 384  385   * check above, because we know that the dtms_scratch_ptr is valid in the
 385  386   * range.  Allocations of size zero are allowed.
 386  387   */
 387  388  #define DTRACE_INSCRATCH(mstate, alloc_sz) \
↓ open down ↓ 80 lines elided ↑ open up ↑
 468  469      dtrace_state_t *, uint64_t, uint64_t);
 469  470  static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
 470  471  static void dtrace_buffer_drop(dtrace_buffer_t *);
 471  472  static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
 472  473  static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
 473  474      dtrace_state_t *, dtrace_mstate_t *);
 474  475  static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
 475  476      dtrace_optval_t);
 476  477  static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
 477  478  static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
      479 +static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
      480 +static void dtrace_getf_barrier(void);
 478  481  
 479  482  /*
 480  483   * DTrace Probe Context Functions
 481  484   *
 482  485   * These functions are called from probe context.  Because probe context is
 483  486   * any context in which C may be called, arbitrary locks may be held,
 484  487   * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 485  488   * As a result, functions called from probe context may only call other DTrace
 486  489   * support functions -- they may not interact at all with the system at large.
 487  490   * (Note that the ASSERT macro is made probe-context safe by redefining it in
↓ open down ↓ 124 lines elided ↑ open up ↑
 612  615           */
 613  616          if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
 614  617              mstate->dtms_scratch_size))
 615  618                  return (1);
 616  619  
 617  620          /*
 618  621           * Now check to see if it's a dynamic variable.  This check will pick
 619  622           * up both thread-local variables and any global dynamically-allocated
 620  623           * variables.
 621  624           */
 622      -        if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
      625 +        if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
 623  626              vstate->dtvs_dynvars.dtds_size)) {
 624  627                  dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
 625  628                  uintptr_t base = (uintptr_t)dstate->dtds_base +
 626  629                      (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
 627  630                  uintptr_t chunkoffs;
 628  631  
 629  632                  /*
 630  633                   * Before we assume that we can store here, we need to make
 631  634                   * sure that it isn't in our metadata -- storing to our
 632  635                   * dynamic variable metadata would corrupt our state.  For
↓ open down ↓ 46 lines elided ↑ open up ↑
 679  682   * illegal value slot.
 680  683   *
 681  684   * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 682  685   * appropriate memory access protection.
 683  686   */
 684  687  static int
 685  688  dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
 686  689      dtrace_vstate_t *vstate)
 687  690  {
 688  691          volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
      692 +        file_t *fp;
 689  693  
 690  694          /*
 691  695           * If we hold the privilege to read from kernel memory, then
 692  696           * everything is readable.
 693  697           */
 694  698          if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
 695  699                  return (1);
 696  700  
 697  701          /*
 698  702           * You can obviously read that which you can store.
 699  703           */
 700  704          if (dtrace_canstore(addr, sz, mstate, vstate))
 701  705                  return (1);
 702  706  
 703  707          /*
 704  708           * We're allowed to read from our own string table.
 705  709           */
 706      -        if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
      710 +        if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
 707  711              mstate->dtms_difo->dtdo_strlen))
 708  712                  return (1);
 709  713  
      714 +        if (vstate->dtvs_state != NULL &&
      715 +            dtrace_priv_proc(vstate->dtvs_state, mstate)) {
      716 +                proc_t *p;
      717 +
      718 +                /*
      719 +                 * When we have privileges to the current process, there are
      720 +                 * several context-related kernel structures that are safe to
      721 +                 * read, even absent the privilege to read from kernel memory.
      722 +                 * These reads are safe because these structures contain only
      723 +                 * state that (1) we're permitted to read, (2) is harmless or
      724 +                 * (3) contains pointers to additional kernel state that we're
      725 +                 * not permitted to read (and as such, do not present an
      726 +                 * opportunity for privilege escalation).  Finally (and
      727 +                 * critically), because of the nature of their relation with
      728 +                 * the current thread context, the memory associated with these
      729 +                 * structures cannot change over the duration of probe context,
      730 +                 * and it is therefore impossible for this memory to be
      731 +                 * deallocated and reallocated as something else while it's
      732 +                 * being operated upon.
      733 +                 */
      734 +                if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
      735 +                        return (1);
      736 +
      737 +                if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
      738 +                    sz, curthread->t_procp, sizeof (proc_t))) {
      739 +                        return (1);
      740 +                }
      741 +
      742 +                if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
      743 +                    curthread->t_cred, sizeof (cred_t))) {
      744 +                        return (1);
      745 +                }
      746 +
      747 +                if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
      748 +                    &(p->p_pidp->pid_id), sizeof (pid_t))) {
      749 +                        return (1);
      750 +                }
      751 +
      752 +                if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
      753 +                    curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
      754 +                        return (1);
      755 +                }
      756 +        }
      757 +
      758 +        if ((fp = mstate->dtms_getf) != NULL) {
      759 +                uintptr_t psz = sizeof (void *);
      760 +                vnode_t *vp;
      761 +                vnodeops_t *op;
      762 +
      763 +                /*
      764 +                 * When getf() returns a file_t, the enabling is implicitly
      765 +                 * granted the (transient) right to read the returned file_t
      766 +                 * as well as the v_path and v_op->vnop_name of the underlying
      767 +                 * vnode.  These accesses are allowed after a successful
      768 +                 * getf() because the members that they refer to cannot change
      769 +                 * once set -- and the barrier logic in the kernel's closef()
      770 +                 * path assures that the file_t and its referenced vnode_t
      771 +                 * cannot themselves be stale (that is, it is impossible for
      772 +                 * either dtms_getf itself or its f_vnode member to reference
      773 +                 * freed memory).
      774 +                 */
      775 +                if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
      776 +                        return (1);
      777 +
      778 +                if ((vp = fp->f_vnode) != NULL) {
      779 +                        if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
      780 +                                return (1);
      781 +
      782 +                        if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
      783 +                            vp->v_path, strlen(vp->v_path) + 1)) {
      784 +                                return (1);
      785 +                        }
      786 +
      787 +                        if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
      788 +                                return (1);
      789 +
      790 +                        if ((op = vp->v_op) != NULL &&
      791 +                            DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
      792 +                                return (1);
      793 +                        }
      794 +
      795 +                        if (op != NULL && op->vnop_name != NULL &&
      796 +                            DTRACE_INRANGE(addr, sz, op->vnop_name,
      797 +                            strlen(op->vnop_name) + 1)) {
      798 +                                return (1);
      799 +                        }
      800 +                }
      801 +        }
      802 +
 710  803          DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
 711  804          *illval = addr;
 712  805          return (0);
 713  806  }
 714  807  
 715  808  /*
 716  809   * Convenience routine to check to see if a given string is within a memory
 717  810   * region in which a load may be issued given the user's privilege level;
 718  811   * this exists so that we don't need to issue unnecessary dtrace_strlen()
 719  812   * calls in the event that the user has all privileges.
↓ open down ↓ 358 lines elided ↑ open up ↑
1078 1171  dtrace_priv_proc_common_zone(dtrace_state_t *state)
1079 1172  {
1080 1173          cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1081 1174  
1082 1175          /*
1083 1176           * We should always have a non-NULL state cred here, since if cred
1084 1177           * is null (anonymous tracing), we fast-path bypass this routine.
1085 1178           */
1086 1179          ASSERT(s_cr != NULL);
1087 1180  
1088      -        if ((cr = CRED()) != NULL &&
1089      -            s_cr->cr_zone == cr->cr_zone)
     1181 +        if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
1090 1182                  return (1);
1091 1183  
1092 1184          return (0);
1093 1185  }
1094 1186  
1095 1187  /*
1096 1188   * This privilege check should be used by actions and subroutines to
1097 1189   * verify that the process has not setuid or changed credentials.
1098 1190   */
1099 1191  static int
↓ open down ↓ 102 lines elided ↑ open up ↑
1202 1294          dtrace_provider_t *prov = probe->dtpr_provider;
1203 1295          dtrace_pops_t *pops = &prov->dtpv_pops;
1204 1296          int mode = DTRACE_MODE_NOPRIV_DROP;
1205 1297  
1206 1298          ASSERT(ecb->dte_cond);
1207 1299  
1208 1300          if (pops->dtps_mode != NULL) {
1209 1301                  mode = pops->dtps_mode(prov->dtpv_arg,
1210 1302                      probe->dtpr_id, probe->dtpr_arg);
1211 1303  
1212      -                ASSERT((mode & DTRACE_MODE_USER) ||
1213      -                    (mode & DTRACE_MODE_KERNEL));
1214      -                ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
1215      -                    (mode & DTRACE_MODE_NOPRIV_DROP));
     1304 +                ASSERT(mode & (DTRACE_MODE_USER | DTRACE_MODE_KERNEL));
     1305 +                ASSERT(mode & (DTRACE_MODE_NOPRIV_RESTRICT |
     1306 +                    DTRACE_MODE_NOPRIV_DROP));
1216 1307          }
1217 1308  
1218 1309          /*
1219 1310           * If the dte_cond bits indicate that this consumer is only allowed to
1220      -         * see user-mode firings of this probe, call the provider's dtps_mode()
1221      -         * entry point to check that the probe was fired while in a user
1222      -         * context.  If that's not the case, use the policy specified by the
1223      -         * provider to determine if we drop the probe or merely restrict
1224      -         * operation.
     1311 +         * see user-mode firings of this probe, check that the probe was fired
     1312 +         * while in a user context.  If that's not the case, use the policy
     1313 +         * specified by the provider to determine if we drop the probe or
     1314 +         * merely restrict operation.
1225 1315           */
1226 1316          if (ecb->dte_cond & DTRACE_COND_USERMODE) {
1227 1317                  ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
1228 1318  
1229 1319                  if (!(mode & DTRACE_MODE_USER)) {
1230 1320                          if (mode & DTRACE_MODE_NOPRIV_DROP)
1231 1321                                  return (0);
1232 1322  
1233 1323                          mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
1234 1324                  }
↓ open down ↓ 46 lines elided ↑ open up ↑
1281 1371                  if ((cr = CRED()) == NULL ||
1282 1372                      s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
1283 1373                          if (mode & DTRACE_MODE_NOPRIV_DROP)
1284 1374                                  return (0);
1285 1375  
1286 1376                          mstate->dtms_access &=
1287 1377                              ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
1288 1378                  }
1289 1379          }
1290 1380  
     1381 +        /*
     1382 +         * By merits of being in this code path at all, we have limited
     1383 +         * privileges.  If the provider has indicated that limited privileges
     1384 +         * are to denote restricted operation, strip off the ability to access
     1385 +         * arguments.
     1386 +         */
     1387 +        if (mode & DTRACE_MODE_LIMITEDPRIV_RESTRICT)
     1388 +                mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
     1389 +
1291 1390          return (1);
1292 1391  }
1293 1392  
1294 1393  /*
1295 1394   * Note:  not called from probe context.  This function is called
1296 1395   * asynchronously (and at a regular interval) from outside of probe context to
1297 1396   * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
1298 1397   * cleaning is explained in detail in <sys/dtrace_impl.h>.
1299 1398   */
1300 1399  void
↓ open down ↓ 1616 lines elided ↑ open up ↑
2917 3016  
2918 3017                  rval = dtrace_getvmreg(ndx,
2919 3018                      &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);
2920 3019  
2921 3020                  DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
2922 3021  
2923 3022                  return (rval);
2924 3023          }
2925 3024  
2926 3025          case DIF_VAR_CURTHREAD:
2927      -                if (!dtrace_priv_kernel(state))
     3026 +                if (!dtrace_priv_proc(state, mstate))
2928 3027                          return (0);
2929 3028                  return ((uint64_t)(uintptr_t)curthread);
2930 3029  
2931 3030          case DIF_VAR_TIMESTAMP:
2932 3031                  if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
2933 3032                          mstate->dtms_timestamp = dtrace_gethrtime();
2934 3033                          mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
2935 3034                  }
2936 3035                  return (mstate->dtms_timestamp);
2937 3036  
↓ open down ↓ 1507 lines elided ↑ open up ↑
4445 4544  
4446 4545                  for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
4447 4546                          dest[j] = dtrace_load8(src + i);
4448 4547  
4449 4548                  dest[j] = '\0';
4450 4549                  regs[rd] = (uintptr_t)dest;
4451 4550                  mstate->dtms_scratch_ptr += size;
4452 4551                  break;
4453 4552          }
4454 4553  
     4554 +        case DIF_SUBR_GETF: {
     4555 +                uintptr_t fd = tupregs[0].dttk_value;
     4556 +                uf_info_t *finfo = &curthread->t_procp->p_user.u_finfo;
     4557 +                file_t *fp;
     4558 +
     4559 +                if (!dtrace_priv_proc(state, mstate)) {
     4560 +                        regs[rd] = NULL;
     4561 +                        break;
     4562 +                }
     4563 +
     4564 +                /*
     4565 +                 * This is safe because fi_nfiles only increases, and the
     4566 +                 * fi_list array is not freed when the array size doubles.
     4567 +                 * (See the comment in flist_grow() for details on the
     4568 +                 * management of the u_finfo structure.)
     4569 +                 */
     4570 +                fp = fd < finfo->fi_nfiles ? finfo->fi_list[fd].uf_file : NULL;
     4571 +
     4572 +                mstate->dtms_getf = fp;
     4573 +                regs[rd] = (uintptr_t)fp;
     4574 +                break;
     4575 +        }
     4576 +
4455 4577          case DIF_SUBR_CLEANPATH: {
4456 4578                  char *dest = (char *)mstate->dtms_scratch_ptr, c;
4457 4579                  uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4458 4580                  uintptr_t src = tupregs[0].dttk_value;
4459 4581                  int i = 0, j = 0;
     4582 +                zone_t *z;
4460 4583  
4461 4584                  if (!dtrace_strcanload(src, size, mstate, vstate)) {
4462 4585                          regs[rd] = NULL;
4463 4586                          break;
4464 4587                  }
4465 4588  
4466 4589                  if (!DTRACE_INSCRATCH(mstate, size)) {
4467 4590                          DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4468 4591                          regs[rd] = NULL;
4469 4592                          break;
↓ open down ↓ 78 lines elided ↑ open up ↑
4548 4671                           */
4549 4672                          i--;
4550 4673                          while (j != 0 && dest[--j] != '/')
4551 4674                                  continue;
4552 4675  
4553 4676                          if (c == '\0')
4554 4677                                  dest[++j] = '/';
4555 4678                  } while (c != '\0');
4556 4679  
4557 4680                  dest[j] = '\0';
     4681 +
     4682 +                if (mstate->dtms_getf != NULL &&
     4683 +                    !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
     4684 +                    (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
     4685 +                        /*
     4686 +                         * If we've done a getf() as a part of this ECB and we
     4687 +                         * don't have kernel access (and we're not in the global
     4688 +                         * zone), check if the path we cleaned up begins with
     4689 +                         * the zone's root path, and trim it off if so.  Note
     4690 +                         * that this is an output cleanliness issue, not a
     4691 +                         * security issue: knowing one's zone root path does
     4692 +                         * not enable privilege escalation.
     4693 +                         */
     4694 +                        if (strstr(dest, z->zone_rootpath) == dest)
     4695 +                                dest += strlen(z->zone_rootpath) - 1;
     4696 +                }
     4697 +
4558 4698                  regs[rd] = (uintptr_t)dest;
4559 4699                  mstate->dtms_scratch_ptr += size;
4560 4700                  break;
4561 4701          }
4562 4702  
4563 4703          case DIF_SUBR_INET_NTOA:
4564 4704          case DIF_SUBR_INET_NTOA6:
4565 4705          case DIF_SUBR_INET_NTOP: {
4566 4706                  size_t size;
4567 4707                  int af, argi, i;
↓ open down ↓ 364 lines elided ↑ open up ↑
4932 5072                          break;
4933 5073                  case DIF_OP_BLE:
4934 5074                          if (cc_z | (cc_n ^ cc_v))
4935 5075                                  pc = DIF_INSTR_LABEL(instr);
4936 5076                          break;
4937 5077                  case DIF_OP_BLEU:
4938 5078                          if (cc_c | cc_z)
4939 5079                                  pc = DIF_INSTR_LABEL(instr);
4940 5080                          break;
4941 5081                  case DIF_OP_RLDSB:
4942      -                        if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
4943      -                                *flags |= CPU_DTRACE_KPRIV;
4944      -                                *illval = regs[r1];
     5082 +                        if (!dtrace_canload(regs[r1], 1, mstate, vstate))
4945 5083                                  break;
4946      -                        }
4947 5084                          /*FALLTHROUGH*/
4948 5085                  case DIF_OP_LDSB:
4949 5086                          regs[rd] = (int8_t)dtrace_load8(regs[r1]);
4950 5087                          break;
4951 5088                  case DIF_OP_RLDSH:
4952      -                        if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
4953      -                                *flags |= CPU_DTRACE_KPRIV;
4954      -                                *illval = regs[r1];
     5089 +                        if (!dtrace_canload(regs[r1], 2, mstate, vstate))
4955 5090                                  break;
4956      -                        }
4957 5091                          /*FALLTHROUGH*/
4958 5092                  case DIF_OP_LDSH:
4959 5093                          regs[rd] = (int16_t)dtrace_load16(regs[r1]);
4960 5094                          break;
4961 5095                  case DIF_OP_RLDSW:
4962      -                        if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
4963      -                                *flags |= CPU_DTRACE_KPRIV;
4964      -                                *illval = regs[r1];
     5096 +                        if (!dtrace_canload(regs[r1], 4, mstate, vstate))
4965 5097                                  break;
4966      -                        }
4967 5098                          /*FALLTHROUGH*/
4968 5099                  case DIF_OP_LDSW:
4969 5100                          regs[rd] = (int32_t)dtrace_load32(regs[r1]);
4970 5101                          break;
4971 5102                  case DIF_OP_RLDUB:
4972      -                        if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
4973      -                                *flags |= CPU_DTRACE_KPRIV;
4974      -                                *illval = regs[r1];
     5103 +                        if (!dtrace_canload(regs[r1], 1, mstate, vstate))
4975 5104                                  break;
4976      -                        }
4977 5105                          /*FALLTHROUGH*/
4978 5106                  case DIF_OP_LDUB:
4979 5107                          regs[rd] = dtrace_load8(regs[r1]);
4980 5108                          break;
4981 5109                  case DIF_OP_RLDUH:
4982      -                        if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
4983      -                                *flags |= CPU_DTRACE_KPRIV;
4984      -                                *illval = regs[r1];
     5110 +                        if (!dtrace_canload(regs[r1], 2, mstate, vstate))
4985 5111                                  break;
4986      -                        }
4987 5112                          /*FALLTHROUGH*/
4988 5113                  case DIF_OP_LDUH:
4989 5114                          regs[rd] = dtrace_load16(regs[r1]);
4990 5115                          break;
4991 5116                  case DIF_OP_RLDUW:
4992      -                        if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
4993      -                                *flags |= CPU_DTRACE_KPRIV;
4994      -                                *illval = regs[r1];
     5117 +                        if (!dtrace_canload(regs[r1], 4, mstate, vstate))
4995 5118                                  break;
4996      -                        }
4997 5119                          /*FALLTHROUGH*/
4998 5120                  case DIF_OP_LDUW:
4999 5121                          regs[rd] = dtrace_load32(regs[r1]);
5000 5122                          break;
5001 5123                  case DIF_OP_RLDX:
5002      -                        if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
5003      -                                *flags |= CPU_DTRACE_KPRIV;
5004      -                                *illval = regs[r1];
     5124 +                        if (!dtrace_canload(regs[r1], 8, mstate, vstate))
5005 5125                                  break;
5006      -                        }
5007 5126                          /*FALLTHROUGH*/
5008 5127                  case DIF_OP_LDX:
5009 5128                          regs[rd] = dtrace_load64(regs[r1]);
5010 5129                          break;
5011 5130                  case DIF_OP_ULDSB:
5012 5131                          regs[rd] = (int8_t)
5013 5132                              dtrace_fuword8((void *)(uintptr_t)regs[r1]);
5014 5133                          break;
5015 5134                  case DIF_OP_ULDSH:
5016 5135                          regs[rd] = (int16_t)
↓ open down ↓ 918 lines elided ↑ open up ↑
5935 6054                   * action loop will use the last iteration's value.
5936 6055                   */
5937 6056  #ifdef lint
5938 6057                  uint64_t val = 0;
5939 6058  #else
5940 6059                  uint64_t val;
5941 6060  #endif
5942 6061  
5943 6062                  mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
5944 6063                  mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
     6064 +                mstate.dtms_getf = NULL;
     6065 +
5945 6066                  *flags &= ~CPU_DTRACE_ERROR;
5946 6067  
5947 6068                  if (prov == dtrace_provider) {
5948 6069                          /*
5949 6070                           * If dtrace itself is the provider of this probe,
5950 6071                           * we're only going to continue processing the ECB if
5951 6072                           * arg0 (the dtrace_state_t) is equal to the ECB's
5952 6073                           * creating state.  (This prevents disjoint consumers
5953 6074                           * from seeing one another's metaprobes.)
5954 6075                           */
↓ open down ↓ 2486 lines elided ↑ open up ↑
8441 8562                                  err += efunc(pc, "invalid subr %u\n", subr);
8442 8563                          if (rd >= nregs)
8443 8564                                  err += efunc(pc, "invalid register %u\n", rd);
8444 8565                          if (rd == 0)
8445 8566                                  err += efunc(pc, "cannot write to %r0\n");
8446 8567  
8447 8568                          if (subr == DIF_SUBR_COPYOUT ||
8448 8569                              subr == DIF_SUBR_COPYOUTSTR) {
8449 8570                                  dp->dtdo_destructive = 1;
8450 8571                          }
     8572 +
     8573 +                        if (subr == DIF_SUBR_GETF) {
     8574 +                                /*
     8575 +                                 * If we have a getf() we need to record that
     8576 +                                 * in our state.  Note that our state can be
     8577 +                                 * NULL if this is a helper -- but in that
     8578 +                                 * case, the call to getf() is itself illegal,
     8579 +                                 * and will be caught (slightly later) when
     8580 +                                 * the helper is validated.
     8581 +                                 */
     8582 +                                if (vstate->dtvs_state != NULL)
     8583 +                                        vstate->dtvs_state->dts_getf++;
     8584 +                        }
     8585 +
8451 8586                          break;
8452 8587                  case DIF_OP_PUSHTR:
8453 8588                          if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
8454 8589                                  err += efunc(pc, "invalid ref type %u\n", type);
8455 8590                          if (r2 >= nregs)
8456 8591                                  err += efunc(pc, "invalid register %u\n", r2);
8457 8592                          if (rs >= nregs)
8458 8593                                  err += efunc(pc, "invalid register %u\n", rs);
8459 8594                          break;
8460 8595                  case DIF_OP_PUSHTV:
↓ open down ↓ 4622 lines elided ↑ open up ↑
13083 13218          hdlr.cyh_level = CY_LOW_LEVEL;
13084 13219  
13085 13220          when.cyt_when = 0;
13086 13221          when.cyt_interval = dtrace_deadman_interval;
13087 13222  
13088 13223          state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
13089 13224          state->dts_deadman = cyclic_add(&hdlr, &when);
13090 13225  
13091 13226          state->dts_activity = DTRACE_ACTIVITY_WARMUP;
13092 13227  
     13228 +        if (state->dts_getf != 0 &&
     13229 +            !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
     13230 +                /*
     13231 +                 * We don't have kernel privs but we have at least one call
     13232 +                 * to getf(); we need to bump our zone's count, and (if
     13233 +                 * this is the first enabling to have an unprivileged call
     13234 +                 * to getf()) we need to hook into closef().
     13235 +                 */
     13236 +                state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
     13237 +
     13238 +                if (dtrace_getf++ == 0) {
     13239 +                        ASSERT(dtrace_closef == NULL);
     13240 +                        dtrace_closef = dtrace_getf_barrier;
     13241 +                }
     13242 +        }
     13243 +
13093 13244          /*
13094 13245           * Now it's time to actually fire the BEGIN probe.  We need to disable
13095 13246           * interrupts here both to record the CPU on which we fired the BEGIN
13096 13247           * probe (the data from this CPU will be processed first at user
13097 13248           * level) and to manually activate the buffer for this CPU.
13098 13249           */
13099 13250          cookie = dtrace_interrupt_disable();
13100 13251          *cpu = CPU->cpu_id;
13101 13252          ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
13102 13253          state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
↓ open down ↓ 96 lines elided ↑ open up ↑
13199 13350  
13200 13351          cookie = dtrace_interrupt_disable();
13201 13352          *cpu = CPU->cpu_id;
13202 13353          dtrace_probe(dtrace_probeid_end,
13203 13354              (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13204 13355          dtrace_interrupt_enable(cookie);
13205 13356  
13206 13357          state->dts_activity = DTRACE_ACTIVITY_STOPPED;
13207 13358          dtrace_sync();
13208 13359  
     13360 +        if (state->dts_getf != 0 &&
     13361 +            !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
     13362 +                /*
     13363 +                 * We don't have kernel privs but we have at least one call
     13364 +                 * to getf(); we need to lower our zone's count, and (if
     13365 +                 * this is the last enabling to have an unprivileged call
     13366 +                 * to getf()) we need to clear the closef() hook.
     13367 +                 */
     13368 +                ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
     13369 +                ASSERT(dtrace_closef == dtrace_getf_barrier);
     13370 +                ASSERT(dtrace_getf > 0);
     13371 +
     13372 +                state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
     13373 +
     13374 +                if (--dtrace_getf == 0)
     13375 +                        dtrace_closef = NULL;
     13376 +        }
     13377 +
13209 13378          return (0);
13210 13379  }
13211 13380  
13212 13381  static int
13213 13382  dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
13214 13383      dtrace_optval_t val)
13215 13384  {
13216 13385          ASSERT(MUTEX_HELD(&dtrace_lock));
13217 13386  
13218 13387          if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
↓ open down ↓ 1540 lines elided ↑ open up ↑
14759 14928          }
14760 14929  
14761 14930          ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
14762 14931          ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);
14763 14932  
14764 14933          dtrace_toxrange[dtrace_toxranges].dtt_base = base;
14765 14934          dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
14766 14935          dtrace_toxranges++;
14767 14936  }
14768 14937  
     14938 +static void
     14939 +dtrace_getf_barrier()
     14940 +{
     14941 +        /*
     14942 +         * Installed as the dtrace_closef hook while there are unprivileged
     14943 +         * (non-DTRACE_CRV_KERNEL) enablings that call getf(), this routine
     14944 +         * runs on every closef() before the underlying vnode is released or
     14945 +         * the file_t is freed.  By now the file_t must be unreachable from
     14946 +         * getf() in probe context, so that a dtrace_sync() can clear out any
     14947 +         * enablings referring to the old structures.  The sync is skipped
     14948 +         * unless the current zone or kcred's zone has a nonzero getf count.
     14949 +         */
     14950 +        if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
     14951 +            kcred->cr_zone->zone_dtrace_getf != 0)
     14952 +                dtrace_sync();
     14953 +}
     14954 +
14769 14955  /*
14770 14956   * DTrace Driver Cookbook Functions
14771 14957   */
14772 14958  /*ARGSUSED*/
14773 14959  static int
14774 14960  dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
14775 14961  {
14776 14962          dtrace_provider_id_t id;
14777 14963          dtrace_state_t *state = NULL;
14778 14964          dtrace_enabling_t *enab;
↓ open down ↓ 1136 lines elided ↑ open up ↑
15915 16101          dtrace_cpu_init = NULL;
15916 16102          dtrace_helpers_cleanup = NULL;
15917 16103          dtrace_helpers_fork = NULL;
15918 16104          dtrace_cpustart_init = NULL;
15919 16105          dtrace_cpustart_fini = NULL;
15920 16106          dtrace_debugger_init = NULL;
15921 16107          dtrace_debugger_fini = NULL;
15922 16108          dtrace_modload = NULL;
15923 16109          dtrace_modunload = NULL;
15924 16110  
     16111 +        ASSERT(dtrace_getf == 0);
     16112 +        ASSERT(dtrace_closef == NULL);
     16113 +
15925 16114          mutex_exit(&cpu_lock);
15926 16115  
15927 16116          if (dtrace_helptrace_enabled) {
15928 16117                  kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
15929 16118                  dtrace_helptrace_buffer = NULL;
15930 16119          }
15931 16120  
15932 16121          kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
15933 16122          dtrace_probes = NULL;
15934 16123          dtrace_nprobes = 0;
↓ open down ↓ 129 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX