1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 
  27 /*
  28  * tnf driver - provides probe control and kernel trace buffer access
  29  * to the user programs prex and tnfxtract.
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/param.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/file.h>
  36 #include <sys/cmn_err.h>
  37 #include <sys/fcntl.h>
  38 #include <sys/uio.h>
  39 #include <sys/kmem.h>
  40 #include <sys/cred.h>
  41 #include <sys/mman.h>
  42 #include <sys/errno.h>
  43 #include <sys/stat.h>
  44 #include <sys/conf.h>
  45 #include <sys/ddi.h>
  46 #include <sys/sunddi.h>
  47 #include <sys/modctl.h>
  48 #include <sys/tnf.h>
  49 #include <sys/debug.h>
  50 #include <sys/devops.h>
  51 #include <vm/as.h>
  52 #include <vm/seg_kp.h>
  53 #include <sys/tnf_probe.h>
  54 #include <sys/kobj.h>
  55 
  56 #include "tnf_buf.h"
  57 #include "tnf_types.h"
  58 #include "tnf_trace.h"
  59 
  60 #ifndef NPROBE
  61 
  62 /*
  63  * Each probe is independently put in the kernel, prex uses
  64  * __tnf_probe_list_head and __tnf_tag_list_head as pointers to linked list
  65  * for probes and static tnf_tag_data_t, respectively.
  66  * tnf used the elf relocation record to build a separate linked list for
  67  * the probes and tnf_tag_data_t. We will describe how the linked list for
  68  * __tnf_tag_list_head is made, the probe list is very similar.
  69  * During the dynamic relocation(in uts/sparc/krtld/kobj_reloc.c),
  70  * the &__tnf_tag_version_1(the first member in tnf_tag_data_t data struct)
  71  * (and since it is a global variable which was never defined) will be filled
  72  * with 0. The following code in kobj_reloc.c will get the address of current
  73  * __tnf_tag_list_head and put it in value_p:
  74  *   #define TAG_MARKER_SYMBOL       "__tnf_tag_version_1"
  75  *   if (strcmp(symname, TAG_MARKER_SYMBOL) == 0) {
  76  *       *addend_p = 0;
  77  *       *value_p = (Addr) __tnf_tag_list_head; (value_p points to list head)
  78  *       __tnf_tag_list_head = (void *)*offset_p;(list head is the next record)
  79  *       return (0);
  80  *   }
  81  *
 * The function do_reloc() (in kobj_reloc.c) will then store value_p into
 * &__tnf_tag_version_1.
 * Now &__tnf_tag_version_1 points to the previous list head
 * and __tnf_tag_list_head points to the new list head.
 * This is equivalent to attaching a node at the beginning of the list.
  87  *
  88  */
  89 extern tnf_probe_control_t *__tnf_probe_list_head;
  90 extern tnf_tag_data_t *__tnf_tag_list_head;
  91 extern int tnf_changed_probe_list;
  92 
  93 static int tnf_attach(dev_info_t *, ddi_attach_cmd_t);
  94 static int tnf_detach(dev_info_t *, ddi_detach_cmd_t);
  95 static int tnf_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
  96 static int tnf_open(dev_t *, int, int, struct cred *);
  97 static int tnf_close(dev_t, int, int, struct cred *);
  98 #ifdef UNUSED
  99 static int tnf_mmap(dev_t, off_t, int);
 100 #endif
 101 static int tnf_ioctl(dev_t, int, intptr_t, int, struct cred *, int *);
 102 #ifdef UNUSED
 103 static int tnf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t,
 104     int, char *, caddr_t, int *);
 105 #endif
 106 static dev_info_t *tnf_devi;
 107 
 108 static struct {
 109         int             tnf_probe_count;
 110         boolean_t       tnf_pidfilter_mode;
 111         boolean_t       ctldev_is_open;
 112         int             mapdev_open_count;
 113         kmutex_t        tnf_mtx;
 114 } tnf_drv_state = { 0, B_FALSE, B_FALSE, 0 };
 115 
 116 static int tnf_getmaxprobe(caddr_t, int);
 117 static int tnf_getprobevals(caddr_t, int);
 118 static int tnf_getprobestring(caddr_t, int);
 119 static int tnf_setprobevals(caddr_t, int);
 120 static int tnf_getstate(caddr_t, int);
 121 static int tnf_allocbuf(intptr_t);
 122 static int tnf_deallocbuf(void);
 123 static int tnf_settracing(int);
 124 static int tnf_pidfilterset(int);
 125 static int tnf_pidfilterget(caddr_t, int);
 126 static int tnf_getpidstate(caddr_t, int);
 127 static int tnf_setpidstate(int, pid_t, int);
 128 static int tnf_getheader(caddr_t, int);
 129 static int tnf_getblock(caddr_t, int);
 130 static int tnf_getfwzone(caddr_t, int);
 131 
 132 static void *tnf_test_1(void *, tnf_probe_control_t *, tnf_probe_setup_t *);
 133 static void *tnf_test_2(void *, tnf_probe_control_t *, tnf_probe_setup_t *);
 134 
 135 #define TNFCTL_MINOR 0
 136 #define TNFMAP_MINOR 1
 137 
 138 struct cb_ops   tnf_cb_ops = {
 139         tnf_open,               /* open */
 140         tnf_close,              /* close */
 141         nodev,                  /* strategy */
 142         nodev,                  /* print */
 143         nodev,                  /* dump */
 144         nodev,                  /* read */
 145         nodev,                  /* write */
 146         tnf_ioctl,              /* ioctl */
 147         nodev,                  /* devmap */
 148         nodev,                  /* mmap */
 149         nodev,                  /* segmap */
 150         nochpoll,               /* poll */
 151         ddi_prop_op,            /* prop_op */
 152         0,                      /* streamtab  */
 153         D_NEW | D_MP            /* Driver compatibility flag */
 154 };
 155 
 156 struct dev_ops  tnf_ops = {
 157         DEVO_REV,               /* devo_rev, */
 158         0,                      /* refcnt  */
 159         tnf_info,               /* info */
 160         nulldev,                /* identify */
 161         nulldev,                /* probe */
 162         tnf_attach,             /* attach */
 163         tnf_detach,             /* detach */
 164         nodev,                  /* reset */
 165         &tnf_cb_ops,                /* driver operations */
 166         (struct bus_ops *)0,    /* no bus operations */
 167         NULL,                   /* power */
 168         ddi_quiesce_not_needed,         /* quiesce */
 169 };
 170 
 171 extern struct mod_ops mod_driverops;
 172 
 173 static struct modldrv modldrv = {
 174         &mod_driverops,
 175         "kernel probes driver",
 176         &tnf_ops,
 177 };
 178 
 179 static struct modlinkage modlinkage = {
 180         MODREV_1,
 181         { (void *)&modldrv, NULL }
 182 };
 183 
 184 int
 185 _init()
 186 {
 187         register int error;
 188 
 189         mutex_init(&tnf_drv_state.tnf_mtx, NULL, MUTEX_DEFAULT, NULL);
 190 
 191         if ((error = mod_install(&modlinkage)) != 0) {
 192                 mutex_destroy(&tnf_drv_state.tnf_mtx);
 193                 return (error);
 194         }
 195 
 196         /* Give t0 a tpdp */
 197         if (!t0.t_tnf_tpdp)
 198                 t0.t_tnf_tpdp = kmem_zalloc(sizeof (tnf_ops_t), KM_SLEEP);
 199         /* Initialize tag system */
 200         tnf_tag_core_init();
 201         tnf_tag_trace_init();
 202         tnf_changed_probe_list = 1;
 203         return (0);
 204 }
 205 
 206 int
 207 _fini()
 208 {
 209         /* Not safe to unload this module, currently */
 210         return (EBUSY);
 211 }
 212 
 213 int
 214 _info(struct modinfo *modinfop)
 215 {
 216         return (mod_info(&modlinkage, modinfop));
 217 }
 218 
 219 /* ARGSUSED */
 220 static int
 221 tnf_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 222 {
 223         register int error;
 224 
 225         switch (infocmd) {
 226         case DDI_INFO_DEVT2DEVINFO:
 227                 *result = (void *)tnf_devi;
 228                 error = DDI_SUCCESS;
 229                 break;
 230         case DDI_INFO_DEVT2INSTANCE:
 231                 *result = (void *)0;
 232                 error = DDI_SUCCESS;
 233                 break;
 234         default:
 235                 error = DDI_FAILURE;
 236         }
 237         return (error);
 238 }
 239 
 240 static int
 241 tnf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 242 {
 243         if (cmd != DDI_ATTACH)
 244                 return (DDI_FAILURE);
 245         if ((ddi_create_minor_node(devi, "tnfctl", S_IFCHR, TNFCTL_MINOR,
 246             DDI_PSEUDO, NULL) == DDI_FAILURE) ||
 247             (ddi_create_minor_node(devi, "tnfmap", S_IFCHR, TNFMAP_MINOR,
 248             DDI_PSEUDO, NULL) == DDI_FAILURE)) {
 249                 ddi_remove_minor_node(devi, NULL);
 250                 return (DDI_FAILURE);
 251         }
 252         tnf_devi = devi;
 253         return (DDI_SUCCESS);
 254 }
 255 
 256 static int
 257 tnf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
 258 {
 259         if (cmd != DDI_DETACH)
 260                 return (DDI_FAILURE);
 261         ddi_remove_minor_node(devi, NULL);
 262         return (DDI_SUCCESS);
 263 }
 264 
 265 /*
 266  * property operations. Return the size of the kernel trace buffer.  We
 267  * only handle size property requests.  Others are passed on.
 268  */
 269 #ifdef UNUSED
 270 static int
 271 tnf_prop_op(dev_t dev, dev_info_t *di, ddi_prop_op_t prop,
 272     int m, char *name, caddr_t valuep, int *lengthp)
 273 {
 274         int length, *retbuf, size;
 275 
 276         if (strcmp(name, "size") == 0) {
 277 
 278                 /* Don't need tnf_mtx, since mapdev_open_count > 0 */
 279                 size = tnf_trace_file_size;
 280 
 281                 length = *lengthp;              /* get caller's length */
 282                 *lengthp = sizeof (int);        /* set caller's length */
 283 
 284                 switch (prop) {
 285 
 286                 case PROP_LEN:
 287                         return (DDI_PROP_SUCCESS);
 288 
 289                 case PROP_LEN_AND_VAL_ALLOC:
 290                         retbuf = kmem_alloc(sizeof (int),
 291                             (m & DDI_PROP_CANSLEEP) ? KM_SLEEP : KM_NOSLEEP);
 292                         if (retbuf == NULL)
 293                                 return (DDI_PROP_NO_MEMORY);
 294                         *(int **)valuep = retbuf;       /* set caller's buf */
 295                         *retbuf = size;
 296                         return (DDI_PROP_SUCCESS);
 297 
 298                 case PROP_LEN_AND_VAL_BUF:
 299                         if (length < sizeof (int))
 300                                 return (DDI_PROP_BUF_TOO_SMALL);
 301                         *(int *)valuep = size;
 302                         return (DDI_PROP_SUCCESS);
 303                 }
 304         }
 305         return (ddi_prop_op(dev, dip, prop, m, name, valuep, lengthp));
 306 }
 307 #endif
 308 
 309 /* ARGSUSED */
 310 static int
 311 tnf_open(dev_t *devp, int flag, int otyp, struct cred *cred)
 312 {
 313         int err = 0;
 314         mutex_enter(&tnf_drv_state.tnf_mtx);
 315         if (getminor(*devp) == TNFCTL_MINOR) {
 316                 if (tnf_drv_state.ctldev_is_open)
 317                         err = EBUSY;
 318                 else {
 319                         tnf_drv_state.ctldev_is_open = B_TRUE;
 320                         /* stop autounloading -- XXX temporary */
 321                         modunload_disable();
 322                 }
 323         } else {
 324                 /* ASSERT(getminor(*devp) == TNFMAP_MINOR) */
 325                 ++tnf_drv_state.mapdev_open_count;
 326         }
 327         mutex_exit(&tnf_drv_state.tnf_mtx);
 328         return (err);
 329 }
 330 
 331 /* ARGSUSED */
 332 static int
 333 tnf_close(dev_t dev, int flag, int otyp, struct cred *cred)
 334 {
 335         if (getminor(dev) == TNFCTL_MINOR) {
 336                 /*
 337                  * Request the reenablement of autounloading
 338                  */
 339                 modunload_enable();
 340                 tnf_drv_state.ctldev_is_open = B_FALSE;
 341         } else {
 342                 /* ASSERT(getminor(dev) == TNFMAP_MINOR) */
 343                 /*
 344                  * Unconditionally zero the open count since close()
 345                  * is called when last client closes the device.
 346                  */
 347                 tnf_drv_state.mapdev_open_count = 0;
 348         }
 349         return (0);
 350 }
 351 
 352 /*
 353  * return the address of the image referenced by dev.
 354  *
 355  * 1191344: aliasing problem on VAC machines.  It could be made to
 356  * work by ensuring that tnf_buf is allocated on a vac_size boundary.
 357  */
 358 #ifdef UNUSED
 359 /*ARGSUSED*/
 360 static int
 361 tnf_mmap(dev_t dev, off_t off, int prot)
 362 {
 363         register caddr_t addr;
 364         register caddr_t pg_offset;
 365 
 366         if (getminor(dev) != TNFMAP_MINOR)
 367                 return (-1);
 368         if (tnf_buf == 0 || off >= tnf_trace_file_size) {
 369                 return (-1);
 370         }
 371 
 372         addr = tnf_buf;
 373         pg_offset = (caddr_t)((ulong_t)addr + (ulong_t)off);
 374         return ((int)hat_getpfnum(kas.a_hat, pg_offset));
 375 }
 376 #endif
 377 
 378 /*ARGSUSED4*/
 379 static int
 380 tnf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
 381         cred_t *credp, int *rvalp)
 382 {
 383         int filterval = 1;
 384 
 385         if ((mode & FMODELS) != FNATIVE)
 386                 return (ENOTSUP);
 387 
 388         if (getminor(dev) != TNFCTL_MINOR &&
 389             cmd != TIFIOCGSTATE &&
 390             cmd != TIFIOCGHEADER &&
 391             cmd != TIFIOCGBLOCK &&
 392             cmd != TIFIOCGFWZONE)
 393                 return (EINVAL);
 394 
 395         switch (cmd) {
 396         case TIFIOCGMAXPROBE:
 397                 return (tnf_getmaxprobe((caddr_t)arg, mode));
 398         case TIFIOCGPROBEVALS:
 399                 return (tnf_getprobevals((caddr_t)arg, mode));
 400         case TIFIOCGPROBESTRING:
 401                 return (tnf_getprobestring((caddr_t)arg, mode));
 402         case TIFIOCSPROBEVALS:
 403                 return (tnf_setprobevals((caddr_t)arg, mode));
 404         case TIFIOCGSTATE:
 405                 return (tnf_getstate((caddr_t)arg, mode));
 406         case TIFIOCALLOCBUF:
 407                 return (tnf_allocbuf(arg));
 408         case TIFIOCDEALLOCBUF:
 409                 return (tnf_deallocbuf());
 410         case TIFIOCSTRACING:
 411                 /* LINTED cast from 64-bit integer to 32-bit integer */
 412                 return (tnf_settracing((int)arg));
 413         case TIFIOCSPIDFILTER:
 414                 /* LINTED cast from 64-bit integer to 32-bit integer */
 415                 return (tnf_pidfilterset((int)arg));
 416         case TIFIOCGPIDSTATE:
 417                 return (tnf_getpidstate((caddr_t)arg, mode));
 418         case TIFIOCSPIDOFF:
 419                 filterval = 0;
 420                 /*FALLTHROUGH*/
 421         case TIFIOCSPIDON:
 422                 /* LINTED cast from 64-bit integer to 32-bit integer */
 423                 return (tnf_setpidstate(filterval, (pid_t)arg, mode));
 424         case TIFIOCPIDFILTERGET:
 425                 return (tnf_pidfilterget((caddr_t)arg, mode));
 426         case TIFIOCGHEADER:
 427                 return (tnf_getheader((caddr_t)arg, mode));
 428         case TIFIOCGBLOCK:
 429                 return (tnf_getblock((caddr_t)arg, mode));
 430         case TIFIOCGFWZONE:
 431                 return (tnf_getfwzone((caddr_t)arg, mode));
 432         default:
 433                 return (EINVAL);
 434         }
 435 }
 436 
 437 /*
 438  * ioctls
 439  */
 440 
 441 static int
 442 tnf_getmaxprobe(caddr_t arg, int mode)
 443 {
 444         tnf_probe_control_t *p;
 445         /*
 446          * XXX Still not right for module unload -- just counting
 447          * the probes is not enough
 448          */
 449         if (tnf_changed_probe_list) {
 450                 mutex_enter(&mod_lock);
 451                 tnf_changed_probe_list = 0;
 452                 tnf_drv_state.tnf_probe_count = 0;
 453                 for (p = (tnf_probe_control_t *)__tnf_probe_list_head;
 454                     p != 0; p = p->next)
 455                         ++tnf_drv_state.tnf_probe_count;
 456                 mutex_exit(&mod_lock);
 457         }
 458         if (ddi_copyout((caddr_t)&tnf_drv_state.tnf_probe_count,
 459             arg, sizeof (tnf_drv_state.tnf_probe_count), mode))
 460                 return (EFAULT);
 461         return (0);
 462 }
 463 
 464 static int
 465 tnf_getprobevals(caddr_t arg, int mode)
 466 {
 467         tnf_probevals_t probebuf;
 468         tnf_probe_control_t *p;
 469         int i, retval = 0;
 470 
 471         if (ddi_copyin(arg, (caddr_t)&probebuf, sizeof (probebuf), mode))
 472                 return (EFAULT);
 473 
 474         mutex_enter(&mod_lock);
 475         for (i = 1, p = (tnf_probe_control_t *)__tnf_probe_list_head;
 476             p != NULL && i != probebuf.probenum;
 477             ++i, p = p->next)
 478                 ;
 479         if (p == NULL)
 480                 retval = ENOENT;
 481         else {
 482                 probebuf.enabled = (p->test_func != NULL);
 483                 probebuf.traced = (p->probe_func == tnf_trace_commit);
 484                 /* LINTED assignment of 64-bit integer to 32-bit integer */
 485                 probebuf.attrsize = strlen(p->attrs) + 1;
 486                 if (ddi_copyout((caddr_t)&probebuf,
 487                     arg, sizeof (probebuf), mode))
 488                         retval = EFAULT;
 489         }
 490         mutex_exit(&mod_lock);
 491         return (retval);
 492 }
 493 
 494 static int
 495 tnf_getprobestring(caddr_t arg, int mode)
 496 {
 497         tnf_probevals_t probebuf;
 498         tnf_probe_control_t *p;
 499         int i, retval = 0;
 500 
 501         if (ddi_copyin(arg, (caddr_t)&probebuf, sizeof (probebuf), mode))
 502                 return (EFAULT);
 503 
 504         mutex_enter(&mod_lock);
 505         for (i = 1, p = (tnf_probe_control_t *)__tnf_probe_list_head;
 506             p != NULL && i != probebuf.probenum;
 507             ++i, p = p->next)
 508                 ;
 509         if (p == NULL)
 510                 retval = ENOENT;
 511         else if (ddi_copyout((caddr_t)p->attrs,
 512             arg, strlen(p->attrs) + 1, mode))
 513                 retval = EFAULT;
 514         mutex_exit(&mod_lock);
 515         return (retval);
 516 }
 517 
 518 static int
 519 tnf_setprobevals(caddr_t arg, int mode)
 520 {
 521         tnf_probevals_t probebuf;
 522         tnf_probe_control_t *p;
 523         int i, retval = 0;
 524 
 525         if (ddi_copyin(arg, (caddr_t)&probebuf, sizeof (probebuf), mode))
 526                 return (EFAULT);
 527 
 528         mutex_enter(&mod_lock);
 529         for (i = 1, p = (tnf_probe_control_t *)__tnf_probe_list_head;
 530             p != NULL && i != probebuf.probenum;
 531             ++i, p = p->next)
 532                 ;
 533         if (p == NULL)
 534                 retval = ENOENT;
 535         else {
 536                 /*
 537                  * First do trace, then enable.
 538                  * Set test_func last.
 539                  */
 540                 if (probebuf.traced)
 541                         p->probe_func = tnf_trace_commit;
 542                 else
 543                         p->probe_func = tnf_trace_rollback;
 544                 if (probebuf.enabled) {
 545                         p->alloc_func = tnf_trace_alloc;
 546                         /* this must be set last */
 547                         if (tnf_drv_state.tnf_pidfilter_mode)
 548                                 p->test_func = tnf_test_2;
 549                         else
 550                                 p->test_func = tnf_test_1;
 551                 } else
 552                         p->test_func = NULL;
 553         }
 554         mutex_exit(&mod_lock);
 555         return (retval);
 556 }
 557 
 558 static int
 559 tnf_getstate(caddr_t arg, int mode)
 560 {
 561         tifiocstate_t   tstate;
 562         proc_t          *procp;
 563 
 564         if (tnf_buf == NULL) {
 565                 tstate.buffer_state = TIFIOCBUF_NONE;
 566                 tstate.buffer_size = 0;
 567         } else {
 568                 switch (tnfw_b_state & ~TNFW_B_STOPPED) {
 569                 case TNFW_B_RUNNING:
 570                         tstate.buffer_state = TIFIOCBUF_OK;
 571                         break;
 572                 case TNFW_B_NOBUFFER:
 573                         tstate.buffer_state = TIFIOCBUF_UNINIT;
 574                         break;
 575                 case TNFW_B_BROKEN:
 576                         tstate.buffer_state = TIFIOCBUF_BROKEN;
 577                         break;
 578                 }
 579                 /* LINTED assignment of 64-bit integer to 32-bit integer */
 580                 tstate.buffer_size = tnf_trace_file_size;
 581         }
 582         tstate.trace_stopped = tnfw_b_state & TNFW_B_STOPPED;
 583         tstate.pidfilter_mode = tnf_drv_state.tnf_pidfilter_mode;
 584         tstate.pidfilter_size = 0;
 585 
 586         mutex_enter(&pidlock);
 587         for (procp = practive; procp != NULL; procp = procp->p_next)
 588                 if (PROC_IS_FILTER(procp))
 589                         tstate.pidfilter_size++;
 590         mutex_exit(&pidlock);
 591 
 592         if (ddi_copyout((caddr_t)&tstate, arg, sizeof (tstate), mode))
 593                 return (EFAULT);
 594         return (0);
 595 }
 596 
 597 static int
 598 tnf_allocbuf(intptr_t arg)
 599 {
 600         size_t bufsz;
 601 
 602         if (tnf_buf != NULL)
 603                 return (EBUSY);
 604 
 605         bufsz = roundup((size_t)arg, PAGESIZE);
 606         /*
 607          * Validate size
 608          * XXX Take kernel VM into consideration as well
 609          */
 610         /* bug fix #4057599 if (bufsz > (physmem << PAGESHIFT) / 2) */
 611         if (btop(bufsz) > (physmem / 2))
 612                 return (ENOMEM);
 613         if (bufsz < TNF_TRACE_FILE_MIN)
 614                 bufsz = TNF_TRACE_FILE_MIN;
 615 
 616 #if TNF_USE_KMA
 617         tnf_buf = kmem_zalloc(bufsz, KM_SLEEP);
 618 #else
 619         /* LINTED cast from 64-bit integer to 32-bit intege */
 620         tnf_buf = segkp_get(segkp, (int)bufsz,
 621             KPD_ZERO | KPD_LOCKED | KPD_NO_ANON);
 622 #endif
 623         if (tnf_buf == NULL)
 624                 return (ENOMEM);
 625 
 626         tnf_trace_file_size = bufsz;
 627         tnf_trace_init();
 628         return (0);
 629 }
 630 
 631 /*
 632  * Process a "deallocate buffer" ioctl request.  Tracing must be turned
 633  * off.  We must clear references to the buffer from the tag sites;
 634  * invalidate all threads' notions of block ownership; make sure nobody
 635  * is executing a probe (they might have started before tracing was
 636  * turned off); and free the buffer.
 637  */
 638 static int
 639 tnf_deallocbuf(void)
 640 {
 641         tnf_ops_t *tpdp;
 642         kthread_t *t;
 643         tnf_probe_control_t *probep;
 644         tnf_tag_data_t *tagp;
 645 
 646         if (tnf_drv_state.mapdev_open_count > 0 || tnf_tracing_active)
 647                 return (EBUSY);
 648         if (tnf_buf == NULL)
 649                 return (ENOMEM);
 650 
 651         /*
 652          * Make sure nobody is executing a probe.
 653          * (They could be if they got started while
 654          * tnf_tracing_active was still on.)  Grab
 655          * pidlock, and check the busy flag in all
 656          * TPDP's.
 657          */
 658         mutex_enter(&pidlock);
 659         t = curthread;
 660         do {
 661                 if (t->t_tnf_tpdp != NULL) {
 662                 /* LINTED pointer cast may result in improper alignment */
 663                         tpdp = (tnf_ops_t *)t->t_tnf_tpdp;
 664                         if (LOCK_HELD(&tpdp->busy)) {
 665                                 mutex_exit(&pidlock);
 666                                 return (EBUSY);
 667                         }
 668                         tpdp->wcb.tnfw_w_pos.tnfw_w_block = NULL;
 669                         tpdp->wcb.tnfw_w_tag_pos.tnfw_w_block = NULL;
 670                         tpdp->schedule.record_p = NULL;
 671                 }
 672                 t = t->t_next;
 673         } while (t != curthread);
 674         mutex_exit(&pidlock);
 675 
 676         /*
 677          * Zap all references to the buffer we're freeing.
 678          * Grab mod_lock while walking list to keep it
 679          * consistent.
 680          */
 681         mutex_enter(&mod_lock);
 682         tagp = (tnf_tag_data_t *)__tnf_tag_list_head;
 683         while (tagp != NULL) {
 684                 tagp->tag_index = 0;
 685                 tagp = (tnf_tag_data_t *)tagp->tag_version;
 686         }
 687         probep = (tnf_probe_control_t *)__tnf_probe_list_head;
 688         while (probep != NULL) {
 689                 probep->index = 0;
 690                 probep = probep->next;
 691         }
 692         mutex_exit(&mod_lock);
 693 
 694         tnfw_b_state = TNFW_B_NOBUFFER | TNFW_B_STOPPED;
 695 #if TNF_USE_KMA
 696         kmem_free(tnf_buf, tnf_trace_file_size);
 697 #else
 698         segkp_release(segkp, tnf_buf);
 699 #endif
 700         tnf_buf = NULL;
 701 
 702         return (0);
 703 }
 704 
 705 static int
 706 tnf_settracing(int arg)
 707 {
 708         if (arg)
 709                 if (tnf_buf == NULL)
 710                         return (ENOMEM);
 711                 else
 712                         tnf_trace_on();
 713         else
 714                 tnf_trace_off();
 715 
 716 #ifdef _TNF_SPEED_TEST
 717 #define NITER   255
 718         {
 719                 int i;
 720 
 721                 for (i = 0; i < NITER; i++)
 722                         TNF_PROBE_0(tnf_speed_0, "tnf", /* CSTYLED */);
 723                 for (i = 0; i < NITER; i++)
 724                         TNF_PROBE_1(tnf_speed_1, "tnf", /* CSTYLED */,
 725                             tnf_long,   long,   i);
 726                 for (i = 0; i < NITER; i++)
 727                         TNF_PROBE_2(tnf_speed_2, "tnf", /* CSTYLED */,
 728                             tnf_long,   long1,  i,
 729                             tnf_long,   long2,  i);
 730         }
 731 #endif /* _TNF_SPEED_TEST */
 732 
 733         return (0);
 734 }
 735 
 736 static int
 737 tnf_getpidstate(caddr_t arg, int mode)
 738 {
 739         int     err = 0;
 740         pid_t   pid;
 741         proc_t  *procp;
 742         int     result;
 743 
 744         if (ddi_copyin(arg, (caddr_t)&pid, sizeof (pid), mode))
 745                 return (EFAULT);
 746 
 747         mutex_enter(&pidlock);
 748         if ((procp = prfind(pid)) != NULL)
 749                 result = PROC_IS_FILTER(procp);
 750         else
 751                 err = ESRCH;
 752         mutex_exit(&pidlock);
 753 
 754         if (!err)
 755                 if (ddi_copyout((caddr_t)&result, (caddr_t)arg,
 756                     sizeof (result), mode))
 757                         return (EFAULT);
 758         return (err);
 759 }
 760 
 761 /*ARGSUSED*/
 762 static int
 763 tnf_setpidstate(int filterval, pid_t pid, int mode)
 764 {
 765         int     err = 0;
 766         proc_t  *procp;
 767 
 768         mutex_enter(&pidlock);
 769         if ((procp = prfind(pid)) != NULL)
 770                 if (filterval)
 771                         PROC_FILTER_SET(procp);
 772                 else
 773                         PROC_FILTER_CLR(procp);
 774         else
 775                 err = ESRCH;
 776         mutex_exit(&pidlock);
 777 
 778         return (err);
 779 }
 780 
 781 static int
 782 tnf_pidfilterset(int mode)
 783 {
 784         tnf_probe_control_t     *p;
 785         tnf_probe_test_func_t   func;
 786 
 787         tnf_drv_state.tnf_pidfilter_mode = mode;
 788 
 789         /* Establish correct test func for each probe */
 790         if (mode)
 791                 func = tnf_test_2;
 792         else
 793                 func = tnf_test_1;
 794 
 795         mutex_enter(&mod_lock);
 796         p = (tnf_probe_control_t *)__tnf_probe_list_head;
 797         while (p != NULL) {
 798                 if (p->test_func != NULL)
 799                         p->test_func = func;
 800                 p = p->next;
 801         }
 802         mutex_exit(&mod_lock);
 803 
 804         return (0);
 805 }
 806 
 807 static int
 808 tnf_pidfilterget(caddr_t dest, int mode)
 809 {
 810         int err = 0;
 811         int filtercount = 0;
 812         size_t  sz;
 813         pid_t   *filterbuf, *bufp;
 814         proc_t  *procp;
 815 
 816         /* Count how many processes in filter set (upper bound) */
 817         mutex_enter(&pidlock);
 818         for (procp = practive; procp != NULL; procp = procp->p_next)
 819                 if (PROC_IS_FILTER(procp))
 820                         filtercount++;
 821         mutex_exit(&pidlock);
 822 
 823         /* Allocate temp space to hold filter set (upper bound) */
 824         sz = sizeof (pid_t) * (filtercount + 1);
 825         filterbuf = kmem_zalloc(sz, KM_SLEEP);
 826 
 827         /*
 828          * NOTE: The filter set cannot grow between the first and
 829          * second acquisitions of pidlock.  This is currently true
 830          * because:
 831          *      1. /dev/tnfctl is exclusive open, so all driver
 832          *         control operations, including changing the filter
 833          *         set and this code, are effectively single-threaded.
 834          *      2. There is no in-kernel API to manipulate the filter
 835          *         set (i.e. toggle the on/off bit in a proc struct).
 836          *      3. The proc filter bit is not inherited across a fork()
 837          *         operation; the child starts with the bit off.
 838          * If any of these assumptions is invalidated, a possible
 839          * solution is to check whether we're overflowing the allocated
 840          * filterbuf below, and back out and restart from the beginning
 841          * if so.
 842          *
 843          * The code below handles the case when the filter set shrinks
 844          * due to processes exiting.
 845          */
 846 
 847         /* Fill in filter set */
 848         bufp = filterbuf + 1;   /* first word is for count */
 849         filtercount = 0;        /* recomputed below */
 850         mutex_enter(&pidlock);
 851         for (procp = practive; procp != NULL; procp = procp->p_next) {
 852                 if (PROC_IS_FILTER(procp)) {
 853                         filtercount++;
 854                         *bufp++ = procp->p_pid;
 855                 }
 856         }
 857         mutex_exit(&pidlock);
 858 
 859         /* Set filtercount */
 860         *filterbuf = (pid_t)filtercount;
 861 
 862         /* Copy out result */
 863         if (ddi_copyout((caddr_t)filterbuf, dest, sz, mode))
 864                 err = EFAULT;
 865 
 866         /* Free temp space */
 867         kmem_free(filterbuf, sz);
 868 
 869         return (err);
 870 }
 871 
 872 static int
 873 tnf_getheader(caddr_t arg, int mode)
 874 {
 875         if (tnf_buf == NULL)
 876                 return (ENOMEM);
 877         if (ddi_copyout(tnf_buf, arg, TNF_BLOCK_SIZE, mode))
 878                 return (EFAULT);
 879         return (0);
 880 }
 881 
 882 static int
 883 tnf_getblock(caddr_t arg, int mode)
 884 {
 885         int             err = 0;
 886         tifiocgblock_t  parms;
 887         caddr_t         area;
 888         tnf_block_header_t      *blk;
 889 
 890         if (tnf_buf == NULL)
 891                 return (ENOMEM);
 892         if (ddi_copyin(arg, (caddr_t)&parms, sizeof (parms), mode))
 893                 return (EFAULT);
 894         area = tnf_buf + TNF_DIRECTORY_SIZE +
 895             parms.block_num * TNF_BLOCK_SIZE;
 896         if (area < tnf_buf + TNF_DIRECTORY_SIZE ||
 897             area >= tnf_buf + tnf_trace_file_size)
 898                 return (EFAULT);
 899         /* LINTED pointer cast */
 900         blk = (tnf_block_header_t *)area;
 901         /*
 902          * B-lock the block while we're reading
 903          */
 904         if (!lock_try(&blk->B_lock))
 905                 return (EBUSY);
 906         if (ddi_copyout(area, parms.dst_addr, TNF_BLOCK_SIZE, mode))
 907                 err = EFAULT;
 908         lock_clear(&blk->B_lock);
 909         return (err);
 910 }
 911 
 912 static int
 913 tnf_getfwzone(caddr_t arg, int mode)
 914 {
 915         tifiocgfw_t parms;
 916 
 917         if (tnf_buf == NULL)
 918                 return (ENOMEM);
 919         if (ddi_copyin(arg, (caddr_t)&parms, sizeof (parms), mode))
 920                 return (EFAULT);
 921         if (ddi_copyout(tnf_buf + TNF_BLOCK_SIZE + parms.start *
 922             sizeof (tnf_ref32_t), (caddr_t)parms.dst_addr,
 923             parms.slots * (int)(sizeof (tnf_ref32_t)), mode))
 924                 return (EFAULT);
 925         return (0);
 926 }
 927 
 928 /*ARGSUSED*/
 929 static void *
 930 tnf_test_1(void *tpdp, tnf_probe_control_t *probe_p, tnf_probe_setup_t *sp)
 931 {
 932         tpdp = (void *)curthread->t_tnf_tpdp;
 933         if (tpdp != NULL)
 934                 return (tnf_trace_alloc((tnf_ops_t *)tpdp, probe_p, sp));
 935         return (NULL);
 936 }
 937 
 938 /*ARGSUSED*/
 939 static void *
 940 tnf_test_2(void *tpdp, tnf_probe_control_t *probe_p, tnf_probe_setup_t *sp)
 941 {
 942         tpdp = (void *)curthread->t_tnf_tpdp;
 943         if (tpdp != NULL && PROC_IS_FILTER(curproc))
 944                 return (tnf_trace_alloc((tnf_ops_t *)tpdp, probe_p, sp));
 945         return (NULL);
 946 }
 947 
 948 #endif /* !NPROBE */