1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2014, Joyent, Inc. All rights reserved.
  24  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright 2016 Garrett D'Amore
  26  */
  27 
  28 /*
  29  * Kernel statistics framework
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/time.h>
  34 #include <sys/systm.h>
  35 #include <sys/vmsystm.h>
  36 #include <sys/t_lock.h>
  37 #include <sys/param.h>
  38 #include <sys/errno.h>
  39 #include <sys/vmem.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/cmn_err.h>
  42 #include <sys/kstat.h>
  43 #include <sys/sysinfo.h>
  44 #include <sys/cpuvar.h>
  45 #include <sys/fcntl.h>
  46 #include <sys/flock.h>
  47 #include <sys/vnode.h>
  48 #include <sys/vfs.h>
  49 #include <sys/dnlc.h>
  50 #include <sys/var.h>
  51 #include <sys/debug.h>
  52 #include <sys/kobj.h>
  53 #include <sys/avl.h>
  54 #include <sys/pool_pset.h>
  55 #include <sys/cpupart.h>
  56 #include <sys/zone.h>
  57 #include <sys/loadavg.h>
  58 #include <vm/page.h>
  59 #include <vm/anon.h>
  60 #include <vm/seg_kmem.h>
  61 
/*
 * Global lock to protect the AVL trees and kstat_chain_id.
 *
 * The same lock also serializes updates to each kstat's zone list
 * (kstat_zone_add()/kstat_zone_remove()) and to the e_owner field that
 * kstat_hold()/kstat_rele() use to hand out exclusive ownership.
 */
static kmutex_t kstat_chain_lock;

/*
 * Every install/delete of a kstat bumps kstat_chain_id.  This is used by:
 *
 * (1)	/dev/kstat, to detect changes in the kstat chain across ioctls;
 *
 * (2)	kstat_create(), to assign a KID (kstat ID) to each new kstat.
 *	/dev/kstat uses the KID as a cookie for kstat lookups.
 *
 * We reserve the first two IDs because some kstats are created before
 * the well-known ones (kstat_headers = 0, kstat_types = 1).  kstat_init()
 * resets the counter to 0 immediately before creating those two kstats.
 *
 * We also bump the kstat_chain_id if a zone is gaining or losing visibility
 * into a particular kstat, which is logically equivalent to a kstat being
 * installed/deleted.
 */

kid_t kstat_chain_id = 2;
  84 
/*
 * As far as zones are concerned, there are 3 types of kstat:
 *
 * 1) Those which have a well-known name, and which should return per-zone data
 * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
 * is an example of this type of kstat.
 *
 * 2) Those which should only be exported to a particular list of zones.
 * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
 * able to see NFS mounts associated with zone B, while we want the
 * global zone to be able to see all mounts on the system.
 *
 * 3) Those that can be exported to all zones.  Most system-related
 * kstats fall within this category.
 *
 * An ekstat_t thus contains a list of the zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
 *
 * Writing to kstats is currently disallowed from within a non-global
 * zone, although this restriction could be removed in the future.
 */
typedef struct kstat_zone {
	zoneid_t zoneid;		/* zone that may see the kstat */
	struct kstat_zone *next;	/* next entry, NULL at end of list */
} kstat_zone_t;
 114 
/*
 * Extended kstat structure -- for internal use only.
 *
 * The public kstat_t must remain the first member: code in this file
 * converts between kstat_t * and ekstat_t * with a plain cast.
 *
 * e_zone is the embedded head of the zone list; additional entries are
 * allocated by kstat_zone_add() and chained through e_zone.next.
 */
typedef struct ekstat {
	kstat_t		e_ks;		/* the kstat itself */
	size_t		e_size;		/* total allocation size */
	kthread_t	*e_owner;	/* thread holding this kstat */
	kcondvar_t	e_cv;		/* wait for owner == NULL */
	avl_node_t	e_avl_bykid;	/* AVL tree to sort by KID */
	avl_node_t	e_avl_byname;	/* AVL tree to sort by name */
	kstat_zone_t	e_zone;		/* zone to export stats to */
} ekstat_t;
 127 
/*
 * Backing store for kstats.  Until kstat_init() creates kstat_arena,
 * kstat_alloc() carves allocations sequentially out of kstat_initial,
 * tracking its position with kstat_initial_ptr and the bytes remaining
 * with kstat_initial_avail.  kstat_init() later hands the same buffer to
 * the arena as its initial span.
 */
static uint64_t kstat_initial[8192];
static void *kstat_initial_ptr = kstat_initial;
static size_t kstat_initial_avail = sizeof (kstat_initial);
static vmem_t *kstat_arena;

/* Allocation granularity; every ekstat_t allocation is rounded up to it. */
#define	KSTAT_ALIGN	(sizeof (uint64_t))

/*
 * All kstats are kept in two AVL trees, ordered by kstat_compare_bykid()
 * and kstat_compare_byname() respectively (see kstat_hold_bykid() and
 * kstat_hold_byname() for the lookups).
 */
static avl_tree_t kstat_avl_bykid;
static avl_tree_t kstat_avl_byname;
 137 
/*
 * Various pointers we need to create kstats at boot time in kstat_init().
 * Each *_ptr / *_ndata pair supplies the data area and element count of a
 * virtual named kstat whose storage is defined elsewhere.
 */
extern	kstat_named_t	*segmapcnt_ptr;
extern	uint_t		segmapcnt_ndata;
extern	int		segmap_kstat_update(kstat_t *, int);
extern	kstat_named_t	*biostats_ptr;
extern	uint_t		biostats_ndata;
extern	kstat_named_t	*pollstats_ptr;
extern	uint_t		pollstats_ndata;

/*
 * System-wide values.  vac, nproc, boot_time and boot_hrtime are reported
 * through system_misc_kstat_update(); sysinfo and vminfo are exported
 * directly as raw kstats by kstat_init().
 */
extern	int	vac;
extern	uint_t	nproc;
extern	time_t	boot_time;
extern	hrtime_t	boot_hrtime;
extern	sysinfo_t	sysinfo;
extern	vminfo_t	vminfo;
 155 
/*
 * Data area of the unix:0:system_misc named kstat.  kstat_init() installs
 * it as a virtual kstat and system_misc_kstat_update() refreshes every
 * value.  The initializer below is positional, so it must list the
 * entries in exactly the order the members are declared.
 */
struct {
	kstat_named_t ncpus;
	kstat_named_t lbolt;
	kstat_named_t deficit;
	kstat_named_t clk_intr;
	kstat_named_t vac;
	kstat_named_t nproc;
	kstat_named_t avenrun_1min;
	kstat_named_t avenrun_5min;
	kstat_named_t avenrun_15min;
	kstat_named_t boot_time;
	kstat_named_t boot_hrtime;
	kstat_named_t nsec_per_tick;
} system_misc_kstat = {
	{ "ncpus",		KSTAT_DATA_UINT32 },
	{ "lbolt",		KSTAT_DATA_UINT32 },
	{ "deficit",		KSTAT_DATA_UINT32 },
	{ "clk_intr",		KSTAT_DATA_UINT32 },
	{ "vac",		KSTAT_DATA_UINT32 },
	{ "nproc",		KSTAT_DATA_UINT32 },
	{ "avenrun_1min",	KSTAT_DATA_UINT32 },
	{ "avenrun_5min",	KSTAT_DATA_UINT32 },
	{ "avenrun_15min",	KSTAT_DATA_UINT32 },
	{ "boot_time",		KSTAT_DATA_UINT32 },
	{ "boot_hrtime",	KSTAT_DATA_TIME },
	{ "nsec_per_tick",	KSTAT_DATA_UINT32 },
};
 183 
/*
 * Data area of the unix:0:system_pages named kstat.  kstat_init() installs
 * it as a virtual kstat with system_pages_kstat_update() as its update
 * routine.  The initializer below is positional, so it must list the
 * entries in exactly the order the members are declared.
 */
struct {
	kstat_named_t physmem;
	kstat_named_t nalloc;
	kstat_named_t nfree;
	kstat_named_t nalloc_calls;
	kstat_named_t nfree_calls;
	kstat_named_t kernelbase;
	kstat_named_t econtig;
	kstat_named_t freemem;
	kstat_named_t availrmem;
	kstat_named_t lotsfree;
	kstat_named_t desfree;
	kstat_named_t minfree;
	kstat_named_t fastscan;
	kstat_named_t slowscan;
	kstat_named_t nscan;
	kstat_named_t desscan;
	kstat_named_t pp_kernel;
	kstat_named_t pagesfree;
	kstat_named_t pageslocked;
	kstat_named_t pagestotal;
} system_pages_kstat = {
	{ "physmem",		KSTAT_DATA_ULONG },
	{ "nalloc",		KSTAT_DATA_ULONG },
	{ "nfree",		KSTAT_DATA_ULONG },
	{ "nalloc_calls",	KSTAT_DATA_ULONG },
	{ "nfree_calls",	KSTAT_DATA_ULONG },
	{ "kernelbase",		KSTAT_DATA_ULONG },
	{ "econtig",		KSTAT_DATA_ULONG },
	{ "freemem",		KSTAT_DATA_ULONG },
	{ "availrmem",		KSTAT_DATA_ULONG },
	{ "lotsfree",		KSTAT_DATA_ULONG },
	{ "desfree",		KSTAT_DATA_ULONG },
	{ "minfree",		KSTAT_DATA_ULONG },
	{ "fastscan",		KSTAT_DATA_ULONG },
	{ "slowscan",		KSTAT_DATA_ULONG },
	{ "nscan",		KSTAT_DATA_ULONG },
	{ "desscan",		KSTAT_DATA_ULONG },
	{ "pp_kernel",		KSTAT_DATA_ULONG },
	{ "pagesfree",		KSTAT_DATA_ULONG },
	{ "pageslocked",	KSTAT_DATA_ULONG },
	{ "pagestotal",		KSTAT_DATA_ULONG },
};
 227 
/*
 * Forward declarations of the ks_update/ks_snapshot callbacks that
 * kstat_init() attaches to the kstats it creates.
 */
static int header_kstat_update(kstat_t *, int);
static int header_kstat_snapshot(kstat_t *, void *, int);
static int system_misc_kstat_update(kstat_t *, int);
static int system_pages_kstat_update(kstat_t *, int);
 232 
/*
 * Per-type properties, one row per kstat type.  kstat_init() publishes the
 * row names together with their array indices as the unix:0:kstat_types
 * kstat, so the row order is visible to consumers.
 *
 * NOTE(review): the rows are presumably in KSTAT_TYPE_* numeric order, with
 * size being the bytes per data element and min_ndata/max_ndata the
 * permitted range of ks_ndata; the code enforcing this is not in view here.
 */
static struct {
	char	name[KSTAT_STRLEN];	/* type name exported via kstat_types */
	size_t	size;			/* size of one data element */
	uint_t	min_ndata;		/* smallest permitted ks_ndata */
	uint_t	max_ndata;		/* largest permitted ks_ndata */
} kstat_data_type[KSTAT_NUM_TYPES] = {
	{ "raw",		1,			0,	INT_MAX	},
	{ "name=value",		sizeof (kstat_named_t),	0,	INT_MAX	},
	{ "interrupt",		sizeof (kstat_intr_t),	1,	1	},
	{ "i/o",		sizeof (kstat_io_t),	1,	1	},
	{ "event_timer",	sizeof (kstat_timer_t),	0,	INT_MAX	},
};
 245 
 246 int
 247 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
 248 {
 249         ekstat_t *e = (ekstat_t *)k;
 250         kstat_zone_t *kz;
 251 
 252         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 253         for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
 254                 if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
 255                         return (1);
 256                 if (zoneid == kz->zoneid)
 257                         return (1);
 258         }
 259         return (0);
 260 }
 261 
 262 void
 263 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
 264 {
 265         ekstat_t *e = (ekstat_t *)k;
 266         kstat_zone_t *kz, *t = NULL;
 267 
 268         mutex_enter(&kstat_chain_lock);
 269         if (zoneid == e->e_zone.zoneid) {
 270                 kz = e->e_zone.next;
 271                 ASSERT(kz != NULL);
 272                 e->e_zone.zoneid = kz->zoneid;
 273                 e->e_zone.next = kz->next;
 274                 goto out;
 275         }
 276         for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
 277                 if (kz->next->zoneid == zoneid) {
 278                         t = kz->next;
 279                         kz->next = t->next;
 280                         break;
 281                 }
 282         }
 283         ASSERT(t != NULL);      /* we removed something */
 284         kz = t;
 285 out:
 286         kstat_chain_id++;
 287         mutex_exit(&kstat_chain_lock);
 288         kmem_free(kz, sizeof (*kz));
 289 }
 290 
 291 void
 292 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
 293 {
 294         ekstat_t *e = (ekstat_t *)k;
 295         kstat_zone_t *kz;
 296 
 297         kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
 298         if (kz == NULL)
 299                 return;
 300         mutex_enter(&kstat_chain_lock);
 301         kz->zoneid = zoneid;
 302         kz->next = e->e_zone.next;
 303         e->e_zone.next = kz;
 304         kstat_chain_id++;
 305         mutex_exit(&kstat_chain_lock);
 306 }
 307 
 308 /*
 309  * Compare the list of zones for the given kstats, returning 0 if they match
 310  * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
 311  * In practice, this is called indirectly by kstat_hold_byname(), so one of the
 312  * two lists always has one element, and this is an O(n) operation rather than
 313  * O(n^2).
 314  */
 315 static int
 316 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
 317 {
 318         kstat_zone_t *kz1, *kz2;
 319 
 320         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 321         for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
 322                 for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
 323                         if (kz1->zoneid == ALL_ZONES ||
 324                             kz2->zoneid == ALL_ZONES)
 325                                 return (0);
 326                         if (kz1->zoneid == kz2->zoneid)
 327                                 return (0);
 328                 }
 329         }
 330         return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
 331 }
 332 
 333 /*
 334  * Support for keeping kstats sorted in AVL trees for fast lookups.
 335  */
 336 static int
 337 kstat_compare_bykid(const void *a1, const void *a2)
 338 {
 339         const kstat_t *k1 = a1;
 340         const kstat_t *k2 = a2;
 341 
 342         if (k1->ks_kid < k2->ks_kid)
 343                 return (-1);
 344         if (k1->ks_kid > k2->ks_kid)
 345                 return (1);
 346         return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
 347 }
 348 
 349 static int
 350 kstat_compare_byname(const void *a1, const void *a2)
 351 {
 352         const kstat_t *k1 = a1;
 353         const kstat_t *k2 = a2;
 354         int s;
 355 
 356         s = strcmp(k1->ks_module, k2->ks_module);
 357         if (s > 0)
 358                 return (1);
 359         if (s < 0)
 360                 return (-1);
 361 
 362         if (k1->ks_instance < k2->ks_instance)
 363                 return (-1);
 364         if (k1->ks_instance > k2->ks_instance)
 365                 return (1);
 366 
 367         s = strcmp(k1->ks_name, k2->ks_name);
 368         if (s > 0)
 369                 return (1);
 370         if (s < 0)
 371                 return (-1);
 372 
 373         return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
 374 }
 375 
 376 static kstat_t *
 377 kstat_hold(avl_tree_t *t, ekstat_t *template)
 378 {
 379         kstat_t *ksp;
 380         ekstat_t *e;
 381 
 382         mutex_enter(&kstat_chain_lock);
 383         for (;;) {
 384                 ksp = avl_find(t, template, NULL);
 385                 if (ksp == NULL)
 386                         break;
 387                 e = (ekstat_t *)ksp;
 388                 if (e->e_owner == NULL) {
 389                         e->e_owner = curthread;
 390                         break;
 391                 }
 392                 cv_wait(&e->e_cv, &kstat_chain_lock);
 393         }
 394         mutex_exit(&kstat_chain_lock);
 395         return (ksp);
 396 }
 397 
 398 void
 399 kstat_rele(kstat_t *ksp)
 400 {
 401         ekstat_t *e = (ekstat_t *)ksp;
 402 
 403         mutex_enter(&kstat_chain_lock);
 404         ASSERT(e->e_owner == curthread);
 405         e->e_owner = NULL;
 406         cv_broadcast(&e->e_cv);
 407         mutex_exit(&kstat_chain_lock);
 408 }
 409 
 410 kstat_t *
 411 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
 412 {
 413         ekstat_t e;
 414 
 415         e.e_ks.ks_kid = kid;
 416         e.e_zone.zoneid = zoneid;
 417         e.e_zone.next = NULL;
 418 
 419         return (kstat_hold(&kstat_avl_bykid, &e));
 420 }
 421 
 422 kstat_t *
 423 kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
 424     zoneid_t ks_zoneid)
 425 {
 426         ekstat_t e;
 427 
 428         kstat_set_string(e.e_ks.ks_module, ks_module);
 429         e.e_ks.ks_instance = ks_instance;
 430         kstat_set_string(e.e_ks.ks_name, ks_name);
 431         e.e_zone.zoneid = ks_zoneid;
 432         e.e_zone.next = NULL;
 433         return (kstat_hold(&kstat_avl_byname, &e));
 434 }
 435 
 436 static ekstat_t *
 437 kstat_alloc(size_t size)
 438 {
 439         ekstat_t *e = NULL;
 440 
 441         size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
 442 
 443         if (kstat_arena == NULL) {
 444                 if (size <= kstat_initial_avail) {
 445                         e = kstat_initial_ptr;
 446                         kstat_initial_ptr = (char *)kstat_initial_ptr + size;
 447                         kstat_initial_avail -= size;
 448                 }
 449         } else {
 450                 e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
 451         }
 452 
 453         if (e != NULL) {
 454                 bzero(e, size);
 455                 e->e_size = size;
 456                 cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
 457         }
 458 
 459         return (e);
 460 }
 461 
 462 static void
 463 kstat_free(ekstat_t *e)
 464 {
 465         cv_destroy(&e->e_cv);
 466         vmem_free(kstat_arena, e, e->e_size);
 467 }
 468 
 469 /*
 470  * Create various system kstats.
 471  */
 472 void
 473 kstat_init(void)
 474 {
 475         kstat_t *ksp;
 476         ekstat_t *e;
 477         avl_tree_t *t = &kstat_avl_bykid;
 478 
 479         /*
 480          * Set up the kstat vmem arena.
 481          */
 482         kstat_arena = vmem_create("kstat",
 483             kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
 484             segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);
 485 
 486         /*
 487          * Make initial kstats appear as though they were allocated.
 488          */
 489         for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
 490                 (void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
 491                     0, 0, e, (char *)e + e->e_size,
 492                     VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
 493 
 494         /*
 495          * The mother of all kstats.  The first kstat in the system, which
 496          * always has KID 0, has the headers for all kstats (including itself)
 497          * as its data.  Thus, the kstat driver does not need any special
 498          * interface to extract the kstat chain.
 499          */
 500         kstat_chain_id = 0;
 501         ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
 502             0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
 503         if (ksp) {
 504                 ksp->ks_lock = &kstat_chain_lock;
 505                 ksp->ks_update = header_kstat_update;
 506                 ksp->ks_snapshot = header_kstat_snapshot;
 507                 kstat_install(ksp);
 508         } else {
 509                 panic("cannot create kstat 'kstat_headers'");
 510         }
 511 
 512         ksp = kstat_create("unix", 0, "kstat_types", "kstat",
 513             KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
 514         if (ksp) {
 515                 int i;
 516                 kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);
 517 
 518                 for (i = 0; i < KSTAT_NUM_TYPES; i++) {
 519                         kstat_named_init(&kn[i], kstat_data_type[i].name,
 520                             KSTAT_DATA_ULONG);
 521                         kn[i].value.ul = i;
 522                 }
 523                 kstat_install(ksp);
 524         }
 525 
 526         ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
 527             sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
 528         if (ksp) {
 529                 ksp->ks_data = (void *) &sysinfo;
 530                 kstat_install(ksp);
 531         }
 532 
 533         ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
 534             sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
 535         if (ksp) {
 536                 ksp->ks_data = (void *) &vminfo;
 537                 kstat_install(ksp);
 538         }
 539 
 540         ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
 541             segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
 542         if (ksp) {
 543                 ksp->ks_data = (void *) segmapcnt_ptr;
 544                 ksp->ks_update = segmap_kstat_update;
 545                 kstat_install(ksp);
 546         }
 547 
 548         ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
 549             biostats_ndata, KSTAT_FLAG_VIRTUAL);
 550         if (ksp) {
 551                 ksp->ks_data = (void *) biostats_ptr;
 552                 kstat_install(ksp);
 553         }
 554 
 555         ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
 556             sizeof (struct var), KSTAT_FLAG_VIRTUAL);
 557         if (ksp) {
 558                 ksp->ks_data = (void *) &v;
 559                 kstat_install(ksp);
 560         }
 561 
 562         ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
 563             sizeof (system_misc_kstat) / sizeof (kstat_named_t),
 564             KSTAT_FLAG_VIRTUAL);
 565         if (ksp) {
 566                 ksp->ks_data = (void *) &system_misc_kstat;
 567                 ksp->ks_update = system_misc_kstat_update;
 568                 kstat_install(ksp);
 569         }
 570 
 571         ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
 572             sizeof (system_pages_kstat) / sizeof (kstat_named_t),
 573             KSTAT_FLAG_VIRTUAL);
 574         if (ksp) {
 575                 ksp->ks_data = (void *) &system_pages_kstat;
 576                 ksp->ks_update = system_pages_kstat_update;
 577                 kstat_install(ksp);
 578         }
 579 
 580         ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
 581             pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
 582 
 583         if (ksp) {
 584                 ksp->ks_data = pollstats_ptr;
 585                 kstat_install(ksp);
 586         }
 587 }
 588 
 589 /*
 590  * Caller of this should ensure that the string pointed by src
 591  * doesn't change while kstat's lock is held. Not doing so defeats
 592  * kstat's snapshot strategy as explained in <sys/kstat.h>
 593  */
 594 void
 595 kstat_named_setstr(kstat_named_t *knp, const char *src)
 596 {
 597         if (knp->data_type != KSTAT_DATA_STRING)
 598                 panic("kstat_named_setstr('%p', '%p'): "
 599                     "named kstat is not of type KSTAT_DATA_STRING",
 600                     (void *)knp, (void *)src);
 601 
 602         KSTAT_NAMED_STR_PTR(knp) = (char *)src;
 603         if (src != NULL)
 604                 KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
 605         else
 606                 KSTAT_NAMED_STR_BUFLEN(knp) = 0;
 607 }
 608 
 609 void
 610 kstat_set_string(char *dst, const char *src)
 611 {
 612         bzero(dst, KSTAT_STRLEN);
 613         (void) strncpy(dst, src, KSTAT_STRLEN - 1);
 614 }
 615 
 616 void
 617 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
 618 {
 619         kstat_set_string(knp->name, name);
 620         knp->data_type = data_type;
 621 
 622         if (data_type == KSTAT_DATA_STRING)
 623                 kstat_named_setstr(knp, NULL);
 624 }
 625 
/*
 * Set the name of an event-timer kstat entry.  The name is copied into
 * ktp->name with kstat_set_string(), which zero-fills the field and keeps
 * at most KSTAT_STRLEN - 1 characters.  No other field of *ktp is touched.
 */
void
kstat_timer_init(kstat_timer_t *ktp, const char *name)
{
	kstat_set_string(ktp->name, name);
}
 631 
 632 /* ARGSUSED */
 633 static int
 634 default_kstat_update(kstat_t *ksp, int rw)
 635 {
 636         uint_t i;
 637         size_t len = 0;
 638         kstat_named_t *knp;
 639 
 640         /*
 641          * Named kstats with variable-length long strings have a standard
 642          * way of determining how much space is needed to hold the snapshot:
 643          */
 644         if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
 645             (ksp->ks_flags & (KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_LONGSTRINGS))) {
 646 
 647                 /*
 648                  * Add in the space required for the strings
 649                  */
 650                 knp = KSTAT_NAMED_PTR(ksp);
 651                 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 652                         if (knp->data_type == KSTAT_DATA_STRING)
 653                                 len += KSTAT_NAMED_STR_BUFLEN(knp);
 654                 }
 655                 ksp->ks_data_size =
 656                     ksp->ks_ndata * sizeof (kstat_named_t) + len;
 657         }
 658         return (0);
 659 }
 660 
 661 static int
 662 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
 663 {
 664         kstat_io_t *kiop;
 665         hrtime_t cur_time;
 666         size_t  namedsz;
 667 
 668         ksp->ks_snaptime = cur_time = gethrtime();
 669 
 670         if (rw == KSTAT_WRITE) {
 671                 if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
 672                         return (EACCES);
 673                 bcopy(buf, ksp->ks_data, ksp->ks_data_size);
 674                 return (0);
 675         }
 676 
 677         /*
 678          * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
 679          * number of kstat_named_t structures, followed by an optional
 680          * string segment. The ks_data generally holds only the
 681          * kstat_named_t structures. So we copy it first. The strings,
 682          * if any, are copied below. For other kstat types, ks_data holds the
 683          * entire buffer.
 684          */
 685 
 686         namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
 687         if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
 688                 bcopy(ksp->ks_data, buf, namedsz);
 689         else
 690                 bcopy(ksp->ks_data, buf, ksp->ks_data_size);
 691 
 692         /*
 693          * Apply kstat type-specific data massaging
 694          */
 695         switch (ksp->ks_type) {
 696 
 697         case KSTAT_TYPE_IO:
 698                 /*
 699                  * Normalize time units and deal with incomplete transactions
 700                  */
 701                 kiop = (kstat_io_t *)buf;
 702 
 703                 scalehrtime(&kiop->wtime);
 704                 scalehrtime(&kiop->wlentime);
 705                 scalehrtime(&kiop->wlastupdate);
 706                 scalehrtime(&kiop->rtime);
 707                 scalehrtime(&kiop->rlentime);
 708                 scalehrtime(&kiop->rlastupdate);
 709 
 710                 if (kiop->wcnt != 0) {
 711                         /* like kstat_waitq_exit */
 712                         hrtime_t wfix = cur_time - kiop->wlastupdate;
 713                         kiop->wlastupdate = cur_time;
 714                         kiop->wlentime += kiop->wcnt * wfix;
 715                         kiop->wtime += wfix;
 716                 }
 717 
 718                 if (kiop->rcnt != 0) {
 719                         /* like kstat_runq_exit */
 720                         hrtime_t rfix = cur_time - kiop->rlastupdate;
 721                         kiop->rlastupdate = cur_time;
 722                         kiop->rlentime += kiop->rcnt * rfix;
 723                         kiop->rtime += rfix;
 724                 }
 725                 break;
 726 
 727         case KSTAT_TYPE_NAMED:
 728                 /*
 729                  * Massage any long strings in at the end of the buffer
 730                  */
 731                 if (ksp->ks_data_size > namedsz) {
 732                         uint_t i;
 733                         kstat_named_t *knp = buf;
 734                         char *dst = (char *)(knp + ksp->ks_ndata);
 735                         /*
 736                          * Copy strings and update pointers
 737                          */
 738                         for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 739                                 if (knp->data_type == KSTAT_DATA_STRING &&
 740                                     KSTAT_NAMED_STR_PTR(knp) != NULL) {
 741                                         bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
 742                                             KSTAT_NAMED_STR_BUFLEN(knp));
 743                                         KSTAT_NAMED_STR_PTR(knp) = dst;
 744                                         dst += KSTAT_NAMED_STR_BUFLEN(knp);
 745                                 }
 746                         }
 747                         ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
 748                 }
 749                 break;
 750         }
 751         return (0);
 752 }
 753 
 754 static int
 755 header_kstat_update(kstat_t *header_ksp, int rw)
 756 {
 757         int nkstats = 0;
 758         ekstat_t *e;
 759         avl_tree_t *t = &kstat_avl_bykid;
 760         zoneid_t zoneid;
 761 
 762         if (rw == KSTAT_WRITE)
 763                 return (EACCES);
 764 
 765         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 766 
 767         zoneid = getzoneid();
 768         for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
 769                 if (kstat_zone_find((kstat_t *)e, zoneid) &&
 770                     (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
 771                         nkstats++;
 772                 }
 773         }
 774         header_ksp->ks_ndata = nkstats;
 775         header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
 776         return (0);
 777 }
 778 
 779 /*
 780  * Copy out the data section of kstat 0, which consists of the list
 781  * of all kstat headers.  By specification, these headers must be
 782  * copied out in order of increasing KID.
 783  */
 784 static int
 785 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
 786 {
 787         ekstat_t *e;
 788         avl_tree_t *t = &kstat_avl_bykid;
 789         zoneid_t zoneid;
 790 
 791         header_ksp->ks_snaptime = gethrtime();
 792 
 793         if (rw == KSTAT_WRITE)
 794                 return (EACCES);
 795 
 796         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 797 
 798         zoneid = getzoneid();
 799         for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
 800                 if (kstat_zone_find((kstat_t *)e, zoneid) &&
 801                     (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
 802                         bcopy(&e->e_ks, buf, sizeof (kstat_t));
 803                         buf = (char *)buf + sizeof (kstat_t);
 804                 }
 805         }
 806 
 807         return (0);
 808 }
 809 
 810 /* ARGSUSED */
 811 static int
 812 system_misc_kstat_update(kstat_t *ksp, int rw)
 813 {
 814         int myncpus = ncpus;
 815         int *loadavgp = &avenrun[0];
 816         time_t zone_boot_time;
 817         hrtime_t zone_boot_hrtime;
 818         clock_t zone_lbolt;
 819         hrtime_t zone_hrtime;
 820         size_t zone_nproc;
 821 
 822         if (rw == KSTAT_WRITE)
 823                 return (EACCES);
 824 
 825         if (!INGLOBALZONE(curproc)) {
 826                 /*
 827                  * Here we grab cpu_lock which is OK as long as no-one in the
 828                  * future attempts to lookup this particular kstat
 829                  * (unix:0:system_misc) while holding cpu_lock.
 830                  */
 831                 mutex_enter(&cpu_lock);
 832                 if (pool_pset_enabled()) {
 833                         myncpus = zone_ncpus_get(curproc->p_zone);
 834                         ASSERT(myncpus > 0);
 835                 }
 836                 mutex_exit(&cpu_lock);
 837                 loadavgp = &curproc->p_zone->zone_avenrun[0];
 838         }
 839 
 840         if (INGLOBALZONE(curproc)) {
 841                 zone_boot_time = boot_time;
 842                 zone_lbolt = ddi_get_lbolt();
 843                 zone_nproc = nproc;
 844                 zone_boot_hrtime = boot_hrtime;
 845         } else {
 846                 zone_boot_time = curproc->p_zone->zone_boot_time;
 847                 zone_boot_hrtime = curproc->p_zone->zone_boot_hrtime;
 848 
 849                 zone_hrtime = gethrtime();
 850                 zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) -
 851                     NSEC_TO_TICK(curproc->p_zone->zone_zsched->p_mstart));
 852                 mutex_enter(&curproc->p_zone->zone_nlwps_lock);
 853                 zone_nproc = curproc->p_zone->zone_nprocs;
 854                 mutex_exit(&curproc->p_zone->zone_nlwps_lock);
 855         }
 856 
 857         system_misc_kstat.ncpus.value.ui32              = (uint32_t)myncpus;
 858         system_misc_kstat.lbolt.value.ui32              = (uint32_t)zone_lbolt;
 859         system_misc_kstat.deficit.value.ui32            = (uint32_t)deficit;
 860         system_misc_kstat.clk_intr.value.ui32           = (uint32_t)zone_lbolt;
 861         system_misc_kstat.vac.value.ui32                = (uint32_t)vac;
 862         system_misc_kstat.nproc.value.ui32              = (uint32_t)zone_nproc;
 863         system_misc_kstat.avenrun_1min.value.ui32       = (uint32_t)loadavgp[0];
 864         system_misc_kstat.avenrun_5min.value.ui32       = (uint32_t)loadavgp[1];
 865         system_misc_kstat.avenrun_15min.value.ui32      = (uint32_t)loadavgp[2];
 866         system_misc_kstat.boot_time.value.ui32          = (uint32_t)
 867             zone_boot_time;
 868         system_misc_kstat.boot_hrtime.value.t           = zone_boot_hrtime;
 869         system_misc_kstat.nsec_per_tick.value.ui32      = (uint32_t)
 870             nsec_per_tick;
 871         return (0);
 872 }
 873 
 874 #ifdef  __sparc
 875 extern caddr_t  econtig32;
 876 #else   /* !__sparc */
 877 extern caddr_t  econtig;
 878 #endif  /* __sparc */
 879 
 880 /* ARGSUSED */
 881 static int
 882 system_pages_kstat_update(kstat_t *ksp, int rw)
 883 {
 884         kobj_stat_t kobj_stat;
 885 
 886         if (rw == KSTAT_WRITE) {
 887                 return (EACCES);
 888         }
 889 
 890         kobj_stat_get(&kobj_stat);
 891         system_pages_kstat.physmem.value.ul     = (ulong_t)physmem;
 892         system_pages_kstat.nalloc.value.ul      = kobj_stat.nalloc;
 893         system_pages_kstat.nfree.value.ul       = kobj_stat.nfree;
 894         system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
 895         system_pages_kstat.nfree_calls.value.ul = kobj_stat.nfree_calls;
 896         system_pages_kstat.kernelbase.value.ul  = (ulong_t)KERNELBASE;
 897 
 898 #ifdef  __sparc
 899         /*
 900          * kstat should REALLY be modified to also report kmem64_base and
 901          * kmem64_end (see sun4u/os/startup.c), as the virtual address range
 902          * [ kernelbase .. econtig ] no longer is truly reflective of the
 903          * kernel's vallocs...
 904          */
 905         system_pages_kstat.econtig.value.ul     = (ulong_t)econtig32;
 906 #else   /* !__sparc */
 907         system_pages_kstat.econtig.value.ul     = (ulong_t)econtig;
 908 #endif  /* __sparc */
 909 
 910         system_pages_kstat.freemem.value.ul     = (ulong_t)freemem;
 911         system_pages_kstat.availrmem.value.ul   = (ulong_t)availrmem;
 912         system_pages_kstat.lotsfree.value.ul    = (ulong_t)lotsfree;
 913         system_pages_kstat.desfree.value.ul     = (ulong_t)desfree;
 914         system_pages_kstat.minfree.value.ul     = (ulong_t)minfree;
 915         system_pages_kstat.fastscan.value.ul    = (ulong_t)fastscan;
 916         system_pages_kstat.slowscan.value.ul    = (ulong_t)slowscan;
 917         system_pages_kstat.nscan.value.ul       = (ulong_t)nscan;
 918         system_pages_kstat.desscan.value.ul     = (ulong_t)desscan;
 919         system_pages_kstat.pagesfree.value.ul   = (ulong_t)freemem;
 920         system_pages_kstat.pageslocked.value.ul = (ulong_t)(availrmem_initial -
 921             availrmem);
 922         system_pages_kstat.pagestotal.value.ul  = (ulong_t)total_pages;
 923         /*
 924          * pp_kernel represents total pages used by the kernel since the
 925          * startup. This formula takes into account the boottime kernel
 926          * footprint and also considers the availrmem changes because of
 927          * user explicit page locking.
 928          */
 929         system_pages_kstat.pp_kernel.value.ul   = (ulong_t)(physinstalled -
 930             obp_pages - availrmem - k_anoninfo.ani_mem_resv -
 931             anon_segkp_pages_locked - pages_locked -
 932             pages_claimed - pages_useclaim);
 933 
 934         return (0);
 935 }
 936 
 937 kstat_t *
 938 kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
 939     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
 940 {
 941         return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
 942             ks_type, ks_ndata, ks_flags, ALL_ZONES));
 943 }
 944 
 945 /*
 946  * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
 947  * the specified name exists, reactivate it.  Returns a pointer to the kstat
 948  * on success, NULL on failure.  The kstat will not be visible to the
 949  * kstat driver until kstat_install().
 950  */
 951 kstat_t *
 952 kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
 953     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
 954     zoneid_t ks_zoneid)
 955 {
 956         size_t ks_data_size;
 957         kstat_t *ksp;
 958         ekstat_t *e;
 959         avl_index_t where;
 960         char namebuf[KSTAT_STRLEN + 16];
 961 
 962         if (avl_numnodes(&kstat_avl_bykid) == 0) {
 963                 avl_create(&kstat_avl_bykid, kstat_compare_bykid,
 964                     sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
 965 
 966                 avl_create(&kstat_avl_byname, kstat_compare_byname,
 967                     sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
 968         }
 969 
 970         /*
 971          * If ks_name == NULL, set the ks_name to <module><instance>.
 972          */
 973         if (ks_name == NULL) {
 974                 char buf[KSTAT_STRLEN];
 975                 kstat_set_string(buf, ks_module);
 976                 (void) sprintf(namebuf, "%s%d", buf, ks_instance);
 977                 ks_name = namebuf;
 978         }
 979 
 980         /*
 981          * Make sure it's a valid kstat data type
 982          */
 983         if (ks_type >= KSTAT_NUM_TYPES) {
 984                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
 985                     "invalid kstat type %d",
 986                     ks_module, ks_instance, ks_name, ks_type);
 987                 return (NULL);
 988         }
 989 
 990         /*
 991          * Don't allow persistent virtual kstats -- it makes no sense.
 992          * ks_data points to garbage when the client goes away.
 993          */
 994         if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
 995             (ks_flags & KSTAT_FLAG_VIRTUAL)) {
 996                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
 997                     "cannot create persistent virtual kstat",
 998                     ks_module, ks_instance, ks_name);
 999                 return (NULL);
1000         }
1001 
1002         /*
1003          * Don't allow variable-size physical kstats, since the framework's
1004          * memory allocation for physical kstat data is fixed at creation time.
1005          */
1006         if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
1007             !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
1008                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1009                     "cannot create variable-size physical kstat",
1010                     ks_module, ks_instance, ks_name);
1011                 return (NULL);
1012         }
1013 
1014         /*
1015          * Make sure the number of data fields is within legal range
1016          */
1017         if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1018             ks_ndata > kstat_data_type[ks_type].max_ndata) {
1019                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1020                     "ks_ndata=%d out of range [%d, %d]",
1021                     ks_module, ks_instance, ks_name, (int)ks_ndata,
1022                     kstat_data_type[ks_type].min_ndata,
1023                     kstat_data_type[ks_type].max_ndata);
1024                 return (NULL);
1025         }
1026 
1027         ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1028 
1029         /*
1030          * If the named kstat already exists and is dormant, reactivate it.
1031          */
1032         ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1033         if (ksp != NULL) {
1034                 if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1035                         /*
1036                          * The named kstat exists but is not dormant --
1037                          * this is a kstat namespace collision.
1038                          */
1039                         kstat_rele(ksp);
1040                         cmn_err(CE_WARN,
1041                             "kstat_create('%s', %d, '%s'): namespace collision",
1042                             ks_module, ks_instance, ks_name);
1043                         return (NULL);
1044                 }
1045                 if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1046                     (ksp->ks_type != ks_type) ||
1047                     (ksp->ks_ndata != ks_ndata) ||
1048                     (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1049                         /*
1050                          * The name is the same, but the other key parameters
1051                          * differ from those of the dormant kstat -- bogus.
1052                          */
1053                         kstat_rele(ksp);
1054                         cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1055                             "invalid reactivation of dormant kstat",
1056                             ks_module, ks_instance, ks_name);
1057                         return (NULL);
1058                 }
1059                 /*
1060                  * Return dormant kstat pointer to caller.  As usual,
1061                  * the kstat is marked invalid until kstat_install().
1062                  */
1063                 ksp->ks_flags |= KSTAT_FLAG_INVALID;
1064                 kstat_rele(ksp);
1065                 return (ksp);
1066         }
1067 
1068         /*
1069          * Allocate memory for the new kstat header and, if this is a physical
1070          * kstat, the data section.
1071          */
1072         e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1073         if (e == NULL) {
1074                 cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1075                     "insufficient kernel memory",
1076                     ks_module, ks_instance, ks_name);
1077                 return (NULL);
1078         }
1079 
1080         /*
1081          * Initialize as many fields as we can.  The caller may reset
1082          * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1083          * Creators of virtual kstats may also reset ks_data.  It is
1084          * also up to the caller to initialize the kstat data section,
1085          * if necessary.  All initialization must be complete before
1086          * calling kstat_install().
1087          */
1088         e->e_zone.zoneid = ks_zoneid;
1089         e->e_zone.next = NULL;
1090 
1091         ksp = &e->e_ks;
1092         ksp->ks_crtime               = gethrtime();
1093         kstat_set_string(ksp->ks_module, ks_module);
1094         ksp->ks_instance     = ks_instance;
1095         kstat_set_string(ksp->ks_name, ks_name);
1096         ksp->ks_type         = ks_type;
1097         kstat_set_string(ksp->ks_class, ks_class);
1098         ksp->ks_flags                = ks_flags | KSTAT_FLAG_INVALID;
1099         if (ks_flags & KSTAT_FLAG_VIRTUAL)
1100                 ksp->ks_data = NULL;
1101         else
1102                 ksp->ks_data = (void *)(e + 1);
1103         ksp->ks_ndata                = ks_ndata;
1104         ksp->ks_data_size    = ks_data_size;
1105         ksp->ks_snaptime     = ksp->ks_crtime;
1106         ksp->ks_update               = default_kstat_update;
1107         ksp->ks_private              = NULL;
1108         ksp->ks_snapshot     = default_kstat_snapshot;
1109         ksp->ks_lock         = NULL;
1110 
1111         mutex_enter(&kstat_chain_lock);
1112 
1113         /*
1114          * Add our kstat to the AVL trees.
1115          */
1116         if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1117                 mutex_exit(&kstat_chain_lock);
1118                 cmn_err(CE_WARN,
1119                     "kstat_create('%s', %d, '%s'): namespace collision",
1120                     ks_module, ks_instance, ks_name);
1121                 kstat_free(e);
1122                 return (NULL);
1123         }
1124         avl_insert(&kstat_avl_byname, e, where);
1125 
1126         /*
1127          * Loop around until we find an unused KID.
1128          */
1129         do {
1130                 ksp->ks_kid = kstat_chain_id++;
1131         } while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1132         avl_insert(&kstat_avl_bykid, e, where);
1133 
1134         mutex_exit(&kstat_chain_lock);
1135 
1136         return (ksp);
1137 }
1138 
1139 /*
1140  * Activate a fully initialized kstat and make it visible to /dev/kstat.
1141  */
1142 void
1143 kstat_install(kstat_t *ksp)
1144 {
1145         zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1146 
1147         /*
1148          * If this is a variable-size kstat, it MUST provide kstat data locking
1149          * to prevent data-size races with kstat readers.
1150          */
1151         if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1152                 panic("kstat_install('%s', %d, '%s'): "
1153                     "cannot create variable-size kstat without data lock",
1154                     ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1155         }
1156 
1157         if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1158                 cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1159                     (void *)ksp);
1160                 return;
1161         }
1162 
1163         if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1164                 uint_t i;
1165                 kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1166 
1167                 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1168                         if (knp->data_type == KSTAT_DATA_STRING) {
1169                                 ksp->ks_flags |= KSTAT_FLAG_LONGSTRINGS;
1170                                 break;
1171                         }
1172                 }
1173                 /*
1174                  * The default snapshot routine does not handle KSTAT_WRITE
1175                  * for long strings.
1176                  */
1177                 if ((ksp->ks_flags & KSTAT_FLAG_LONGSTRINGS) &&
1178                     (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1179                     (ksp->ks_snapshot == default_kstat_snapshot)) {
1180                         panic("kstat_install('%s', %d, '%s'): "
1181                             "named kstat containing KSTAT_DATA_STRING "
1182                             "is writable but uses default snapshot routine",
1183                             ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1184                 }
1185         }
1186 
1187         if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1188 
1189                 /*
1190                  * We are reactivating a dormant kstat.  Initialize the
1191                  * caller's underlying data to the value it had when the
1192                  * kstat went dormant, and mark the kstat as active.
1193                  * Grab the provider's kstat lock if it's not already held.
1194                  */
1195                 kmutex_t *lp = ksp->ks_lock;
1196                 if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1197                         mutex_enter(lp);
1198                         (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1199                         mutex_exit(lp);
1200                 } else {
1201                         (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1202                 }
1203                 ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1204         }
1205 
1206         /*
1207          * Now that the kstat is active, make it visible to the kstat driver.
1208          * When copying out kstats the count is determined in
1209          * header_kstat_update() and actually copied into kbuf in
1210          * header_kstat_snapshot(). kstat_chain_lock is held across the two
1211          * calls to ensure that this list doesn't change. Thus, we need to
1212          * also take the lock to ensure that the we don't copy the new kstat
1213          * in the 2nd pass and overrun the buf.
1214          */
1215         mutex_enter(&kstat_chain_lock);
1216         ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1217         mutex_exit(&kstat_chain_lock);
1218         kstat_rele(ksp);
1219 }
1220 
1221 /*
1222  * Remove a kstat from the system.  Or, if it's a persistent kstat,
1223  * just update the data and mark it as dormant.
1224  */
1225 void
1226 kstat_delete(kstat_t *ksp)
1227 {
1228         kmutex_t *lp;
1229         ekstat_t *e = (ekstat_t *)ksp;
1230         zoneid_t zoneid;
1231         kstat_zone_t *kz;
1232 
1233         ASSERT(ksp != NULL);
1234 
1235         if (ksp == NULL)
1236                 return;
1237 
1238         zoneid = e->e_zone.zoneid;
1239 
1240         lp = ksp->ks_lock;
1241 
1242         if (lp != NULL && MUTEX_HELD(lp)) {
1243                 panic("kstat_delete(%p): caller holds data lock %p",
1244                     (void *)ksp, (void *)lp);
1245         }
1246 
1247         if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1248                 cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1249                     (void *)ksp);
1250                 return;
1251         }
1252 
1253         if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1254                 /*
1255                  * Update the data one last time, so that all activity
1256                  * prior to going dormant has been accounted for.
1257                  */
1258                 KSTAT_ENTER(ksp);
1259                 (void) KSTAT_UPDATE(ksp, KSTAT_READ);
1260                 KSTAT_EXIT(ksp);
1261 
1262                 /*
1263                  * Mark the kstat as dormant and restore caller-modifiable
1264                  * fields to default values, so the kstat is readable during
1265                  * the dormant phase.
1266                  */
1267                 ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1268                 ksp->ks_lock = NULL;
1269                 ksp->ks_update = default_kstat_update;
1270                 ksp->ks_private = NULL;
1271                 ksp->ks_snapshot = default_kstat_snapshot;
1272                 kstat_rele(ksp);
1273                 return;
1274         }
1275 
1276         /*
1277          * Remove the kstat from the framework's AVL trees,
1278          * free the allocated memory, and increment kstat_chain_id so
1279          * /dev/kstat clients can detect the event.
1280          */
1281         mutex_enter(&kstat_chain_lock);
1282         avl_remove(&kstat_avl_bykid, e);
1283         avl_remove(&kstat_avl_byname, e);
1284         kstat_chain_id++;
1285         mutex_exit(&kstat_chain_lock);
1286 
1287         kz = e->e_zone.next;
1288         while (kz != NULL) {
1289                 kstat_zone_t *t = kz;
1290 
1291                 kz = kz->next;
1292                 kmem_free(t, sizeof (*t));
1293         }
1294         kstat_rele(ksp);
1295         kstat_free(e);
1296 }
1297 
1298 void
1299 kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1300     const char *ks_name, zoneid_t ks_zoneid)
1301 {
1302         kstat_t *ksp;
1303 
1304         ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1305         if (ksp != NULL) {
1306                 kstat_rele(ksp);
1307                 kstat_delete(ksp);
1308         }
1309 }
1310 
1311 void
1312 kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
1313 {
1314         kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1315 }
1316 
1317 /*
1318  * The sparc V9 versions of these routines can be much cheaper than
1319  * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1320  * For simplicity, however, we always feed the C versions to lint.
1321  */
1322 #if !defined(__sparc) || defined(lint) || defined(__lint)
1323 
1324 void
1325 kstat_waitq_enter(kstat_io_t *kiop)
1326 {
1327         hrtime_t new, delta;
1328         ulong_t wcnt;
1329 
1330         new = gethrtime_unscaled();
1331         delta = new - kiop->wlastupdate;
1332         kiop->wlastupdate = new;
1333         wcnt = kiop->wcnt++;
1334         if (wcnt != 0) {
1335                 kiop->wlentime += delta * wcnt;
1336                 kiop->wtime += delta;
1337         }
1338 }
1339 
1340 void
1341 kstat_waitq_exit(kstat_io_t *kiop)
1342 {
1343         hrtime_t new, delta;
1344         ulong_t wcnt;
1345 
1346         new = gethrtime_unscaled();
1347         delta = new - kiop->wlastupdate;
1348         kiop->wlastupdate = new;
1349         wcnt = kiop->wcnt--;
1350         ASSERT((int)wcnt > 0);
1351         kiop->wlentime += delta * wcnt;
1352         kiop->wtime += delta;
1353 }
1354 
1355 void
1356 kstat_runq_enter(kstat_io_t *kiop)
1357 {
1358         hrtime_t new, delta;
1359         ulong_t rcnt;
1360 
1361         new = gethrtime_unscaled();
1362         delta = new - kiop->rlastupdate;
1363         kiop->rlastupdate = new;
1364         rcnt = kiop->rcnt++;
1365         if (rcnt != 0) {
1366                 kiop->rlentime += delta * rcnt;
1367                 kiop->rtime += delta;
1368         }
1369 }
1370 
1371 void
1372 kstat_runq_exit(kstat_io_t *kiop)
1373 {
1374         hrtime_t new, delta;
1375         ulong_t rcnt;
1376 
1377         new = gethrtime_unscaled();
1378         delta = new - kiop->rlastupdate;
1379         kiop->rlastupdate = new;
1380         rcnt = kiop->rcnt--;
1381         ASSERT((int)rcnt > 0);
1382         kiop->rlentime += delta * rcnt;
1383         kiop->rtime += delta;
1384 }
1385 
1386 void
1387 kstat_waitq_to_runq(kstat_io_t *kiop)
1388 {
1389         hrtime_t new, delta;
1390         ulong_t wcnt, rcnt;
1391 
1392         new = gethrtime_unscaled();
1393 
1394         delta = new - kiop->wlastupdate;
1395         kiop->wlastupdate = new;
1396         wcnt = kiop->wcnt--;
1397         ASSERT((int)wcnt > 0);
1398         kiop->wlentime += delta * wcnt;
1399         kiop->wtime += delta;
1400 
1401         delta = new - kiop->rlastupdate;
1402         kiop->rlastupdate = new;
1403         rcnt = kiop->rcnt++;
1404         if (rcnt != 0) {
1405                 kiop->rlentime += delta * rcnt;
1406                 kiop->rtime += delta;
1407         }
1408 }
1409 
1410 void
1411 kstat_runq_back_to_waitq(kstat_io_t *kiop)
1412 {
1413         hrtime_t new, delta;
1414         ulong_t wcnt, rcnt;
1415 
1416         new = gethrtime_unscaled();
1417 
1418         delta = new - kiop->rlastupdate;
1419         kiop->rlastupdate = new;
1420         rcnt = kiop->rcnt--;
1421         ASSERT((int)rcnt > 0);
1422         kiop->rlentime += delta * rcnt;
1423         kiop->rtime += delta;
1424 
1425         delta = new - kiop->wlastupdate;
1426         kiop->wlastupdate = new;
1427         wcnt = kiop->wcnt++;
1428         if (wcnt != 0) {
1429                 kiop->wlentime += delta * wcnt;
1430                 kiop->wtime += delta;
1431         }
1432 }
1433 
1434 #endif
1435 
1436 void
1437 kstat_timer_start(kstat_timer_t *ktp)
1438 {
1439         ktp->start_time = gethrtime();
1440 }
1441 
1442 void
1443 kstat_timer_stop(kstat_timer_t *ktp)
1444 {
1445         hrtime_t        etime;
1446         u_longlong_t    num_events;
1447 
1448         ktp->stop_time = etime = gethrtime();
1449         etime -= ktp->start_time;
1450         num_events = ktp->num_events;
1451         if (etime < ktp->min_time || num_events == 0)
1452                 ktp->min_time = etime;
1453         if (etime > ktp->max_time)
1454                 ktp->max_time = etime;
1455         ktp->elapsed_time += etime;
1456         ktp->num_events = num_events + 1;
1457 }