1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * Kernel statistics framework
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/time.h>
  31 #include <sys/systm.h>
  32 #include <sys/vmsystm.h>
  33 #include <sys/t_lock.h>
  34 #include <sys/param.h>
  35 #include <sys/errno.h>
  36 #include <sys/vmem.h>
  37 #include <sys/sysmacros.h>
  38 #include <sys/cmn_err.h>
  39 #include <sys/kstat.h>
  40 #include <sys/sysinfo.h>
  41 #include <sys/cpuvar.h>
  42 #include <sys/fcntl.h>
  43 #include <sys/flock.h>
  44 #include <sys/vnode.h>
  45 #include <sys/vfs.h>
  46 #include <sys/dnlc.h>
  47 #include <sys/var.h>
  48 #include <sys/debug.h>
  49 #include <sys/kobj.h>
  50 #include <sys/avl.h>
  51 #include <sys/pool_pset.h>
  52 #include <sys/cpupart.h>
  53 #include <sys/zone.h>
  54 #include <sys/loadavg.h>
  55 #include <vm/page.h>
  56 #include <vm/anon.h>
  57 #include <vm/seg_kmem.h>
  58 
  59 /*
  60  * Global lock to protect the AVL trees and kstat_chain_id.
  61  */
  62 static kmutex_t kstat_chain_lock;
  63 
  64 /*
  65  * Every install/delete kstat bumps kstat_chain_id.  This is used by:
  66  *
  67  * (1)  /dev/kstat, to detect changes in the kstat chain across ioctls;
  68  *
  69  * (2)  kstat_create(), to assign a KID (kstat ID) to each new kstat.
  70  *      /dev/kstat uses the KID as a cookie for kstat lookups.
  71  *
  72  * We reserve the first two IDs because some kstats are created before
  73  * the well-known ones (kstat_headers = 0, kstat_types = 1).
  74  *
  75  * We also bump the kstat_chain_id if a zone is gaining or losing visibility
  76  * into a particular kstat, which is logically equivalent to a kstat being
  77  * installed/deleted.
  78  */
  79 
  80 kid_t kstat_chain_id = 2;
  81 
  82 /*
  83  * As far as zones are concerned, there are 3 types of kstat:
  84  *
  85  * 1) Those which have a well-known name, and which should return per-zone data
  86  * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
  87  * is an example of this type of kstat.
  88  *
  89  * 2) Those which should only be exported to a particular list of zones.
  90  * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
  91  * able to see NFS mounts associated with zone B, while we want the
  92  * global zone to be able to see all mounts on the system.
  93  *
  94  * 3) Those that can be exported to all zones.  Most system-related
  95  * kstats fall within this category.
  96  *
 * An ekstat_t thus contains a list of zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
 103  *
 104  * Writing to kstats is currently disallowed from within a non-global
 105  * zone, although this restriction could be removed in the future.
 106  */
 107 typedef struct kstat_zone {
 108         zoneid_t zoneid;
 109         struct kstat_zone *next;
 110 } kstat_zone_t;
 111 
 112 /*
 113  * Extended kstat structure -- for internal use only.
 114  */
 115 typedef struct ekstat {
 116         kstat_t         e_ks;           /* the kstat itself */
 117         size_t          e_size;         /* total allocation size */
 118         kthread_t       *e_owner;       /* thread holding this kstat */
 119         kcondvar_t      e_cv;           /* wait for owner == NULL */
 120         avl_node_t      e_avl_bykid;    /* AVL tree to sort by KID */
 121         avl_node_t      e_avl_byname;   /* AVL tree to sort by name */
 122         kstat_zone_t    e_zone;         /* zone to export stats to */
 123 } ekstat_t;
 124 
/*
 * Backing store for kstats allocated before kstat_init() has created
 * kstat_arena.  While kstat_arena is NULL, kstat_alloc() carves space
 * sequentially out of kstat_initial[], advancing kstat_initial_ptr and
 * shrinking kstat_initial_avail; afterwards allocations come from the arena.
 */
static uint64_t kstat_initial[8192];
static void *kstat_initial_ptr = kstat_initial;
static size_t kstat_initial_avail = sizeof (kstat_initial);
static vmem_t *kstat_arena;

/* Every kstat allocation size is rounded up to a multiple of this. */
#define KSTAT_ALIGN     (sizeof (uint64_t))

/*
 * The kstats, indexed two ways: by KID (kstat_compare_bykid) and by
 * module/instance/name (kstat_compare_byname).
 */
static avl_tree_t kstat_avl_bykid;
static avl_tree_t kstat_avl_byname;
 134 
 135 /*
 136  * Various pointers we need to create kstats at boot time in kstat_init()
 137  */
/* Data and update hooks for the named kstats installed by kstat_init(). */
extern  kstat_named_t   *segmapcnt_ptr;
extern  uint_t          segmapcnt_ndata;
extern  int             segmap_kstat_update(kstat_t *, int);
extern  kstat_named_t   *biostats_ptr;
extern  uint_t          biostats_ndata;
extern  kstat_named_t   *pollstats_ptr;
extern  uint_t          pollstats_ndata;

/*
 * vac, nproc and boot_time are sampled by system_misc_kstat_update();
 * sysinfo and vminfo are exported directly as raw virtual kstats.
 */
extern  int     vac;
extern  uint_t  nproc;
extern  time_t  boot_time;
extern  sysinfo_t       sysinfo;
extern  vminfo_t        vminfo;
 151 
/*
 * Data for the unix:0:system_misc named kstat.  kstat_init() points the
 * (virtual) kstat at this structure and system_misc_kstat_update() refreshes
 * every value; all entries are exported as 32-bit unsigned integers.
 */
struct {
        kstat_named_t ncpus;
        kstat_named_t lbolt;
        kstat_named_t deficit;
        kstat_named_t clk_intr;
        kstat_named_t vac;
        kstat_named_t nproc;
        kstat_named_t avenrun_1min;
        kstat_named_t avenrun_5min;
        kstat_named_t avenrun_15min;
        kstat_named_t boot_time;
} system_misc_kstat = {
        { "ncpus",              KSTAT_DATA_UINT32 },
        { "lbolt",              KSTAT_DATA_UINT32 },
        { "deficit",            KSTAT_DATA_UINT32 },
        { "clk_intr",           KSTAT_DATA_UINT32 },
        { "vac",                KSTAT_DATA_UINT32 },
        { "nproc",              KSTAT_DATA_UINT32 },
        { "avenrun_1min",       KSTAT_DATA_UINT32 },
        { "avenrun_5min",       KSTAT_DATA_UINT32 },
        { "avenrun_15min",      KSTAT_DATA_UINT32 },
        { "boot_time",          KSTAT_DATA_UINT32 },
};
 175 
/*
 * Data for the unix:0:system_pages named kstat.  kstat_init() points the
 * (virtual) kstat at this structure and registers
 * system_pages_kstat_update() as its update routine; all entries are
 * exported as unsigned longs.  The initializers must stay in the same order
 * as the members.
 */
struct {
        kstat_named_t physmem;
        kstat_named_t nalloc;
        kstat_named_t nfree;
        kstat_named_t nalloc_calls;
        kstat_named_t nfree_calls;
        kstat_named_t kernelbase;
        kstat_named_t econtig;
        kstat_named_t freemem;
        kstat_named_t availrmem;
        kstat_named_t lotsfree;
        kstat_named_t desfree;
        kstat_named_t minfree;
        kstat_named_t fastscan;
        kstat_named_t slowscan;
        kstat_named_t nscan;
        kstat_named_t desscan;
        kstat_named_t pp_kernel;
        kstat_named_t pagesfree;
        kstat_named_t pageslocked;
        kstat_named_t pagestotal;
} system_pages_kstat = {
        { "physmem",            KSTAT_DATA_ULONG },
        { "nalloc",             KSTAT_DATA_ULONG },
        { "nfree",              KSTAT_DATA_ULONG },
        { "nalloc_calls",       KSTAT_DATA_ULONG },
        { "nfree_calls",        KSTAT_DATA_ULONG },
        { "kernelbase",         KSTAT_DATA_ULONG },
        { "econtig",            KSTAT_DATA_ULONG },
        { "freemem",            KSTAT_DATA_ULONG },
        { "availrmem",          KSTAT_DATA_ULONG },
        { "lotsfree",           KSTAT_DATA_ULONG },
        { "desfree",            KSTAT_DATA_ULONG },
        { "minfree",            KSTAT_DATA_ULONG },
        { "fastscan",           KSTAT_DATA_ULONG },
        { "slowscan",           KSTAT_DATA_ULONG },
        { "nscan",              KSTAT_DATA_ULONG },
        { "desscan",            KSTAT_DATA_ULONG },
        { "pp_kernel",          KSTAT_DATA_ULONG },
        { "pagesfree",          KSTAT_DATA_ULONG },
        { "pageslocked",        KSTAT_DATA_ULONG },
        { "pagestotal",         KSTAT_DATA_ULONG },
};
 219 
/*
 * Update/snapshot callbacks that kstat_init() attaches to the
 * kstat_headers, system_misc and system_pages kstats.
 */
static int header_kstat_update(kstat_t *, int);
static int header_kstat_snapshot(kstat_t *, void *, int);
static int system_misc_kstat_update(kstat_t *, int);
static int system_pages_kstat_update(kstat_t *, int);
 224 
/*
 * Per-type description table with KSTAT_NUM_TYPES entries.  kstat_init()
 * publishes each entry's name, with its table index as the value, through
 * the unix:0:kstat_types kstat.
 * NOTE(review): the entry order presumably matches the KSTAT_TYPE_*
 * constants, and size/min_ndata/max_ndata are presumably the per-record size
 * and permitted ks_ndata range enforced at kstat creation -- confirm against
 * kstat_create() and <sys/kstat.h>.
 */
static struct {
        char    name[KSTAT_STRLEN];
        size_t  size;
        uint_t  min_ndata;
        uint_t  max_ndata;
} kstat_data_type[KSTAT_NUM_TYPES] = {
        { "raw",                1,                      0,      INT_MAX },
        { "name=value",         sizeof (kstat_named_t), 0,      INT_MAX },
        { "interrupt",          sizeof (kstat_intr_t),  1,      1       },
        { "i/o",                sizeof (kstat_io_t),    1,      1       },
        { "event_timer",        sizeof (kstat_timer_t), 0,      INT_MAX },
};
 237 
 238 int
 239 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
 240 {
 241         ekstat_t *e = (ekstat_t *)k;
 242         kstat_zone_t *kz;
 243 
 244         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 245         for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
 246                 if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
 247                         return (1);
 248                 if (zoneid == kz->zoneid)
 249                         return (1);
 250         }
 251         return (0);
 252 }
 253 
 254 void
 255 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
 256 {
 257         ekstat_t *e = (ekstat_t *)k;
 258         kstat_zone_t *kz, *t = NULL;
 259 
 260         mutex_enter(&kstat_chain_lock);
 261         if (zoneid == e->e_zone.zoneid) {
 262                 kz = e->e_zone.next;
 263                 ASSERT(kz != NULL);
 264                 e->e_zone.zoneid = kz->zoneid;
 265                 e->e_zone.next = kz->next;
 266                 goto out;
 267         }
 268         for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
 269                 if (kz->next->zoneid == zoneid) {
 270                         t = kz->next;
 271                         kz->next = t->next;
 272                         break;
 273                 }
 274         }
 275         ASSERT(t != NULL);      /* we removed something */
 276         kz = t;
 277 out:
 278         kstat_chain_id++;
 279         mutex_exit(&kstat_chain_lock);
 280         kmem_free(kz, sizeof (*kz));
 281 }
 282 
 283 void
 284 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
 285 {
 286         ekstat_t *e = (ekstat_t *)k;
 287         kstat_zone_t *kz;
 288 
 289         kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
 290         if (kz == NULL)
 291                 return;
 292         mutex_enter(&kstat_chain_lock);
 293         kz->zoneid = zoneid;
 294         kz->next = e->e_zone.next;
 295         e->e_zone.next = kz;
 296         kstat_chain_id++;
 297         mutex_exit(&kstat_chain_lock);
 298 }
 299 
 300 /*
 301  * Compare the list of zones for the given kstats, returning 0 if they match
 302  * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
 303  * In practice, this is called indirectly by kstat_hold_byname(), so one of the
 304  * two lists always has one element, and this is an O(n) operation rather than
 305  * O(n^2).
 306  */
 307 static int
 308 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
 309 {
 310         kstat_zone_t *kz1, *kz2;
 311 
 312         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 313         for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
 314                 for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
 315                         if (kz1->zoneid == ALL_ZONES ||
 316                             kz2->zoneid == ALL_ZONES)
 317                                 return (0);
 318                         if (kz1->zoneid == kz2->zoneid)
 319                                 return (0);
 320                 }
 321         }
 322         return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
 323 }
 324 
 325 /*
 326  * Support for keeping kstats sorted in AVL trees for fast lookups.
 327  */
 328 static int
 329 kstat_compare_bykid(const void *a1, const void *a2)
 330 {
 331         const kstat_t *k1 = a1;
 332         const kstat_t *k2 = a2;
 333 
 334         if (k1->ks_kid < k2->ks_kid)
 335                 return (-1);
 336         if (k1->ks_kid > k2->ks_kid)
 337                 return (1);
 338         return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
 339 }
 340 
 341 static int
 342 kstat_compare_byname(const void *a1, const void *a2)
 343 {
 344         const kstat_t *k1 = a1;
 345         const kstat_t *k2 = a2;
 346         int s;
 347 
 348         s = strcmp(k1->ks_module, k2->ks_module);
 349         if (s > 0)
 350                 return (1);
 351         if (s < 0)
 352                 return (-1);
 353 
 354         if (k1->ks_instance < k2->ks_instance)
 355                 return (-1);
 356         if (k1->ks_instance > k2->ks_instance)
 357                 return (1);
 358 
 359         s = strcmp(k1->ks_name, k2->ks_name);
 360         if (s > 0)
 361                 return (1);
 362         if (s < 0)
 363                 return (-1);
 364 
 365         return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
 366 }
 367 
 368 static kstat_t *
 369 kstat_hold(avl_tree_t *t, ekstat_t *template)
 370 {
 371         kstat_t *ksp;
 372         ekstat_t *e;
 373 
 374         mutex_enter(&kstat_chain_lock);
 375         for (;;) {
 376                 ksp = avl_find(t, template, NULL);
 377                 if (ksp == NULL)
 378                         break;
 379                 e = (ekstat_t *)ksp;
 380                 if (e->e_owner == NULL) {
 381                         e->e_owner = curthread;
 382                         break;
 383                 }
 384                 cv_wait(&e->e_cv, &kstat_chain_lock);
 385         }
 386         mutex_exit(&kstat_chain_lock);
 387         return (ksp);
 388 }
 389 
 390 void
 391 kstat_rele(kstat_t *ksp)
 392 {
 393         ekstat_t *e = (ekstat_t *)ksp;
 394 
 395         mutex_enter(&kstat_chain_lock);
 396         ASSERT(e->e_owner == curthread);
 397         e->e_owner = NULL;
 398         cv_broadcast(&e->e_cv);
 399         mutex_exit(&kstat_chain_lock);
 400 }
 401 
 402 kstat_t *
 403 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
 404 {
 405         ekstat_t e;
 406 
 407         e.e_ks.ks_kid = kid;
 408         e.e_zone.zoneid = zoneid;
 409         e.e_zone.next = NULL;
 410 
 411         return (kstat_hold(&kstat_avl_bykid, &e));
 412 }
 413 
 414 kstat_t *
 415 kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
 416     zoneid_t ks_zoneid)
 417 {
 418         ekstat_t e;
 419 
 420         kstat_set_string(e.e_ks.ks_module, ks_module);
 421         e.e_ks.ks_instance = ks_instance;
 422         kstat_set_string(e.e_ks.ks_name, ks_name);
 423         e.e_zone.zoneid = ks_zoneid;
 424         e.e_zone.next = NULL;
 425         return (kstat_hold(&kstat_avl_byname, &e));
 426 }
 427 
 428 static ekstat_t *
 429 kstat_alloc(size_t size)
 430 {
 431         ekstat_t *e = NULL;
 432 
 433         size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
 434 
 435         if (kstat_arena == NULL) {
 436                 if (size <= kstat_initial_avail) {
 437                         e = kstat_initial_ptr;
 438                         kstat_initial_ptr = (char *)kstat_initial_ptr + size;
 439                         kstat_initial_avail -= size;
 440                 }
 441         } else {
 442                 e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
 443         }
 444 
 445         if (e != NULL) {
 446                 bzero(e, size);
 447                 e->e_size = size;
 448                 cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
 449         }
 450 
 451         return (e);
 452 }
 453 
 454 static void
 455 kstat_free(ekstat_t *e)
 456 {
 457         cv_destroy(&e->e_cv);
 458         vmem_free(kstat_arena, e, e->e_size);
 459 }
 460 
 461 /*
 462  * Create various system kstats.
 463  */
 464 void
 465 kstat_init(void)
 466 {
 467         kstat_t *ksp;
 468         ekstat_t *e;
 469         avl_tree_t *t = &kstat_avl_bykid;
 470 
 471         /*
 472          * Set up the kstat vmem arena.
 473          */
 474         kstat_arena = vmem_create("kstat",
 475             kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
 476             segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);
 477 
 478         /*
 479          * Make initial kstats appear as though they were allocated.
 480          */
 481         for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
 482                 (void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
 483                     0, 0, e, (char *)e + e->e_size,
 484                     VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
 485 
 486         /*
 487          * The mother of all kstats.  The first kstat in the system, which
 488          * always has KID 0, has the headers for all kstats (including itself)
 489          * as its data.  Thus, the kstat driver does not need any special
 490          * interface to extract the kstat chain.
 491          */
 492         kstat_chain_id = 0;
 493         ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
 494             0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
 495         if (ksp) {
 496                 ksp->ks_lock = &kstat_chain_lock;
 497                 ksp->ks_update = header_kstat_update;
 498                 ksp->ks_snapshot = header_kstat_snapshot;
 499                 kstat_install(ksp);
 500         } else {
 501                 panic("cannot create kstat 'kstat_headers'");
 502         }
 503 
 504         ksp = kstat_create("unix", 0, "kstat_types", "kstat",
 505             KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
 506         if (ksp) {
 507                 int i;
 508                 kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);
 509 
 510                 for (i = 0; i < KSTAT_NUM_TYPES; i++) {
 511                         kstat_named_init(&kn[i], kstat_data_type[i].name,
 512                             KSTAT_DATA_ULONG);
 513                         kn[i].value.ul = i;
 514                 }
 515                 kstat_install(ksp);
 516         }
 517 
 518         ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
 519             sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
 520         if (ksp) {
 521                 ksp->ks_data = (void *) &sysinfo;
 522                 kstat_install(ksp);
 523         }
 524 
 525         ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
 526             sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
 527         if (ksp) {
 528                 ksp->ks_data = (void *) &vminfo;
 529                 kstat_install(ksp);
 530         }
 531 
 532         ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
 533             segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
 534         if (ksp) {
 535                 ksp->ks_data = (void *) segmapcnt_ptr;
 536                 ksp->ks_update = segmap_kstat_update;
 537                 kstat_install(ksp);
 538         }
 539 
 540         ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
 541             biostats_ndata, KSTAT_FLAG_VIRTUAL);
 542         if (ksp) {
 543                 ksp->ks_data = (void *) biostats_ptr;
 544                 kstat_install(ksp);
 545         }
 546 
 547         ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
 548             sizeof (struct var), KSTAT_FLAG_VIRTUAL);
 549         if (ksp) {
 550                 ksp->ks_data = (void *) &v;
 551                 kstat_install(ksp);
 552         }
 553 
 554         ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
 555             sizeof (system_misc_kstat) / sizeof (kstat_named_t),
 556             KSTAT_FLAG_VIRTUAL);
 557         if (ksp) {
 558                 ksp->ks_data = (void *) &system_misc_kstat;
 559                 ksp->ks_update = system_misc_kstat_update;
 560                 kstat_install(ksp);
 561         }
 562 
 563         ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
 564             sizeof (system_pages_kstat) / sizeof (kstat_named_t),
 565             KSTAT_FLAG_VIRTUAL);
 566         if (ksp) {
 567                 ksp->ks_data = (void *) &system_pages_kstat;
 568                 ksp->ks_update = system_pages_kstat_update;
 569                 kstat_install(ksp);
 570         }
 571 
 572         ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
 573             pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
 574 
 575         if (ksp) {
 576                 ksp->ks_data = pollstats_ptr;
 577                 kstat_install(ksp);
 578         }
 579 }
 580 
 581 /*
 582  * Caller of this should ensure that the string pointed by src
 583  * doesn't change while kstat's lock is held. Not doing so defeats
 584  * kstat's snapshot strategy as explained in <sys/kstat.h>
 585  */
 586 void
 587 kstat_named_setstr(kstat_named_t *knp, const char *src)
 588 {
 589         if (knp->data_type != KSTAT_DATA_STRING)
 590                 panic("kstat_named_setstr('%p', '%p'): "
 591                     "named kstat is not of type KSTAT_DATA_STRING",
 592                     (void *)knp, (void *)src);
 593 
 594         KSTAT_NAMED_STR_PTR(knp) = (char *)src;
 595         if (src != NULL)
 596                 KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
 597         else
 598                 KSTAT_NAMED_STR_BUFLEN(knp) = 0;
 599 }
 600 
 601 void
 602 kstat_set_string(char *dst, const char *src)
 603 {
 604         bzero(dst, KSTAT_STRLEN);
 605         (void) strncpy(dst, src, KSTAT_STRLEN - 1);
 606 }
 607 
 608 void
 609 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
 610 {
 611         kstat_set_string(knp->name, name);
 612         knp->data_type = data_type;
 613 
 614         if (data_type == KSTAT_DATA_STRING)
 615                 kstat_named_setstr(knp, NULL);
 616 }
 617 
/*
 * Initialize a timer kstat entry by storing `name' in ktp->name via
 * kstat_set_string(); no other field of *ktp is touched here.
 */
void
kstat_timer_init(kstat_timer_t *ktp, const char *name)
{
        kstat_set_string(ktp->name, name);
}
 623 
 624 /* ARGSUSED */
 625 static int
 626 default_kstat_update(kstat_t *ksp, int rw)
 627 {
 628         uint_t i;
 629         size_t len = 0;
 630         kstat_named_t *knp;
 631 
 632         /*
 633          * Named kstats with variable-length long strings have a standard
 634          * way of determining how much space is needed to hold the snapshot:
 635          */
 636         if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
 637             (ksp->ks_flags & KSTAT_FLAG_VAR_SIZE)) {
 638 
 639                 /*
 640                  * Add in the space required for the strings
 641                  */
 642                 knp = KSTAT_NAMED_PTR(ksp);
 643                 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 644                         if (knp->data_type == KSTAT_DATA_STRING)
 645                                 len += KSTAT_NAMED_STR_BUFLEN(knp);
 646                 }
 647                 ksp->ks_data_size =
 648                     ksp->ks_ndata * sizeof (kstat_named_t) + len;
 649         }
 650         return (0);
 651 }
 652 
 653 static int
 654 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
 655 {
 656         kstat_io_t *kiop;
 657         hrtime_t cur_time;
 658         size_t  namedsz;
 659 
 660         ksp->ks_snaptime = cur_time = gethrtime();
 661 
 662         if (rw == KSTAT_WRITE) {
 663                 if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
 664                         return (EACCES);
 665                 bcopy(buf, ksp->ks_data, ksp->ks_data_size);
 666                 return (0);
 667         }
 668 
 669         /*
 670          * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
 671          * number of kstat_named_t structures, followed by an optional
 672          * string segment. The ks_data generally holds only the
 673          * kstat_named_t structures. So we copy it first. The strings,
 674          * if any, are copied below. For other kstat types, ks_data holds the
 675          * entire buffer.
 676          */
 677 
 678         namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
 679         if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
 680                 bcopy(ksp->ks_data, buf, namedsz);
 681         else
 682                 bcopy(ksp->ks_data, buf, ksp->ks_data_size);
 683 
 684         /*
 685          * Apply kstat type-specific data massaging
 686          */
 687         switch (ksp->ks_type) {
 688 
 689         case KSTAT_TYPE_IO:
 690                 /*
 691                  * Normalize time units and deal with incomplete transactions
 692                  */
 693                 kiop = (kstat_io_t *)buf;
 694 
 695                 scalehrtime(&kiop->wtime);
 696                 scalehrtime(&kiop->wlentime);
 697                 scalehrtime(&kiop->wlastupdate);
 698                 scalehrtime(&kiop->rtime);
 699                 scalehrtime(&kiop->rlentime);
 700                 scalehrtime(&kiop->rlastupdate);
 701 
 702                 if (kiop->wcnt != 0) {
 703                         /* like kstat_waitq_exit */
 704                         hrtime_t wfix = cur_time - kiop->wlastupdate;
 705                         kiop->wlastupdate = cur_time;
 706                         kiop->wlentime += kiop->wcnt * wfix;
 707                         kiop->wtime += wfix;
 708                 }
 709 
 710                 if (kiop->rcnt != 0) {
 711                         /* like kstat_runq_exit */
 712                         hrtime_t rfix = cur_time - kiop->rlastupdate;
 713                         kiop->rlastupdate = cur_time;
 714                         kiop->rlentime += kiop->rcnt * rfix;
 715                         kiop->rtime += rfix;
 716                 }
 717                 break;
 718 
 719         case KSTAT_TYPE_NAMED:
 720                 /*
 721                  * Massage any long strings in at the end of the buffer
 722                  */
 723                 if (ksp->ks_data_size > namedsz) {
 724                         uint_t i;
 725                         kstat_named_t *knp = buf;
 726                         char *dst = (char *)(knp + ksp->ks_ndata);
 727                         /*
 728                          * Copy strings and update pointers
 729                          */
 730                         for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 731                                 if (knp->data_type == KSTAT_DATA_STRING &&
 732                                     KSTAT_NAMED_STR_PTR(knp) != NULL) {
 733                                         bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
 734                                             KSTAT_NAMED_STR_BUFLEN(knp));
 735                                         KSTAT_NAMED_STR_PTR(knp) = dst;
 736                                         dst += KSTAT_NAMED_STR_BUFLEN(knp);
 737                                 }
 738                         }
 739                         ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
 740                 }
 741                 break;
 742         }
 743         return (0);
 744 }
 745 
 746 static int
 747 header_kstat_update(kstat_t *header_ksp, int rw)
 748 {
 749         int nkstats = 0;
 750         ekstat_t *e;
 751         avl_tree_t *t = &kstat_avl_bykid;
 752         zoneid_t zoneid;
 753 
 754         if (rw == KSTAT_WRITE)
 755                 return (EACCES);
 756 
 757         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 758 
 759         zoneid = getzoneid();
 760         for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
 761                 if (kstat_zone_find((kstat_t *)e, zoneid)) {
 762                         nkstats++;
 763                 }
 764         }
 765         header_ksp->ks_ndata = nkstats;
 766         header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
 767         return (0);
 768 }
 769 
 770 /*
 771  * Copy out the data section of kstat 0, which consists of the list
 772  * of all kstat headers.  By specification, these headers must be
 773  * copied out in order of increasing KID.
 774  */
 775 static int
 776 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
 777 {
 778         ekstat_t *e;
 779         avl_tree_t *t = &kstat_avl_bykid;
 780         zoneid_t zoneid;
 781 
 782         header_ksp->ks_snaptime = gethrtime();
 783 
 784         if (rw == KSTAT_WRITE)
 785                 return (EACCES);
 786 
 787         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 788 
 789         zoneid = getzoneid();
 790         for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
 791                 if (kstat_zone_find((kstat_t *)e, zoneid)) {
 792                         bcopy(&e->e_ks, buf, sizeof (kstat_t));
 793                         buf = (char *)buf + sizeof (kstat_t);
 794                 }
 795         }
 796 
 797         return (0);
 798 }
 799 
 800 /* ARGSUSED */
 801 static int
 802 system_misc_kstat_update(kstat_t *ksp, int rw)
 803 {
 804         int myncpus = ncpus;
 805         int *loadavgp = &avenrun[0];
 806         int loadavg[LOADAVG_NSTATS];
 807         time_t zone_boot_time;
 808         clock_t zone_lbolt;
 809         hrtime_t zone_hrtime;
 810         size_t zone_nproc;
 811 
 812         if (rw == KSTAT_WRITE)
 813                 return (EACCES);
 814 
 815         if (!INGLOBALZONE(curproc)) {
 816                 /*
 817                  * Here we grab cpu_lock which is OK as long as no-one in the
 818                  * future attempts to lookup this particular kstat
 819                  * (unix:0:system_misc) while holding cpu_lock.
 820                  */
 821                 mutex_enter(&cpu_lock);
 822                 if (pool_pset_enabled()) {
 823                         psetid_t mypsid = zone_pset_get(curproc->p_zone);
 824                         int error;
 825 
 826                         myncpus = zone_ncpus_get(curproc->p_zone);
 827                         ASSERT(myncpus > 0);
 828                         error = cpupart_get_loadavg(mypsid, &loadavg[0],
 829                             LOADAVG_NSTATS);
 830                         ASSERT(error == 0);
 831                         loadavgp = &loadavg[0];
 832                 }
 833                 mutex_exit(&cpu_lock);
 834         }
 835 
 836         if (INGLOBALZONE(curproc)) {
 837                 zone_boot_time = boot_time;
 838                 zone_lbolt = ddi_get_lbolt();
 839                 zone_nproc = nproc;
 840         } else {
 841                 struct timeval tvp;
 842                 hrt2tv(curproc->p_zone->zone_zsched->p_mstart, &tvp);
 843                 zone_boot_time = tvp.tv_sec;
 844 
 845                 zone_hrtime = gethrtime();
 846                 zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) -
 847                     NSEC_TO_TICK(curproc->p_zone->zone_zsched->p_mstart));
 848                 mutex_enter(&curproc->p_zone->zone_nlwps_lock);
 849                 zone_nproc = curproc->p_zone->zone_nprocs;
 850                 mutex_exit(&curproc->p_zone->zone_nlwps_lock);
 851         }
 852 
 853         system_misc_kstat.ncpus.value.ui32              = (uint32_t)myncpus;
 854         system_misc_kstat.lbolt.value.ui32              = (uint32_t)zone_lbolt;
 855         system_misc_kstat.deficit.value.ui32            = (uint32_t)deficit;
 856         system_misc_kstat.clk_intr.value.ui32           = (uint32_t)zone_lbolt;
 857         system_misc_kstat.vac.value.ui32                = (uint32_t)vac;
 858         system_misc_kstat.nproc.value.ui32              = (uint32_t)zone_nproc;
 859         system_misc_kstat.avenrun_1min.value.ui32       = (uint32_t)loadavgp[0];
 860         system_misc_kstat.avenrun_5min.value.ui32       = (uint32_t)loadavgp[1];
 861         system_misc_kstat.avenrun_15min.value.ui32      = (uint32_t)loadavgp[2];
 862         system_misc_kstat.boot_time.value.ui32          = (uint32_t)
 863             zone_boot_time;
 864         return (0);
 865 }
 866 
/*
 * Upper bound of the [ kernelbase .. econtig ] virtual address range that
 * system_pages_kstat_update() reports in the "econtig" field.  The symbol
 * is econtig32 on sparc and econtig on every other platform.
 */
#ifdef  __sparc
extern caddr_t  econtig32;
#else   /* !__sparc */
extern caddr_t  econtig;
#endif  /* __sparc */
 872 
/*
 * Update handler for the system_pages_kstat data.
 *
 * Writes are refused with EACCES.  On a read, every field of
 * system_pages_kstat is refreshed from kobj_stat_get() and from the
 * memory-related globals referenced below, and 0 is returned.  The ksp
 * argument is not used.
 */
/* ARGSUSED */
static int
system_pages_kstat_update(kstat_t *ksp, int rw)
{
        kobj_stat_t kobj_stat;

        /* This kstat is read-only. */
        if (rw == KSTAT_WRITE) {
                return (EACCES);
        }

        /* Allocation counters come from a kobj_stat_get() snapshot. */
        kobj_stat_get(&kobj_stat);
        system_pages_kstat.physmem.value.ul     = (ulong_t)physmem;
        system_pages_kstat.nalloc.value.ul      = kobj_stat.nalloc;
        system_pages_kstat.nfree.value.ul       = kobj_stat.nfree;
        system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
        system_pages_kstat.nfree_calls.value.ul = kobj_stat.nfree_calls;
        system_pages_kstat.kernelbase.value.ul  = (ulong_t)KERNELBASE;

#ifdef  __sparc
        /*
         * kstat should REALLY be modified to also report kmem64_base and
         * kmem64_end (see sun4u/os/startup.c), as the virtual address range
         * [ kernelbase .. econtig ] no longer is truly reflective of the
         * kernel's vallocs...
         */
        system_pages_kstat.econtig.value.ul     = (ulong_t)econtig32;
#else   /* !__sparc */
        system_pages_kstat.econtig.value.ul     = (ulong_t)econtig;
#endif  /* __sparc */

        /*
         * The globals below are each read directly at the point of
         * assignment; freemem and availrmem are read more than once, so
         * the fields are not guaranteed to form one consistent snapshot.
         */
        system_pages_kstat.freemem.value.ul     = (ulong_t)freemem;
        system_pages_kstat.availrmem.value.ul   = (ulong_t)availrmem;
        system_pages_kstat.lotsfree.value.ul    = (ulong_t)lotsfree;
        system_pages_kstat.desfree.value.ul     = (ulong_t)desfree;
        system_pages_kstat.minfree.value.ul     = (ulong_t)minfree;
        system_pages_kstat.fastscan.value.ul    = (ulong_t)fastscan;
        system_pages_kstat.slowscan.value.ul    = (ulong_t)slowscan;
        system_pages_kstat.nscan.value.ul       = (ulong_t)nscan;
        system_pages_kstat.desscan.value.ul     = (ulong_t)desscan;
        /* pagesfree reports the same global as the freemem field above. */
        system_pages_kstat.pagesfree.value.ul   = (ulong_t)freemem;
        /* pageslocked is how far availrmem has dropped from its initial value. */
        system_pages_kstat.pageslocked.value.ul = (ulong_t)(availrmem_initial -
            availrmem);
        system_pages_kstat.pagestotal.value.ul  = (ulong_t)total_pages;
        /*
         * pp_kernel represents total pages used by the kernel since the
         * startup. This formula takes into account the boottime kernel
         * footprint and also considers the availrmem changes because of
         * user explicit page locking.
         */
        system_pages_kstat.pp_kernel.value.ul   = (ulong_t)(physinstalled -
            obp_pages - availrmem - k_anoninfo.ani_mem_resv -
            anon_segkp_pages_locked - pages_locked -
            pages_claimed - pages_useclaim);

        return (0);
}
 929 
/*
 * Create a kstat without naming a specific zone: forwards every argument
 * to kstat_create_zone() and passes ALL_ZONES as the zone id.  Returns
 * whatever kstat_create_zone() returns.
 */
kstat_t *
kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
    const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
{
        return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
            ks_type, ks_ndata, ks_flags, ALL_ZONES));
}
 937 
 938 /*
 939  * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
 940  * the specified name exists, reactivate it.  Returns a pointer to the kstat
 941  * on success, NULL on failure.  The kstat will not be visible to the
 942  * kstat driver until kstat_install().
 943  */
 944 kstat_t *
 945 kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
 946     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
 947     zoneid_t ks_zoneid)
 948 {
 949         size_t ks_data_size;
 950         kstat_t *ksp;
 951         ekstat_t *e;
 952         avl_index_t where;
 953         char namebuf[KSTAT_STRLEN + 16];
 954 
 955         if (avl_numnodes(&kstat_avl_bykid) == 0) {
 956                 avl_create(&kstat_avl_bykid, kstat_compare_bykid,
 957                     sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
 958 
 959                 avl_create(&kstat_avl_byname, kstat_compare_byname,
 960                     sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
 961         }
 962 
 963         /*
 964          * If ks_name == NULL, set the ks_name to <module><instance>.
 965          */
 966         if (ks_name == NULL) {
 967                 char buf[KSTAT_STRLEN];
 968                 kstat_set_string(buf, ks_module);
 969                 (void) sprintf(namebuf, "%s%d", buf, ks_instance);
 970                 ks_name = namebuf;
 971         }
 972 
 973         /*
 974          * Make sure it's a valid kstat data type
 975          */
 976         if (ks_type >= KSTAT_NUM_TYPES) {
 977                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
 978                     "invalid kstat type %d",
 979                     ks_module, ks_instance, ks_name, ks_type);
 980                 return (NULL);
 981         }
 982 
 983         /*
 984          * Don't allow persistent virtual kstats -- it makes no sense.
 985          * ks_data points to garbage when the client goes away.
 986          */
 987         if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
 988             (ks_flags & KSTAT_FLAG_VIRTUAL)) {
 989                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
 990                     "cannot create persistent virtual kstat",
 991                     ks_module, ks_instance, ks_name);
 992                 return (NULL);
 993         }
 994 
 995         /*
 996          * Don't allow variable-size physical kstats, since the framework's
 997          * memory allocation for physical kstat data is fixed at creation time.
 998          */
 999         if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
1000             !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
1001                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1002                     "cannot create variable-size physical kstat",
1003                     ks_module, ks_instance, ks_name);
1004                 return (NULL);
1005         }
1006 
1007         /*
1008          * Make sure the number of data fields is within legal range
1009          */
1010         if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1011             ks_ndata > kstat_data_type[ks_type].max_ndata) {
1012                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1013                     "ks_ndata=%d out of range [%d, %d]",
1014                     ks_module, ks_instance, ks_name, (int)ks_ndata,
1015                     kstat_data_type[ks_type].min_ndata,
1016                     kstat_data_type[ks_type].max_ndata);
1017                 return (NULL);
1018         }
1019 
1020         ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1021 
1022         /*
1023          * If the named kstat already exists and is dormant, reactivate it.
1024          */
1025         ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1026         if (ksp != NULL) {
1027                 if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1028                         /*
1029                          * The named kstat exists but is not dormant --
1030                          * this is a kstat namespace collision.
1031                          */
1032                         kstat_rele(ksp);
1033                         cmn_err(CE_WARN,
1034                             "kstat_create('%s', %d, '%s'): namespace collision",
1035                             ks_module, ks_instance, ks_name);
1036                         return (NULL);
1037                 }
1038                 if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1039                     (ksp->ks_type != ks_type) ||
1040                     (ksp->ks_ndata != ks_ndata) ||
1041                     (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1042                         /*
1043                          * The name is the same, but the other key parameters
1044                          * differ from those of the dormant kstat -- bogus.
1045                          */
1046                         kstat_rele(ksp);
1047                         cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1048                             "invalid reactivation of dormant kstat",
1049                             ks_module, ks_instance, ks_name);
1050                         return (NULL);
1051                 }
1052                 /*
1053                  * Return dormant kstat pointer to caller.  As usual,
1054                  * the kstat is marked invalid until kstat_install().
1055                  */
1056                 ksp->ks_flags |= KSTAT_FLAG_INVALID;
1057                 kstat_rele(ksp);
1058                 return (ksp);
1059         }
1060 
1061         /*
1062          * Allocate memory for the new kstat header and, if this is a physical
1063          * kstat, the data section.
1064          */
1065         e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1066         if (e == NULL) {
1067                 cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1068                     "insufficient kernel memory",
1069                     ks_module, ks_instance, ks_name);
1070                 return (NULL);
1071         }
1072 
1073         /*
1074          * Initialize as many fields as we can.  The caller may reset
1075          * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1076          * Creators of virtual kstats may also reset ks_data.  It is
1077          * also up to the caller to initialize the kstat data section,
1078          * if necessary.  All initialization must be complete before
1079          * calling kstat_install().
1080          */
1081         e->e_zone.zoneid = ks_zoneid;
1082         e->e_zone.next = NULL;
1083 
1084         ksp = &e->e_ks;
1085         ksp->ks_crtime               = gethrtime();
1086         kstat_set_string(ksp->ks_module, ks_module);
1087         ksp->ks_instance     = ks_instance;
1088         kstat_set_string(ksp->ks_name, ks_name);
1089         ksp->ks_type         = ks_type;
1090         kstat_set_string(ksp->ks_class, ks_class);
1091         ksp->ks_flags                = ks_flags | KSTAT_FLAG_INVALID;
1092         if (ks_flags & KSTAT_FLAG_VIRTUAL)
1093                 ksp->ks_data = NULL;
1094         else
1095                 ksp->ks_data = (void *)(e + 1);
1096         ksp->ks_ndata                = ks_ndata;
1097         ksp->ks_data_size    = ks_data_size;
1098         ksp->ks_snaptime     = ksp->ks_crtime;
1099         ksp->ks_update               = default_kstat_update;
1100         ksp->ks_private              = NULL;
1101         ksp->ks_snapshot     = default_kstat_snapshot;
1102         ksp->ks_lock         = NULL;
1103 
1104         mutex_enter(&kstat_chain_lock);
1105 
1106         /*
1107          * Add our kstat to the AVL trees.
1108          */
1109         if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1110                 mutex_exit(&kstat_chain_lock);
1111                 cmn_err(CE_WARN,
1112                     "kstat_create('%s', %d, '%s'): namespace collision",
1113                     ks_module, ks_instance, ks_name);
1114                 kstat_free(e);
1115                 return (NULL);
1116         }
1117         avl_insert(&kstat_avl_byname, e, where);
1118 
1119         /*
1120          * Loop around until we find an unused KID.
1121          */
1122         do {
1123                 ksp->ks_kid = kstat_chain_id++;
1124         } while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1125         avl_insert(&kstat_avl_bykid, e, where);
1126 
1127         mutex_exit(&kstat_chain_lock);
1128 
1129         return (ksp);
1130 }
1131 
1132 /*
1133  * Activate a fully initialized kstat and make it visible to /dev/kstat.
1134  */
1135 void
1136 kstat_install(kstat_t *ksp)
1137 {
1138         zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1139 
1140         /*
1141          * If this is a variable-size kstat, it MUST provide kstat data locking
1142          * to prevent data-size races with kstat readers.
1143          */
1144         if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1145                 panic("kstat_install('%s', %d, '%s'): "
1146                     "cannot create variable-size kstat without data lock",
1147                     ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1148         }
1149 
1150         if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1151                 cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1152                     (void *)ksp);
1153                 return;
1154         }
1155 
1156         if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1157                 int has_long_strings = 0;
1158                 uint_t i;
1159                 kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1160 
1161                 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1162                         if (knp->data_type == KSTAT_DATA_STRING) {
1163                                 has_long_strings = 1;
1164                                 break;
1165                         }
1166                 }
1167                 /*
1168                  * It is an error for a named kstat with fields of
1169                  * KSTAT_DATA_STRING to be non-virtual.
1170                  */
1171                 if (has_long_strings && !(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) {
1172                         panic("kstat_install('%s', %d, '%s'): "
1173                             "named kstat containing KSTAT_DATA_STRING "
1174                             "is not virtual",
1175                             ksp->ks_module, ksp->ks_instance,
1176                             ksp->ks_name);
1177                 }
1178                 /*
1179                  * The default snapshot routine does not handle KSTAT_WRITE
1180                  * for long strings.
1181                  */
1182                 if (has_long_strings && (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1183                     (ksp->ks_snapshot == default_kstat_snapshot)) {
1184                         panic("kstat_install('%s', %d, '%s'): "
1185                             "named kstat containing KSTAT_DATA_STRING "
1186                             "is writable but uses default snapshot routine",
1187                             ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1188                 }
1189         }
1190 
1191         if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1192 
1193                 /*
1194                  * We are reactivating a dormant kstat.  Initialize the
1195                  * caller's underlying data to the value it had when the
1196                  * kstat went dormant, and mark the kstat as active.
1197                  * Grab the provider's kstat lock if it's not already held.
1198                  */
1199                 kmutex_t *lp = ksp->ks_lock;
1200                 if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1201                         mutex_enter(lp);
1202                         (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1203                         mutex_exit(lp);
1204                 } else {
1205                         (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1206                 }
1207                 ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1208         }
1209 
1210 #ifdef DEBUG
1211         /*
1212          * Search for uninitialized kstats.
1213          */
1214         switch (ksp->ks_type) {
1215         case KSTAT_TYPE_NAMED: {
1216                 uint_t i;
1217                 kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1218 
1219                 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1220                         if (knp->data_type > KSTAT_DATA_STRING) {
1221                                 cmn_err(CE_WARN,
1222                                     "kstat_install('%s', %d, '%s'): "
1223                                     "invalid data type",
1224                                     ksp->ks_module, ksp->ks_instance,
1225                                     ksp->ks_name);
1226                         }
1227 
1228                         /*
1229                          * If the name of this kstat is empty
1230                          * we assume it is uninitialized.
1231                          */
1232                         if (knp->name[0] == '\0') {
1233                                 cmn_err(CE_WARN,
1234                                     "kstat_install('%s', %d, '%s'): "
1235                                     "uninitialized kstat",
1236                                     ksp->ks_module, ksp->ks_instance,
1237                                     ksp->ks_name);
1238                         }
1239                 }
1240 
1241                 break;
1242         }
1243         default:
1244                 break;
1245         }
1246 #endif
1247 
1248         /*
1249          * Now that the kstat is active, make it visible to the kstat driver.
1250          */
1251         ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1252         kstat_rele(ksp);
1253 }
1254 
1255 /*
1256  * Remove a kstat from the system.  Or, if it's a persistent kstat,
1257  * just update the data and mark it as dormant.
1258  */
1259 void
1260 kstat_delete(kstat_t *ksp)
1261 {
1262         kmutex_t *lp;
1263         ekstat_t *e = (ekstat_t *)ksp;
1264         zoneid_t zoneid;
1265         kstat_zone_t *kz;
1266 
1267         ASSERT(ksp != NULL);
1268 
1269         if (ksp == NULL)
1270                 return;
1271 
1272         zoneid = e->e_zone.zoneid;
1273 
1274         lp = ksp->ks_lock;
1275 
1276         if (lp != NULL && MUTEX_HELD(lp)) {
1277                 panic("kstat_delete(%p): caller holds data lock %p",
1278                     (void *)ksp, (void *)lp);
1279         }
1280 
1281         if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1282                 cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1283                     (void *)ksp);
1284                 return;
1285         }
1286 
1287         if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1288                 /*
1289                  * Update the data one last time, so that all activity
1290                  * prior to going dormant has been accounted for.
1291                  */
1292                 KSTAT_ENTER(ksp);
1293                 (void) KSTAT_UPDATE(ksp, KSTAT_READ);
1294                 KSTAT_EXIT(ksp);
1295 
1296                 /*
1297                  * Mark the kstat as dormant and restore caller-modifiable
1298                  * fields to default values, so the kstat is readable during
1299                  * the dormant phase.
1300                  */
1301                 ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1302                 ksp->ks_lock = NULL;
1303                 ksp->ks_update = default_kstat_update;
1304                 ksp->ks_private = NULL;
1305                 ksp->ks_snapshot = default_kstat_snapshot;
1306                 kstat_rele(ksp);
1307                 return;
1308         }
1309 
1310         /*
1311          * Remove the kstat from the framework's AVL trees,
1312          * free the allocated memory, and increment kstat_chain_id so
1313          * /dev/kstat clients can detect the event.
1314          */
1315         mutex_enter(&kstat_chain_lock);
1316         avl_remove(&kstat_avl_bykid, e);
1317         avl_remove(&kstat_avl_byname, e);
1318         kstat_chain_id++;
1319         mutex_exit(&kstat_chain_lock);
1320 
1321         kz = e->e_zone.next;
1322         while (kz != NULL) {
1323                 kstat_zone_t *t = kz;
1324 
1325                 kz = kz->next;
1326                 kmem_free(t, sizeof (*t));
1327         }
1328         kstat_rele(ksp);
1329         kstat_free(e);
1330 }
1331 
1332 void
1333 kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1334     const char *ks_name, zoneid_t ks_zoneid)
1335 {
1336         kstat_t *ksp;
1337 
1338         ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1339         if (ksp != NULL) {
1340                 kstat_rele(ksp);
1341                 kstat_delete(ksp);
1342         }
1343 }
1344 
/*
 * Delete the kstat identified by module, instance and name: forwards to
 * kstat_delete_byname_zone() with ALL_ZONES as the zone id.
 */
void
kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
{
        kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
}
1350 
1351 /*
1352  * The sparc V9 versions of these routines can be much cheaper than
1353  * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1354  * For simplicity, however, we always feed the C versions to lint.
1355  */
1356 #if !defined(__sparc) || defined(lint) || defined(__lint)
1357 
1358 void
1359 kstat_waitq_enter(kstat_io_t *kiop)
1360 {
1361         hrtime_t new, delta;
1362         ulong_t wcnt;
1363 
1364         new = gethrtime_unscaled();
1365         delta = new - kiop->wlastupdate;
1366         kiop->wlastupdate = new;
1367         wcnt = kiop->wcnt++;
1368         if (wcnt != 0) {
1369                 kiop->wlentime += delta * wcnt;
1370                 kiop->wtime += delta;
1371         }
1372 }
1373 
1374 void
1375 kstat_waitq_exit(kstat_io_t *kiop)
1376 {
1377         hrtime_t new, delta;
1378         ulong_t wcnt;
1379 
1380         new = gethrtime_unscaled();
1381         delta = new - kiop->wlastupdate;
1382         kiop->wlastupdate = new;
1383         wcnt = kiop->wcnt--;
1384         ASSERT((int)wcnt > 0);
1385         kiop->wlentime += delta * wcnt;
1386         kiop->wtime += delta;
1387 }
1388 
1389 void
1390 kstat_runq_enter(kstat_io_t *kiop)
1391 {
1392         hrtime_t new, delta;
1393         ulong_t rcnt;
1394 
1395         new = gethrtime_unscaled();
1396         delta = new - kiop->rlastupdate;
1397         kiop->rlastupdate = new;
1398         rcnt = kiop->rcnt++;
1399         if (rcnt != 0) {
1400                 kiop->rlentime += delta * rcnt;
1401                 kiop->rtime += delta;
1402         }
1403 }
1404 
1405 void
1406 kstat_runq_exit(kstat_io_t *kiop)
1407 {
1408         hrtime_t new, delta;
1409         ulong_t rcnt;
1410 
1411         new = gethrtime_unscaled();
1412         delta = new - kiop->rlastupdate;
1413         kiop->rlastupdate = new;
1414         rcnt = kiop->rcnt--;
1415         ASSERT((int)rcnt > 0);
1416         kiop->rlentime += delta * rcnt;
1417         kiop->rtime += delta;
1418 }
1419 
1420 void
1421 kstat_waitq_to_runq(kstat_io_t *kiop)
1422 {
1423         hrtime_t new, delta;
1424         ulong_t wcnt, rcnt;
1425 
1426         new = gethrtime_unscaled();
1427 
1428         delta = new - kiop->wlastupdate;
1429         kiop->wlastupdate = new;
1430         wcnt = kiop->wcnt--;
1431         ASSERT((int)wcnt > 0);
1432         kiop->wlentime += delta * wcnt;
1433         kiop->wtime += delta;
1434 
1435         delta = new - kiop->rlastupdate;
1436         kiop->rlastupdate = new;
1437         rcnt = kiop->rcnt++;
1438         if (rcnt != 0) {
1439                 kiop->rlentime += delta * rcnt;
1440                 kiop->rtime += delta;
1441         }
1442 }
1443 
1444 void
1445 kstat_runq_back_to_waitq(kstat_io_t *kiop)
1446 {
1447         hrtime_t new, delta;
1448         ulong_t wcnt, rcnt;
1449 
1450         new = gethrtime_unscaled();
1451 
1452         delta = new - kiop->rlastupdate;
1453         kiop->rlastupdate = new;
1454         rcnt = kiop->rcnt--;
1455         ASSERT((int)rcnt > 0);
1456         kiop->rlentime += delta * rcnt;
1457         kiop->rtime += delta;
1458 
1459         delta = new - kiop->wlastupdate;
1460         kiop->wlastupdate = new;
1461         wcnt = kiop->wcnt++;
1462         if (wcnt != 0) {
1463                 kiop->wlentime += delta * wcnt;
1464                 kiop->wtime += delta;
1465         }
1466 }
1467 
1468 #endif
1469 
/*
 * Begin timing an event: store the current gethrtime() value in the
 * timer's start_time.  kstat_timer_stop() subtracts this value from its
 * own gethrtime() reading.
 */
void
kstat_timer_start(kstat_timer_t *ktp)
{
        ktp->start_time = gethrtime();
}
1475 
1476 void
1477 kstat_timer_stop(kstat_timer_t *ktp)
1478 {
1479         hrtime_t        etime;
1480         u_longlong_t    num_events;
1481 
1482         ktp->stop_time = etime = gethrtime();
1483         etime -= ktp->start_time;
1484         num_events = ktp->num_events;
1485         if (etime < ktp->min_time || num_events == 0)
1486                 ktp->min_time = etime;
1487         if (etime > ktp->max_time)
1488                 ktp->max_time = etime;
1489         ktp->elapsed_time += etime;
1490         ktp->num_events = num_events + 1;
1491 }