1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2014, Joyent, Inc. All rights reserved.
  24  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  25  */
  26 
  27 /*
  28  * Kernel statistics framework
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/time.h>
  33 #include <sys/systm.h>
  34 #include <sys/vmsystm.h>
  35 #include <sys/t_lock.h>
  36 #include <sys/param.h>
  37 #include <sys/errno.h>
  38 #include <sys/vmem.h>
  39 #include <sys/sysmacros.h>
  40 #include <sys/cmn_err.h>
  41 #include <sys/kstat.h>
  42 #include <sys/sysinfo.h>
  43 #include <sys/cpuvar.h>
  44 #include <sys/fcntl.h>
  45 #include <sys/flock.h>
  46 #include <sys/vnode.h>
  47 #include <sys/vfs.h>
  48 #include <sys/dnlc.h>
  49 #include <sys/var.h>
  50 #include <sys/debug.h>
  51 #include <sys/kobj.h>
  52 #include <sys/avl.h>
  53 #include <sys/pool_pset.h>
  54 #include <sys/cpupart.h>
  55 #include <sys/zone.h>
  56 #include <sys/loadavg.h>
  57 #include <vm/page.h>
  58 #include <vm/anon.h>
  59 #include <vm/seg_kmem.h>
  60 
  61 /*
  62  * Global lock to protect the AVL trees and kstat_chain_id.
  63  */
  64 static kmutex_t kstat_chain_lock;
  65 
  66 /*
  67  * Every install/delete kstat bumps kstat_chain_id.  This is used by:
  68  *
  69  * (1)  /dev/kstat, to detect changes in the kstat chain across ioctls;
  70  *
  71  * (2)  kstat_create(), to assign a KID (kstat ID) to each new kstat.
  72  *      /dev/kstat uses the KID as a cookie for kstat lookups.
  73  *
  74  * We reserve the first two IDs because some kstats are created before
  75  * the well-known ones (kstat_headers = 0, kstat_types = 1).
  76  *
  77  * We also bump the kstat_chain_id if a zone is gaining or losing visibility
  78  * into a particular kstat, which is logically equivalent to a kstat being
  79  * installed/deleted.
  80  */
  81 
  82 kid_t kstat_chain_id = 2;
  83 
  84 /*
  85  * As far as zones are concerned, there are 3 types of kstat:
  86  *
  87  * 1) Those which have a well-known name, and which should return per-zone data
  88  * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
  89  * is an example of this type of kstat.
  90  *
  91  * 2) Those which should only be exported to a particular list of zones.
  92  * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
  93  * able to see NFS mounts associated with zone B, while we want the
  94  * global zone to be able to see all mounts on the system.
  95  *
  96  * 3) Those that can be exported to all zones.  Most system-related
  97  * kstats fall within this category.
  98  *
 * An ekstat_t thus contains a list of zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
 105  *
 106  * Writing to kstats is currently disallowed from within a non-global
 107  * zone, although this restriction could be removed in the future.
 108  */
typedef struct kstat_zone {
        zoneid_t zoneid;                /* zone exported to, or ALL_ZONES */
        struct kstat_zone *next;        /* next list entry; NULL at the end */
} kstat_zone_t;
 113 
/*
 * Extended kstat structure -- for internal use only.
 *
 * The public kstat_t is the first member, and code in this file casts
 * freely between kstat_t * and ekstat_t *, so e_ks must stay first.
 */
typedef struct ekstat {
        kstat_t         e_ks;           /* the kstat itself (must be first) */
        size_t          e_size;         /* total allocation size */
        kthread_t       *e_owner;       /* thread holding this kstat, or NULL */
        kcondvar_t      e_cv;           /* wait for owner == NULL */
        avl_node_t      e_avl_bykid;    /* AVL tree to sort by KID */
        avl_node_t      e_avl_byname;   /* AVL tree to sort by name */
        kstat_zone_t    e_zone;         /* embedded head of zone export list */
} ekstat_t;
 126 
/*
 * Static buffer from which kstat_alloc() carves allocations for as long as
 * kstat_arena is still NULL.  kstat_init() later passes this same buffer to
 * vmem_create() when it creates the "kstat" arena.
 */
static uint64_t kstat_initial[8192];
static void *kstat_initial_ptr = kstat_initial; /* next unused byte */
static size_t kstat_initial_avail = sizeof (kstat_initial); /* bytes left */
static vmem_t *kstat_arena;     /* NULL until kstat_init() creates it */

/* Allocation sizes are rounded up to a multiple of this. */
#define KSTAT_ALIGN     (sizeof (uint64_t))

/*
 * AVL trees searched by kstat_hold_bykid() and kstat_hold_byname()
 * respectively; both are protected by kstat_chain_lock.
 */
static avl_tree_t kstat_avl_bykid;
static avl_tree_t kstat_avl_byname;
 136 
 137 /*
 138  * Various pointers we need to create kstats at boot time in kstat_init()
 139  */
 140 extern  kstat_named_t   *segmapcnt_ptr;
 141 extern  uint_t          segmapcnt_ndata;
 142 extern  int             segmap_kstat_update(kstat_t *, int);
 143 extern  kstat_named_t   *biostats_ptr;
 144 extern  uint_t          biostats_ndata;
 145 extern  kstat_named_t   *pollstats_ptr;
 146 extern  uint_t          pollstats_ndata;
 147 
 148 extern  int     vac;
 149 extern  uint_t  nproc;
 150 extern  time_t  boot_time;
 151 extern  sysinfo_t       sysinfo;
 152 extern  vminfo_t        vminfo;
 153 
/*
 * Data section of the unix:0:system_misc named kstat.  kstat_init() points
 * that kstat's ks_data at this structure, and system_misc_kstat_update()
 * stores a fresh 32-bit value into every entry when called for a read.
 */
struct {
        kstat_named_t ncpus;
        kstat_named_t lbolt;
        kstat_named_t deficit;
        kstat_named_t clk_intr;
        kstat_named_t vac;
        kstat_named_t nproc;
        kstat_named_t avenrun_1min;
        kstat_named_t avenrun_5min;
        kstat_named_t avenrun_15min;
        kstat_named_t boot_time;
        kstat_named_t nsec_per_tick;
} system_misc_kstat = {
        /* initializer order must match the member order above */
        { "ncpus",              KSTAT_DATA_UINT32 },
        { "lbolt",              KSTAT_DATA_UINT32 },
        { "deficit",            KSTAT_DATA_UINT32 },
        { "clk_intr",           KSTAT_DATA_UINT32 },
        { "vac",                KSTAT_DATA_UINT32 },
        { "nproc",              KSTAT_DATA_UINT32 },
        { "avenrun_1min",       KSTAT_DATA_UINT32 },
        { "avenrun_5min",       KSTAT_DATA_UINT32 },
        { "avenrun_15min",      KSTAT_DATA_UINT32 },
        { "boot_time",          KSTAT_DATA_UINT32 },
        { "nsec_per_tick",      KSTAT_DATA_UINT32 },
};
 179 
/*
 * Data section of the unix:0:system_pages named kstat.  kstat_init() points
 * that kstat's ks_data at this structure and installs
 * system_pages_kstat_update() as its update routine, which fills in the
 * values.  Every entry is declared as KSTAT_DATA_ULONG.
 */
struct {
        kstat_named_t physmem;
        kstat_named_t nalloc;
        kstat_named_t nfree;
        kstat_named_t nalloc_calls;
        kstat_named_t nfree_calls;
        kstat_named_t kernelbase;
        kstat_named_t econtig;
        kstat_named_t freemem;
        kstat_named_t availrmem;
        kstat_named_t lotsfree;
        kstat_named_t desfree;
        kstat_named_t minfree;
        kstat_named_t fastscan;
        kstat_named_t slowscan;
        kstat_named_t nscan;
        kstat_named_t desscan;
        kstat_named_t pp_kernel;
        kstat_named_t pagesfree;
        kstat_named_t pageslocked;
        kstat_named_t pagestotal;
} system_pages_kstat = {
        /* initializer order must match the member order above */
        { "physmem",            KSTAT_DATA_ULONG },
        { "nalloc",             KSTAT_DATA_ULONG },
        { "nfree",              KSTAT_DATA_ULONG },
        { "nalloc_calls",       KSTAT_DATA_ULONG },
        { "nfree_calls",        KSTAT_DATA_ULONG },
        { "kernelbase",         KSTAT_DATA_ULONG },
        { "econtig",            KSTAT_DATA_ULONG },
        { "freemem",            KSTAT_DATA_ULONG },
        { "availrmem",          KSTAT_DATA_ULONG },
        { "lotsfree",           KSTAT_DATA_ULONG },
        { "desfree",            KSTAT_DATA_ULONG },
        { "minfree",            KSTAT_DATA_ULONG },
        { "fastscan",           KSTAT_DATA_ULONG },
        { "slowscan",           KSTAT_DATA_ULONG },
        { "nscan",              KSTAT_DATA_ULONG },
        { "desscan",            KSTAT_DATA_ULONG },
        { "pp_kernel",          KSTAT_DATA_ULONG },
        { "pagesfree",          KSTAT_DATA_ULONG },
        { "pageslocked",        KSTAT_DATA_ULONG },
        { "pagestotal",         KSTAT_DATA_ULONG },
};
 223 
 224 static int header_kstat_update(kstat_t *, int);
 225 static int header_kstat_snapshot(kstat_t *, void *, int);
 226 static int system_misc_kstat_update(kstat_t *, int);
 227 static int system_pages_kstat_update(kstat_t *, int);
 228 
/*
 * Per-type description table with KSTAT_NUM_TYPES entries.  kstat_init()
 * publishes the names through the unix:0:kstat_types kstat, using each
 * entry's array index as its value.
 * NOTE(review): the entry order presumably matches the KSTAT_TYPE_*
 * constants in <sys/kstat.h>, and size/min_ndata/max_ndata are presumably
 * the per-element size and the allowed ks_ndata range -- confirm against
 * kstat_create(), which is not visible here.
 */
static struct {
        char    name[KSTAT_STRLEN];
        size_t  size;
        uint_t  min_ndata;
        uint_t  max_ndata;
} kstat_data_type[KSTAT_NUM_TYPES] = {
        { "raw",                1,                      0,      INT_MAX },
        { "name=value",         sizeof (kstat_named_t), 0,      INT_MAX },
        { "interrupt",          sizeof (kstat_intr_t),  1,      1       },
        { "i/o",                sizeof (kstat_io_t),    1,      1       },
        { "event_timer",        sizeof (kstat_timer_t), 0,      INT_MAX },
};
 241 
 242 int
 243 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
 244 {
 245         ekstat_t *e = (ekstat_t *)k;
 246         kstat_zone_t *kz;
 247 
 248         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 249         for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
 250                 if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
 251                         return (1);
 252                 if (zoneid == kz->zoneid)
 253                         return (1);
 254         }
 255         return (0);
 256 }
 257 
 258 void
 259 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
 260 {
 261         ekstat_t *e = (ekstat_t *)k;
 262         kstat_zone_t *kz, *t = NULL;
 263 
 264         mutex_enter(&kstat_chain_lock);
 265         if (zoneid == e->e_zone.zoneid) {
 266                 kz = e->e_zone.next;
 267                 ASSERT(kz != NULL);
 268                 e->e_zone.zoneid = kz->zoneid;
 269                 e->e_zone.next = kz->next;
 270                 goto out;
 271         }
 272         for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
 273                 if (kz->next->zoneid == zoneid) {
 274                         t = kz->next;
 275                         kz->next = t->next;
 276                         break;
 277                 }
 278         }
 279         ASSERT(t != NULL);      /* we removed something */
 280         kz = t;
 281 out:
 282         kstat_chain_id++;
 283         mutex_exit(&kstat_chain_lock);
 284         kmem_free(kz, sizeof (*kz));
 285 }
 286 
 287 void
 288 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
 289 {
 290         ekstat_t *e = (ekstat_t *)k;
 291         kstat_zone_t *kz;
 292 
 293         kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
 294         if (kz == NULL)
 295                 return;
 296         mutex_enter(&kstat_chain_lock);
 297         kz->zoneid = zoneid;
 298         kz->next = e->e_zone.next;
 299         e->e_zone.next = kz;
 300         kstat_chain_id++;
 301         mutex_exit(&kstat_chain_lock);
 302 }
 303 
 304 /*
 305  * Compare the list of zones for the given kstats, returning 0 if they match
 306  * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
 307  * In practice, this is called indirectly by kstat_hold_byname(), so one of the
 308  * two lists always has one element, and this is an O(n) operation rather than
 309  * O(n^2).
 310  */
 311 static int
 312 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
 313 {
 314         kstat_zone_t *kz1, *kz2;
 315 
 316         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 317         for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
 318                 for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
 319                         if (kz1->zoneid == ALL_ZONES ||
 320                             kz2->zoneid == ALL_ZONES)
 321                                 return (0);
 322                         if (kz1->zoneid == kz2->zoneid)
 323                                 return (0);
 324                 }
 325         }
 326         return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
 327 }
 328 
 329 /*
 330  * Support for keeping kstats sorted in AVL trees for fast lookups.
 331  */
 332 static int
 333 kstat_compare_bykid(const void *a1, const void *a2)
 334 {
 335         const kstat_t *k1 = a1;
 336         const kstat_t *k2 = a2;
 337 
 338         if (k1->ks_kid < k2->ks_kid)
 339                 return (-1);
 340         if (k1->ks_kid > k2->ks_kid)
 341                 return (1);
 342         return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
 343 }
 344 
 345 static int
 346 kstat_compare_byname(const void *a1, const void *a2)
 347 {
 348         const kstat_t *k1 = a1;
 349         const kstat_t *k2 = a2;
 350         int s;
 351 
 352         s = strcmp(k1->ks_module, k2->ks_module);
 353         if (s > 0)
 354                 return (1);
 355         if (s < 0)
 356                 return (-1);
 357 
 358         if (k1->ks_instance < k2->ks_instance)
 359                 return (-1);
 360         if (k1->ks_instance > k2->ks_instance)
 361                 return (1);
 362 
 363         s = strcmp(k1->ks_name, k2->ks_name);
 364         if (s > 0)
 365                 return (1);
 366         if (s < 0)
 367                 return (-1);
 368 
 369         return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
 370 }
 371 
 372 static kstat_t *
 373 kstat_hold(avl_tree_t *t, ekstat_t *template)
 374 {
 375         kstat_t *ksp;
 376         ekstat_t *e;
 377 
 378         mutex_enter(&kstat_chain_lock);
 379         for (;;) {
 380                 ksp = avl_find(t, template, NULL);
 381                 if (ksp == NULL)
 382                         break;
 383                 e = (ekstat_t *)ksp;
 384                 if (e->e_owner == NULL) {
 385                         e->e_owner = curthread;
 386                         break;
 387                 }
 388                 cv_wait(&e->e_cv, &kstat_chain_lock);
 389         }
 390         mutex_exit(&kstat_chain_lock);
 391         return (ksp);
 392 }
 393 
 394 void
 395 kstat_rele(kstat_t *ksp)
 396 {
 397         ekstat_t *e = (ekstat_t *)ksp;
 398 
 399         mutex_enter(&kstat_chain_lock);
 400         ASSERT(e->e_owner == curthread);
 401         e->e_owner = NULL;
 402         cv_broadcast(&e->e_cv);
 403         mutex_exit(&kstat_chain_lock);
 404 }
 405 
 406 kstat_t *
 407 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
 408 {
 409         ekstat_t e;
 410 
 411         e.e_ks.ks_kid = kid;
 412         e.e_zone.zoneid = zoneid;
 413         e.e_zone.next = NULL;
 414 
 415         return (kstat_hold(&kstat_avl_bykid, &e));
 416 }
 417 
 418 kstat_t *
 419 kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
 420     zoneid_t ks_zoneid)
 421 {
 422         ekstat_t e;
 423 
 424         kstat_set_string(e.e_ks.ks_module, ks_module);
 425         e.e_ks.ks_instance = ks_instance;
 426         kstat_set_string(e.e_ks.ks_name, ks_name);
 427         e.e_zone.zoneid = ks_zoneid;
 428         e.e_zone.next = NULL;
 429         return (kstat_hold(&kstat_avl_byname, &e));
 430 }
 431 
 432 static ekstat_t *
 433 kstat_alloc(size_t size)
 434 {
 435         ekstat_t *e = NULL;
 436 
 437         size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
 438 
 439         if (kstat_arena == NULL) {
 440                 if (size <= kstat_initial_avail) {
 441                         e = kstat_initial_ptr;
 442                         kstat_initial_ptr = (char *)kstat_initial_ptr + size;
 443                         kstat_initial_avail -= size;
 444                 }
 445         } else {
 446                 e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
 447         }
 448 
 449         if (e != NULL) {
 450                 bzero(e, size);
 451                 e->e_size = size;
 452                 cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
 453         }
 454 
 455         return (e);
 456 }
 457 
 458 static void
 459 kstat_free(ekstat_t *e)
 460 {
 461         cv_destroy(&e->e_cv);
 462         vmem_free(kstat_arena, e, e->e_size);
 463 }
 464 
/*
 * Create various system kstats.
 *
 * This creates the "kstat" vmem arena on top of the static kstat_initial
 * buffer, marks the kstats that were already carved out of that buffer as
 * allocated, and then creates and installs the built-in kstats below.
 * Failure to create kstat_headers panics; failure to create any of the
 * others is silently ignored.
 */
void
kstat_init(void)
{
        kstat_t *ksp;
        ekstat_t *e;
        avl_tree_t *t = &kstat_avl_bykid;

        /*
         * Set up the kstat vmem arena.
         */
        kstat_arena = vmem_create("kstat",
            kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
            segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);

        /*
         * Make initial kstats appear as though they were allocated.
         * Each existing kstat's exact address range [e, e + e_size) is
         * passed as the min/max address of the request.
         */
        for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
                (void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
                    0, 0, e, (char *)e + e->e_size,
                    VM_NOSLEEP | VM_BESTFIT | VM_PANIC);

        /*
         * The mother of all kstats.  The first kstat in the system, which
         * always has KID 0, has the headers for all kstats (including itself)
         * as its data.  Thus, the kstat driver does not need any special
         * interface to extract the kstat chain.
         */
        /* reset the ID counter so that kstat_headers is given KID 0 */
        kstat_chain_id = 0;
        ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
            0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
        if (ksp) {
                /*
                 * header_kstat_update() and header_kstat_snapshot() assert
                 * that kstat_chain_lock is held, so use it as ks_lock.
                 */
                ksp->ks_lock = &kstat_chain_lock;
                ksp->ks_update = header_kstat_update;
                ksp->ks_snapshot = header_kstat_snapshot;
                kstat_install(ksp);
        } else {
                panic("cannot create kstat 'kstat_headers'");
        }

        /*
         * kstat_types: one named entry per kstat type, whose name comes
         * from kstat_data_type[] and whose value is the type's index.
         */
        ksp = kstat_create("unix", 0, "kstat_types", "kstat",
            KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
        if (ksp) {
                int i;
                kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);

                for (i = 0; i < KSTAT_NUM_TYPES; i++) {
                        kstat_named_init(&kn[i], kstat_data_type[i].name,
                            KSTAT_DATA_ULONG);
                        kn[i].value.ul = i;
                }
                kstat_install(ksp);
        }

        /*
         * The remaining kstats are created with KSTAT_FLAG_VIRTUAL and have
         * ks_data pointed at storage that already exists elsewhere.
         */
        ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
            sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &sysinfo;
                kstat_install(ksp);
        }

        ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
            sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &vminfo;
                kstat_install(ksp);
        }

        ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
            segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) segmapcnt_ptr;
                ksp->ks_update = segmap_kstat_update;
                kstat_install(ksp);
        }

        ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
            biostats_ndata, KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) biostats_ptr;
                kstat_install(ksp);
        }

        ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
            sizeof (struct var), KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &v;
                kstat_install(ksp);
        }

        /* ks_ndata is the number of kstat_named_t members in the struct */
        ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
            sizeof (system_misc_kstat) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &system_misc_kstat;
                ksp->ks_update = system_misc_kstat_update;
                kstat_install(ksp);
        }

        ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
            sizeof (system_pages_kstat) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &system_pages_kstat;
                ksp->ks_update = system_pages_kstat_update;
                kstat_install(ksp);
        }

        /* the only kstat in this function created with KSTAT_FLAG_WRITABLE */
        ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
            pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

        if (ksp) {
                ksp->ks_data = pollstats_ptr;
                kstat_install(ksp);
        }
}
 584 
 585 /*
 586  * Caller of this should ensure that the string pointed by src
 587  * doesn't change while kstat's lock is held. Not doing so defeats
 588  * kstat's snapshot strategy as explained in <sys/kstat.h>
 589  */
 590 void
 591 kstat_named_setstr(kstat_named_t *knp, const char *src)
 592 {
 593         if (knp->data_type != KSTAT_DATA_STRING)
 594                 panic("kstat_named_setstr('%p', '%p'): "
 595                     "named kstat is not of type KSTAT_DATA_STRING",
 596                     (void *)knp, (void *)src);
 597 
 598         KSTAT_NAMED_STR_PTR(knp) = (char *)src;
 599         if (src != NULL)
 600                 KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
 601         else
 602                 KSTAT_NAMED_STR_BUFLEN(knp) = 0;
 603 }
 604 
 605 void
 606 kstat_set_string(char *dst, const char *src)
 607 {
 608         bzero(dst, KSTAT_STRLEN);
 609         (void) strncpy(dst, src, KSTAT_STRLEN - 1);
 610 }
 611 
 612 void
 613 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
 614 {
 615         kstat_set_string(knp->name, name);
 616         knp->data_type = data_type;
 617 
 618         if (data_type == KSTAT_DATA_STRING)
 619                 kstat_named_setstr(knp, NULL);
 620 }
 621 
 622 void
 623 kstat_timer_init(kstat_timer_t *ktp, const char *name)
 624 {
 625         kstat_set_string(ktp->name, name);
 626 }
 627 
 628 /* ARGSUSED */
 629 static int
 630 default_kstat_update(kstat_t *ksp, int rw)
 631 {
 632         uint_t i;
 633         size_t len = 0;
 634         kstat_named_t *knp;
 635 
 636         /*
 637          * Named kstats with variable-length long strings have a standard
 638          * way of determining how much space is needed to hold the snapshot:
 639          */
 640         if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
 641             (ksp->ks_flags & (KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_LONGSTRINGS))) {
 642 
 643                 /*
 644                  * Add in the space required for the strings
 645                  */
 646                 knp = KSTAT_NAMED_PTR(ksp);
 647                 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 648                         if (knp->data_type == KSTAT_DATA_STRING)
 649                                 len += KSTAT_NAMED_STR_BUFLEN(knp);
 650                 }
 651                 ksp->ks_data_size =
 652                     ksp->ks_ndata * sizeof (kstat_named_t) + len;
 653         }
 654         return (0);
 655 }
 656 
 657 static int
 658 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
 659 {
 660         kstat_io_t *kiop;
 661         hrtime_t cur_time;
 662         size_t  namedsz;
 663 
 664         ksp->ks_snaptime = cur_time = gethrtime();
 665 
 666         if (rw == KSTAT_WRITE) {
 667                 if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
 668                         return (EACCES);
 669                 bcopy(buf, ksp->ks_data, ksp->ks_data_size);
 670                 return (0);
 671         }
 672 
 673         /*
 674          * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
 675          * number of kstat_named_t structures, followed by an optional
 676          * string segment. The ks_data generally holds only the
 677          * kstat_named_t structures. So we copy it first. The strings,
 678          * if any, are copied below. For other kstat types, ks_data holds the
 679          * entire buffer.
 680          */
 681 
 682         namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
 683         if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
 684                 bcopy(ksp->ks_data, buf, namedsz);
 685         else
 686                 bcopy(ksp->ks_data, buf, ksp->ks_data_size);
 687 
 688         /*
 689          * Apply kstat type-specific data massaging
 690          */
 691         switch (ksp->ks_type) {
 692 
 693         case KSTAT_TYPE_IO:
 694                 /*
 695                  * Normalize time units and deal with incomplete transactions
 696                  */
 697                 kiop = (kstat_io_t *)buf;
 698 
 699                 scalehrtime(&kiop->wtime);
 700                 scalehrtime(&kiop->wlentime);
 701                 scalehrtime(&kiop->wlastupdate);
 702                 scalehrtime(&kiop->rtime);
 703                 scalehrtime(&kiop->rlentime);
 704                 scalehrtime(&kiop->rlastupdate);
 705 
 706                 if (kiop->wcnt != 0) {
 707                         /* like kstat_waitq_exit */
 708                         hrtime_t wfix = cur_time - kiop->wlastupdate;
 709                         kiop->wlastupdate = cur_time;
 710                         kiop->wlentime += kiop->wcnt * wfix;
 711                         kiop->wtime += wfix;
 712                 }
 713 
 714                 if (kiop->rcnt != 0) {
 715                         /* like kstat_runq_exit */
 716                         hrtime_t rfix = cur_time - kiop->rlastupdate;
 717                         kiop->rlastupdate = cur_time;
 718                         kiop->rlentime += kiop->rcnt * rfix;
 719                         kiop->rtime += rfix;
 720                 }
 721                 break;
 722 
 723         case KSTAT_TYPE_NAMED:
 724                 /*
 725                  * Massage any long strings in at the end of the buffer
 726                  */
 727                 if (ksp->ks_data_size > namedsz) {
 728                         uint_t i;
 729                         kstat_named_t *knp = buf;
 730                         char *dst = (char *)(knp + ksp->ks_ndata);
 731                         /*
 732                          * Copy strings and update pointers
 733                          */
 734                         for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 735                                 if (knp->data_type == KSTAT_DATA_STRING &&
 736                                     KSTAT_NAMED_STR_PTR(knp) != NULL) {
 737                                         bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
 738                                             KSTAT_NAMED_STR_BUFLEN(knp));
 739                                         KSTAT_NAMED_STR_PTR(knp) = dst;
 740                                         dst += KSTAT_NAMED_STR_BUFLEN(knp);
 741                                 }
 742                         }
 743                         ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
 744                 }
 745                 break;
 746         }
 747         return (0);
 748 }
 749 
 750 static int
 751 header_kstat_update(kstat_t *header_ksp, int rw)
 752 {
 753         int nkstats = 0;
 754         ekstat_t *e;
 755         avl_tree_t *t = &kstat_avl_bykid;
 756         zoneid_t zoneid;
 757 
 758         if (rw == KSTAT_WRITE)
 759                 return (EACCES);
 760 
 761         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 762 
 763         zoneid = getzoneid();
 764         for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
 765                 if (kstat_zone_find((kstat_t *)e, zoneid) &&
 766                     (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
 767                         nkstats++;
 768                 }
 769         }
 770         header_ksp->ks_ndata = nkstats;
 771         header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
 772         return (0);
 773 }
 774 
 775 /*
 776  * Copy out the data section of kstat 0, which consists of the list
 777  * of all kstat headers.  By specification, these headers must be
 778  * copied out in order of increasing KID.
 779  */
 780 static int
 781 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
 782 {
 783         ekstat_t *e;
 784         avl_tree_t *t = &kstat_avl_bykid;
 785         zoneid_t zoneid;
 786 
 787         header_ksp->ks_snaptime = gethrtime();
 788 
 789         if (rw == KSTAT_WRITE)
 790                 return (EACCES);
 791 
 792         ASSERT(MUTEX_HELD(&kstat_chain_lock));
 793 
 794         zoneid = getzoneid();
 795         for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
 796                 if (kstat_zone_find((kstat_t *)e, zoneid) &&
 797                     (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
 798                         bcopy(&e->e_ks, buf, sizeof (kstat_t));
 799                         buf = (char *)buf + sizeof (kstat_t);
 800                 }
 801         }
 802 
 803         return (0);
 804 }
 805 
 806 /* ARGSUSED */
 807 static int
 808 system_misc_kstat_update(kstat_t *ksp, int rw)
 809 {
 810         int myncpus = ncpus;
 811         int *loadavgp = &avenrun[0];
 812         time_t zone_boot_time;
 813         clock_t zone_lbolt;
 814         hrtime_t zone_hrtime;
 815         size_t zone_nproc;
 816 
 817         if (rw == KSTAT_WRITE)
 818                 return (EACCES);
 819 
 820         if (!INGLOBALZONE(curproc)) {
 821                 /*
 822                  * Here we grab cpu_lock which is OK as long as no-one in the
 823                  * future attempts to lookup this particular kstat
 824                  * (unix:0:system_misc) while holding cpu_lock.
 825                  */
 826                 mutex_enter(&cpu_lock);
 827                 if (pool_pset_enabled()) {
 828                         myncpus = zone_ncpus_get(curproc->p_zone);
 829                         ASSERT(myncpus > 0);
 830                 }
 831                 mutex_exit(&cpu_lock);
 832                 loadavgp = &curproc->p_zone->zone_avenrun[0];
 833         }
 834 
 835         if (INGLOBALZONE(curproc)) {
 836                 zone_boot_time = boot_time;
 837                 zone_lbolt = ddi_get_lbolt();
 838                 zone_nproc = nproc;
 839         } else {
 840                 zone_boot_time = curproc->p_zone->zone_boot_time;
 841 
 842                 zone_hrtime = gethrtime();
 843                 zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) -
 844                     NSEC_TO_TICK(curproc->p_zone->zone_zsched->p_mstart));
 845                 mutex_enter(&curproc->p_zone->zone_nlwps_lock);
 846                 zone_nproc = curproc->p_zone->zone_nprocs;
 847                 mutex_exit(&curproc->p_zone->zone_nlwps_lock);
 848         }
 849 
 850         system_misc_kstat.ncpus.value.ui32              = (uint32_t)myncpus;
 851         system_misc_kstat.lbolt.value.ui32              = (uint32_t)zone_lbolt;
 852         system_misc_kstat.deficit.value.ui32            = (uint32_t)deficit;
 853         system_misc_kstat.clk_intr.value.ui32           = (uint32_t)zone_lbolt;
 854         system_misc_kstat.vac.value.ui32                = (uint32_t)vac;
 855         system_misc_kstat.nproc.value.ui32              = (uint32_t)zone_nproc;
 856         system_misc_kstat.avenrun_1min.value.ui32       = (uint32_t)loadavgp[0];
 857         system_misc_kstat.avenrun_5min.value.ui32       = (uint32_t)loadavgp[1];
 858         system_misc_kstat.avenrun_15min.value.ui32      = (uint32_t)loadavgp[2];
 859         system_misc_kstat.boot_time.value.ui32          = (uint32_t)
 860             zone_boot_time;
 861         system_misc_kstat.nsec_per_tick.value.ui32      = (uint32_t)
 862             nsec_per_tick;
 863         return (0);
 864 }
 865 
 866 #ifdef  __sparc
 867 extern caddr_t  econtig32;
 868 #else   /* !__sparc */
 869 extern caddr_t  econtig;
 870 #endif  /* __sparc */
 871 
 872 /* ARGSUSED */
 873 static int
 874 system_pages_kstat_update(kstat_t *ksp, int rw)
 875 {
 876         kobj_stat_t kobj_stat;
 877 
 878         if (rw == KSTAT_WRITE) {
 879                 return (EACCES);
 880         }
 881 
 882         kobj_stat_get(&kobj_stat);
 883         system_pages_kstat.physmem.value.ul     = (ulong_t)physmem;
 884         system_pages_kstat.nalloc.value.ul      = kobj_stat.nalloc;
 885         system_pages_kstat.nfree.value.ul       = kobj_stat.nfree;
 886         system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
 887         system_pages_kstat.nfree_calls.value.ul = kobj_stat.nfree_calls;
 888         system_pages_kstat.kernelbase.value.ul  = (ulong_t)KERNELBASE;
 889 
 890 #ifdef  __sparc
 891         /*
 892          * kstat should REALLY be modified to also report kmem64_base and
 893          * kmem64_end (see sun4u/os/startup.c), as the virtual address range
 894          * [ kernelbase .. econtig ] no longer is truly reflective of the
 895          * kernel's vallocs...
 896          */
 897         system_pages_kstat.econtig.value.ul     = (ulong_t)econtig32;
 898 #else   /* !__sparc */
 899         system_pages_kstat.econtig.value.ul     = (ulong_t)econtig;
 900 #endif  /* __sparc */
 901 
 902         system_pages_kstat.freemem.value.ul     = (ulong_t)freemem;
 903         system_pages_kstat.availrmem.value.ul   = (ulong_t)availrmem;
 904         system_pages_kstat.lotsfree.value.ul    = (ulong_t)lotsfree;
 905         system_pages_kstat.desfree.value.ul     = (ulong_t)desfree;
 906         system_pages_kstat.minfree.value.ul     = (ulong_t)minfree;
 907         system_pages_kstat.fastscan.value.ul    = (ulong_t)fastscan;
 908         system_pages_kstat.slowscan.value.ul    = (ulong_t)slowscan;
 909         system_pages_kstat.nscan.value.ul       = (ulong_t)nscan;
 910         system_pages_kstat.desscan.value.ul     = (ulong_t)desscan;
 911         system_pages_kstat.pagesfree.value.ul   = (ulong_t)freemem;
 912         system_pages_kstat.pageslocked.value.ul = (ulong_t)(availrmem_initial -
 913             availrmem);
 914         system_pages_kstat.pagestotal.value.ul  = (ulong_t)total_pages;
 915         /*
 916          * pp_kernel represents total pages used by the kernel since the
 917          * startup. This formula takes into account the boottime kernel
 918          * footprint and also considers the availrmem changes because of
 919          * user explicit page locking.
 920          */
 921         system_pages_kstat.pp_kernel.value.ul   = (ulong_t)(physinstalled -
 922             obp_pages - availrmem - k_anoninfo.ani_mem_resv -
 923             anon_segkp_pages_locked - pages_locked -
 924             pages_claimed - pages_useclaim);
 925 
 926         return (0);
 927 }
 928 
 929 kstat_t *
 930 kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
 931     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
 932 {
 933         return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
 934             ks_type, ks_ndata, ks_flags, ALL_ZONES));
 935 }
 936 
 937 /*
 938  * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
 939  * the specified name exists, reactivate it.  Returns a pointer to the kstat
 940  * on success, NULL on failure.  The kstat will not be visible to the
 941  * kstat driver until kstat_install().
 942  */
 943 kstat_t *
 944 kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
 945     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
 946     zoneid_t ks_zoneid)
 947 {
 948         size_t ks_data_size;
 949         kstat_t *ksp;
 950         ekstat_t *e;
 951         avl_index_t where;
 952         char namebuf[KSTAT_STRLEN + 16];
 953 
 954         if (avl_numnodes(&kstat_avl_bykid) == 0) {
 955                 avl_create(&kstat_avl_bykid, kstat_compare_bykid,
 956                     sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
 957 
 958                 avl_create(&kstat_avl_byname, kstat_compare_byname,
 959                     sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
 960         }
 961 
 962         /*
 963          * If ks_name == NULL, set the ks_name to <module><instance>.
 964          */
 965         if (ks_name == NULL) {
 966                 char buf[KSTAT_STRLEN];
 967                 kstat_set_string(buf, ks_module);
 968                 (void) sprintf(namebuf, "%s%d", buf, ks_instance);
 969                 ks_name = namebuf;
 970         }
 971 
 972         /*
 973          * Make sure it's a valid kstat data type
 974          */
 975         if (ks_type >= KSTAT_NUM_TYPES) {
 976                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
 977                     "invalid kstat type %d",
 978                     ks_module, ks_instance, ks_name, ks_type);
 979                 return (NULL);
 980         }
 981 
 982         /*
 983          * Don't allow persistent virtual kstats -- it makes no sense.
 984          * ks_data points to garbage when the client goes away.
 985          */
 986         if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
 987             (ks_flags & KSTAT_FLAG_VIRTUAL)) {
 988                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
 989                     "cannot create persistent virtual kstat",
 990                     ks_module, ks_instance, ks_name);
 991                 return (NULL);
 992         }
 993 
 994         /*
 995          * Don't allow variable-size physical kstats, since the framework's
 996          * memory allocation for physical kstat data is fixed at creation time.
 997          */
 998         if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
 999             !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
1000                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1001                     "cannot create variable-size physical kstat",
1002                     ks_module, ks_instance, ks_name);
1003                 return (NULL);
1004         }
1005 
1006         /*
1007          * Make sure the number of data fields is within legal range
1008          */
1009         if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1010             ks_ndata > kstat_data_type[ks_type].max_ndata) {
1011                 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1012                     "ks_ndata=%d out of range [%d, %d]",
1013                     ks_module, ks_instance, ks_name, (int)ks_ndata,
1014                     kstat_data_type[ks_type].min_ndata,
1015                     kstat_data_type[ks_type].max_ndata);
1016                 return (NULL);
1017         }
1018 
1019         ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1020 
1021         /*
1022          * If the named kstat already exists and is dormant, reactivate it.
1023          */
1024         ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1025         if (ksp != NULL) {
1026                 if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1027                         /*
1028                          * The named kstat exists but is not dormant --
1029                          * this is a kstat namespace collision.
1030                          */
1031                         kstat_rele(ksp);
1032                         cmn_err(CE_WARN,
1033                             "kstat_create('%s', %d, '%s'): namespace collision",
1034                             ks_module, ks_instance, ks_name);
1035                         return (NULL);
1036                 }
1037                 if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1038                     (ksp->ks_type != ks_type) ||
1039                     (ksp->ks_ndata != ks_ndata) ||
1040                     (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1041                         /*
1042                          * The name is the same, but the other key parameters
1043                          * differ from those of the dormant kstat -- bogus.
1044                          */
1045                         kstat_rele(ksp);
1046                         cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1047                             "invalid reactivation of dormant kstat",
1048                             ks_module, ks_instance, ks_name);
1049                         return (NULL);
1050                 }
1051                 /*
1052                  * Return dormant kstat pointer to caller.  As usual,
1053                  * the kstat is marked invalid until kstat_install().
1054                  */
1055                 ksp->ks_flags |= KSTAT_FLAG_INVALID;
1056                 kstat_rele(ksp);
1057                 return (ksp);
1058         }
1059 
1060         /*
1061          * Allocate memory for the new kstat header and, if this is a physical
1062          * kstat, the data section.
1063          */
1064         e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1065         if (e == NULL) {
1066                 cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1067                     "insufficient kernel memory",
1068                     ks_module, ks_instance, ks_name);
1069                 return (NULL);
1070         }
1071 
1072         /*
1073          * Initialize as many fields as we can.  The caller may reset
1074          * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1075          * Creators of virtual kstats may also reset ks_data.  It is
1076          * also up to the caller to initialize the kstat data section,
1077          * if necessary.  All initialization must be complete before
1078          * calling kstat_install().
1079          */
1080         e->e_zone.zoneid = ks_zoneid;
1081         e->e_zone.next = NULL;
1082 
1083         ksp = &e->e_ks;
1084         ksp->ks_crtime               = gethrtime();
1085         kstat_set_string(ksp->ks_module, ks_module);
1086         ksp->ks_instance     = ks_instance;
1087         kstat_set_string(ksp->ks_name, ks_name);
1088         ksp->ks_type         = ks_type;
1089         kstat_set_string(ksp->ks_class, ks_class);
1090         ksp->ks_flags                = ks_flags | KSTAT_FLAG_INVALID;
1091         if (ks_flags & KSTAT_FLAG_VIRTUAL)
1092                 ksp->ks_data = NULL;
1093         else
1094                 ksp->ks_data = (void *)(e + 1);
1095         ksp->ks_ndata                = ks_ndata;
1096         ksp->ks_data_size    = ks_data_size;
1097         ksp->ks_snaptime     = ksp->ks_crtime;
1098         ksp->ks_update               = default_kstat_update;
1099         ksp->ks_private              = NULL;
1100         ksp->ks_snapshot     = default_kstat_snapshot;
1101         ksp->ks_lock         = NULL;
1102 
1103         mutex_enter(&kstat_chain_lock);
1104 
1105         /*
1106          * Add our kstat to the AVL trees.
1107          */
1108         if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1109                 mutex_exit(&kstat_chain_lock);
1110                 cmn_err(CE_WARN,
1111                     "kstat_create('%s', %d, '%s'): namespace collision",
1112                     ks_module, ks_instance, ks_name);
1113                 kstat_free(e);
1114                 return (NULL);
1115         }
1116         avl_insert(&kstat_avl_byname, e, where);
1117 
1118         /*
1119          * Loop around until we find an unused KID.
1120          */
1121         do {
1122                 ksp->ks_kid = kstat_chain_id++;
1123         } while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1124         avl_insert(&kstat_avl_bykid, e, where);
1125 
1126         mutex_exit(&kstat_chain_lock);
1127 
1128         return (ksp);
1129 }
1130 
1131 /*
1132  * Activate a fully initialized kstat and make it visible to /dev/kstat.
1133  */
1134 void
1135 kstat_install(kstat_t *ksp)
1136 {
1137         zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1138 
1139         /*
1140          * If this is a variable-size kstat, it MUST provide kstat data locking
1141          * to prevent data-size races with kstat readers.
1142          */
1143         if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1144                 panic("kstat_install('%s', %d, '%s'): "
1145                     "cannot create variable-size kstat without data lock",
1146                     ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1147         }
1148 
1149         if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1150                 cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1151                     (void *)ksp);
1152                 return;
1153         }
1154 
1155         if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1156                 uint_t i;
1157                 kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1158 
1159                 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1160                         if (knp->data_type == KSTAT_DATA_STRING) {
1161                                 ksp->ks_flags |= KSTAT_FLAG_LONGSTRINGS;
1162                                 break;
1163                         }
1164                 }
1165                 /*
1166                  * The default snapshot routine does not handle KSTAT_WRITE
1167                  * for long strings.
1168                  */
1169                 if ((ksp->ks_flags & KSTAT_FLAG_LONGSTRINGS) &&
1170                     (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1171                     (ksp->ks_snapshot == default_kstat_snapshot)) {
1172                         panic("kstat_install('%s', %d, '%s'): "
1173                             "named kstat containing KSTAT_DATA_STRING "
1174                             "is writable but uses default snapshot routine",
1175                             ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1176                 }
1177         }
1178 
1179         if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1180 
1181                 /*
1182                  * We are reactivating a dormant kstat.  Initialize the
1183                  * caller's underlying data to the value it had when the
1184                  * kstat went dormant, and mark the kstat as active.
1185                  * Grab the provider's kstat lock if it's not already held.
1186                  */
1187                 kmutex_t *lp = ksp->ks_lock;
1188                 if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1189                         mutex_enter(lp);
1190                         (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1191                         mutex_exit(lp);
1192                 } else {
1193                         (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1194                 }
1195                 ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1196         }
1197 
1198         /*
1199          * Now that the kstat is active, make it visible to the kstat driver.
1200          * When copying out kstats the count is determined in
1201          * header_kstat_update() and actually copied into kbuf in
1202          * header_kstat_snapshot(). kstat_chain_lock is held across the two
1203          * calls to ensure that this list doesn't change. Thus, we need to
1204          * also take the lock to ensure that the we don't copy the new kstat
1205          * in the 2nd pass and overrun the buf.
1206          */
1207         mutex_enter(&kstat_chain_lock);
1208         ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1209         mutex_exit(&kstat_chain_lock);
1210         kstat_rele(ksp);
1211 }
1212 
1213 /*
1214  * Remove a kstat from the system.  Or, if it's a persistent kstat,
1215  * just update the data and mark it as dormant.
1216  */
1217 void
1218 kstat_delete(kstat_t *ksp)
1219 {
1220         kmutex_t *lp;
1221         ekstat_t *e = (ekstat_t *)ksp;
1222         zoneid_t zoneid;
1223         kstat_zone_t *kz;
1224 
1225         ASSERT(ksp != NULL);
1226 
1227         if (ksp == NULL)
1228                 return;
1229 
1230         zoneid = e->e_zone.zoneid;
1231 
1232         lp = ksp->ks_lock;
1233 
1234         if (lp != NULL && MUTEX_HELD(lp)) {
1235                 panic("kstat_delete(%p): caller holds data lock %p",
1236                     (void *)ksp, (void *)lp);
1237         }
1238 
1239         if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1240                 cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1241                     (void *)ksp);
1242                 return;
1243         }
1244 
1245         if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1246                 /*
1247                  * Update the data one last time, so that all activity
1248                  * prior to going dormant has been accounted for.
1249                  */
1250                 KSTAT_ENTER(ksp);
1251                 (void) KSTAT_UPDATE(ksp, KSTAT_READ);
1252                 KSTAT_EXIT(ksp);
1253 
1254                 /*
1255                  * Mark the kstat as dormant and restore caller-modifiable
1256                  * fields to default values, so the kstat is readable during
1257                  * the dormant phase.
1258                  */
1259                 ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1260                 ksp->ks_lock = NULL;
1261                 ksp->ks_update = default_kstat_update;
1262                 ksp->ks_private = NULL;
1263                 ksp->ks_snapshot = default_kstat_snapshot;
1264                 kstat_rele(ksp);
1265                 return;
1266         }
1267 
1268         /*
1269          * Remove the kstat from the framework's AVL trees,
1270          * free the allocated memory, and increment kstat_chain_id so
1271          * /dev/kstat clients can detect the event.
1272          */
1273         mutex_enter(&kstat_chain_lock);
1274         avl_remove(&kstat_avl_bykid, e);
1275         avl_remove(&kstat_avl_byname, e);
1276         kstat_chain_id++;
1277         mutex_exit(&kstat_chain_lock);
1278 
1279         kz = e->e_zone.next;
1280         while (kz != NULL) {
1281                 kstat_zone_t *t = kz;
1282 
1283                 kz = kz->next;
1284                 kmem_free(t, sizeof (*t));
1285         }
1286         kstat_rele(ksp);
1287         kstat_free(e);
1288 }
1289 
1290 void
1291 kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1292     const char *ks_name, zoneid_t ks_zoneid)
1293 {
1294         kstat_t *ksp;
1295 
1296         ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1297         if (ksp != NULL) {
1298                 kstat_rele(ksp);
1299                 kstat_delete(ksp);
1300         }
1301 }
1302 
1303 void
1304 kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
1305 {
1306         kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1307 }
1308 
1309 /*
1310  * The sparc V9 versions of these routines can be much cheaper than
1311  * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1312  * For simplicity, however, we always feed the C versions to lint.
1313  */
1314 #if !defined(__sparc) || defined(lint) || defined(__lint)
1315 
1316 void
1317 kstat_waitq_enter(kstat_io_t *kiop)
1318 {
1319         hrtime_t new, delta;
1320         ulong_t wcnt;
1321 
1322         new = gethrtime_unscaled();
1323         delta = new - kiop->wlastupdate;
1324         kiop->wlastupdate = new;
1325         wcnt = kiop->wcnt++;
1326         if (wcnt != 0) {
1327                 kiop->wlentime += delta * wcnt;
1328                 kiop->wtime += delta;
1329         }
1330 }
1331 
1332 void
1333 kstat_waitq_exit(kstat_io_t *kiop)
1334 {
1335         hrtime_t new, delta;
1336         ulong_t wcnt;
1337 
1338         new = gethrtime_unscaled();
1339         delta = new - kiop->wlastupdate;
1340         kiop->wlastupdate = new;
1341         wcnt = kiop->wcnt--;
1342         ASSERT((int)wcnt > 0);
1343         kiop->wlentime += delta * wcnt;
1344         kiop->wtime += delta;
1345 }
1346 
1347 void
1348 kstat_runq_enter(kstat_io_t *kiop)
1349 {
1350         hrtime_t new, delta;
1351         ulong_t rcnt;
1352 
1353         new = gethrtime_unscaled();
1354         delta = new - kiop->rlastupdate;
1355         kiop->rlastupdate = new;
1356         rcnt = kiop->rcnt++;
1357         if (rcnt != 0) {
1358                 kiop->rlentime += delta * rcnt;
1359                 kiop->rtime += delta;
1360         }
1361 }
1362 
1363 void
1364 kstat_runq_exit(kstat_io_t *kiop)
1365 {
1366         hrtime_t new, delta;
1367         ulong_t rcnt;
1368 
1369         new = gethrtime_unscaled();
1370         delta = new - kiop->rlastupdate;
1371         kiop->rlastupdate = new;
1372         rcnt = kiop->rcnt--;
1373         ASSERT((int)rcnt > 0);
1374         kiop->rlentime += delta * rcnt;
1375         kiop->rtime += delta;
1376 }
1377 
1378 void
1379 kstat_waitq_to_runq(kstat_io_t *kiop)
1380 {
1381         hrtime_t new, delta;
1382         ulong_t wcnt, rcnt;
1383 
1384         new = gethrtime_unscaled();
1385 
1386         delta = new - kiop->wlastupdate;
1387         kiop->wlastupdate = new;
1388         wcnt = kiop->wcnt--;
1389         ASSERT((int)wcnt > 0);
1390         kiop->wlentime += delta * wcnt;
1391         kiop->wtime += delta;
1392 
1393         delta = new - kiop->rlastupdate;
1394         kiop->rlastupdate = new;
1395         rcnt = kiop->rcnt++;
1396         if (rcnt != 0) {
1397                 kiop->rlentime += delta * rcnt;
1398                 kiop->rtime += delta;
1399         }
1400 }
1401 
1402 void
1403 kstat_runq_back_to_waitq(kstat_io_t *kiop)
1404 {
1405         hrtime_t new, delta;
1406         ulong_t wcnt, rcnt;
1407 
1408         new = gethrtime_unscaled();
1409 
1410         delta = new - kiop->rlastupdate;
1411         kiop->rlastupdate = new;
1412         rcnt = kiop->rcnt--;
1413         ASSERT((int)rcnt > 0);
1414         kiop->rlentime += delta * rcnt;
1415         kiop->rtime += delta;
1416 
1417         delta = new - kiop->wlastupdate;
1418         kiop->wlastupdate = new;
1419         wcnt = kiop->wcnt++;
1420         if (wcnt != 0) {
1421                 kiop->wlentime += delta * wcnt;
1422                 kiop->wtime += delta;
1423         }
1424 }
1425 
1426 #endif
1427 
1428 void
1429 kstat_timer_start(kstat_timer_t *ktp)
1430 {
1431         ktp->start_time = gethrtime();
1432 }
1433 
1434 void
1435 kstat_timer_stop(kstat_timer_t *ktp)
1436 {
1437         hrtime_t        etime;
1438         u_longlong_t    num_events;
1439 
1440         ktp->stop_time = etime = gethrtime();
1441         etime -= ktp->start_time;
1442         num_events = ktp->num_events;
1443         if (etime < ktp->min_time || num_events == 0)
1444                 ktp->min_time = etime;
1445         if (etime > ktp->max_time)
1446                 ktp->max_time = etime;
1447         ktp->elapsed_time += etime;
1448         ktp->num_events = num_events + 1;
1449 }