1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright 2016 Garrett D'Amore
  27  */
  28 
  29 #ifndef _SYS_KSTAT_H
  30 #define _SYS_KSTAT_H
  31 
  32 /*
  33  * Definition of general kernel statistics structures and /dev/kstat ioctls
  34  */
  35 
  36 #include <sys/types.h>
  37 #include <sys/time.h>
  38 
  39 #ifdef  __cplusplus
  40 extern "C" {
  41 #endif
  42 
  43 typedef int     kid_t;          /* unique kstat id */
  44 
  45 /*
  46  * Kernel statistics driver (/dev/kstat) ioctls
  47  */
  48 
  49 #define KSTAT_IOC_BASE          ('K' << 8)
  50 
  51 #define KSTAT_IOC_CHAIN_ID      KSTAT_IOC_BASE | 0x01
  52 #define KSTAT_IOC_READ          KSTAT_IOC_BASE | 0x02
  53 #define KSTAT_IOC_WRITE         KSTAT_IOC_BASE | 0x03
  54 
  55 /*
  56  * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor):
  57  *
  58  *      kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL);
  59  *      kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *);
  60  *      kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *);
  61  */
  62 
  63 #define KSTAT_STRLEN    31      /* 30 chars + NULL; must be 16 * n - 1 */
  64 
  65 /*
  66  * The generic kstat header
  67  */
  68 
  69 typedef struct kstat {
  70         /*
  71          * Fields relevant to both kernel and user
  72          */
  73         hrtime_t        ks_crtime;      /* creation time (from gethrtime()) */
  74         struct kstat    *ks_next;       /* kstat chain linkage */
  75         kid_t           ks_kid;         /* unique kstat ID */
  76         char            ks_module[KSTAT_STRLEN]; /* provider module name */
  77         uchar_t         ks_resv;        /* reserved, currently just padding */
  78         int             ks_instance;    /* provider module's instance */
  79         char            ks_name[KSTAT_STRLEN]; /* kstat name */
  80         uchar_t         ks_type;        /* kstat data type */
  81         char            ks_class[KSTAT_STRLEN]; /* kstat class */
  82         uchar_t         ks_flags;       /* kstat flags */
  83         void            *ks_data;       /* kstat type-specific data */
  84         uint_t          ks_ndata;       /* # of type-specific data records */
  85         size_t          ks_data_size;   /* total size of kstat data section */
  86         hrtime_t        ks_snaptime;    /* time of last data shapshot */
  87         /*
  88          * Fields relevant to kernel only
  89          */
  90         int             (*ks_update)(struct kstat *, int); /* dynamic update */
  91         void            *ks_private;    /* arbitrary provider-private data */
  92         int             (*ks_snapshot)(struct kstat *, void *, int);
  93         void            *ks_lock;       /* protects this kstat's data */
  94 } kstat_t;
  95 
  96 #ifdef _SYSCALL32
  97 
  98 typedef int32_t kid32_t;
  99 
 100 typedef struct kstat32 {
 101         /*
 102          * Fields relevant to both kernel and user
 103          */
 104         hrtime_t        ks_crtime;
 105         caddr32_t       ks_next;                /* struct kstat pointer */
 106         kid32_t         ks_kid;
 107         char            ks_module[KSTAT_STRLEN];
 108         uint8_t         ks_resv;
 109         int32_t         ks_instance;
 110         char            ks_name[KSTAT_STRLEN];
 111         uint8_t         ks_type;
 112         char            ks_class[KSTAT_STRLEN];
 113         uint8_t         ks_flags;
 114         caddr32_t       ks_data;                /* type-specific data */
 115         uint32_t        ks_ndata;
 116         size32_t        ks_data_size;
 117         hrtime_t        ks_snaptime;
 118         /*
 119          * Fields relevant to kernel only (only needed here for padding)
 120          */
 121         int32_t         _ks_update;
 122         caddr32_t       _ks_private;
 123         int32_t         _ks_snapshot;
 124         caddr32_t       _ks_lock;
 125 } kstat32_t;
 126 
 127 #endif  /* _SYSCALL32 */
 128 
 129 /*
 130  * kstat structure and locking strategy
 131  *
 132  * Each kstat consists of a header section (a kstat_t) and a data section.
 133  * The system maintains a set of kstats, protected by kstat_chain_lock.
 134  * kstat_chain_lock protects all additions to/deletions from this set,
 135  * as well as all changes to kstat headers.  kstat data sections are
 136  * *optionally* protected by the per-kstat ks_lock.  If ks_lock is non-NULL,
 137  * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their
 138  * operations on that kstat.  It is up to the kstat provider to decide whether
 139  * guaranteeing consistent data to kstat clients is sufficiently important
 140  * to justify the locking cost.  Note, however, that most statistic updates
 141  * already occur under one of the provider's mutexes, so if the provider sets
 142  * ks_lock to point to that mutex, then kstat data locking is free.
 143  *
 144  * NOTE: variable-size kstats MUST employ kstat data locking, to prevent
 145  * data-size races with kstat clients.
 146  *
 147  * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *)
 148  * in the kstat header so that users don't have to be exposed to all of the
 149  * kernel's lock-related data structures.
 150  */
 151 
 152 #if     defined(_KERNEL)
 153 
 154 #define KSTAT_ENTER(k)  \
 155         { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_enter(lp); }
 156 
 157 #define KSTAT_EXIT(k)   \
 158         { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_exit(lp); }
 159 
 160 #define KSTAT_UPDATE(k, rw)             (*(k)->ks_update)((k), (rw))
 161 
 162 #define KSTAT_SNAPSHOT(k, buf, rw)      (*(k)->ks_snapshot)((k), (buf), (rw))
 163 
 164 #endif  /* defined(_KERNEL) */
 165 
 166 /*
 167  * kstat time
 168  *
 169  * All times associated with kstats (e.g. creation time, snapshot time,
 170  * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values,
 171  * as returned by gethrtime().  The accuracy of these timestamps is machine
 172  * dependent, but the precision (units) is the same across all platforms.
 173  */
 174 
 175 /*
 176  * kstat identity (KID)
 177  *
 178  * Each kstat is assigned a unique KID (kstat ID) when it is added to the
 179  * global kstat chain.  The KID is used as a cookie by /dev/kstat to
 180  * request information about the corresponding kstat.  There is also
 181  * an identity associated with the entire kstat chain, kstat_chain_id,
 182  * which is bumped each time a kstat is added or deleted.  /dev/kstat uses
 183  * the chain ID to detect changes in the kstat chain (e.g., a new disk
 184  * coming online) between ioctl()s.
 185  */
 186 
 187 /*
 188  * kstat module, kstat instance
 189  *
 190  * ks_module and ks_instance contain the name and instance of the module
 191  * that created the kstat.  In cases where there can only be one instance,
 192  * ks_instance is 0.  The kernel proper (/kernel/unix) uses "unix" as its
 193  * module name.
 194  */
 195 
 196 /*
 197  * kstat name
 198  *
 199  * ks_name gives a meaningful name to a kstat.  The full kstat namespace
 200  * is module.instance.name, so the name only need be unique within a
 201  * module.  kstat_create() will fail if you try to create a kstat with
 202  * an already-used (ks_module, ks_instance, ks_name) triplet.  Spaces are
 203  * allowed in kstat names, but strongly discouraged, since they hinder
 204  * awk-style processing at user level.
 205  */
 206 
 207 /*
 208  * kstat type
 209  *
 210  * The kstat mechanism provides several flavors of kstat data, defined
 211  * below.  The "raw" kstat type is just treated as an array of bytes; you
 212  * can use this to export any kind of data you want.
 213  *
 214  * Some kstat types allow multiple data structures per kstat, e.g.
 215  * KSTAT_TYPE_NAMED; others do not.  This is part of the spec for each
 216  * kstat data type.
 217  *
 218  * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES.  To
 219  * get this information, read out the standard system kstat "kstat_types".
 220  */
 221 
 222 #define KSTAT_TYPE_RAW          0       /* can be anything */
 223                                         /* ks_ndata >= 1 */
 224 #define KSTAT_TYPE_NAMED        1       /* name/value pair */
 225                                         /* ks_ndata >= 1 */
 226 #define KSTAT_TYPE_INTR         2       /* interrupt statistics */
 227                                         /* ks_ndata == 1 */
 228 #define KSTAT_TYPE_IO           3       /* I/O statistics */
 229                                         /* ks_ndata == 1 */
 230 #define KSTAT_TYPE_TIMER        4       /* event timer */
 231                                         /* ks_ndata >= 1 */
 232 
 233 #define KSTAT_NUM_TYPES         5
 234 
 235 /*
 236  * kstat class
 237  *
 238  * Each kstat can be characterized as belonging to some broad class
 239  * of statistics, e.g. disk, tape, net, vm, streams, etc.  This field
 240  * can be used as a filter to extract related kstats.  The following
 241  * values are currently in use: disk, tape, net, controller, vm, kvm,
 242  * hat, streams, kstat, and misc.  (The kstat class encompasses things
 243  * like kstat_types.)
 244  */
 245 
 246 /*
 247  * kstat flags
 248  *
 249  * Any of the following flags may be passed to kstat_create().  They are
 250  * all zero by default.
 251  *
 252  *      KSTAT_FLAG_VIRTUAL:
 253  *
 254  *              Tells kstat_create() not to allocate memory for the
 255  *              kstat data section; instead, you will set the ks_data
 256  *              field to point to the data you wish to export.  This
 257  *              provides a convenient way to export existing data
 258  *              structures.
 259  *
 260  *      KSTAT_FLAG_VAR_SIZE:
 261  *
 262  *              The size of the kstat you are creating will vary over time.
 263  *              For example, you may want to use the kstat mechanism to
 264  *              export a linked list.  NOTE: The kstat framework does not
 265  *              manage the data section, so all variable-size kstats must be
 266  *              virtual kstats.  Moreover, variable-size kstats MUST employ
 267  *              kstat data locking to prevent data-size races with kstat
 268  *              clients.  See the section on "kstat snapshot" for details.
 269  *
 270  *      KSTAT_FLAG_WRITABLE:
 271  *
 272  *              Makes the kstat's data section writable by root.
 273  *              The ks_snapshot routine (see below) does not need to check for
 274  *              this; permission checking is handled in the kstat driver.
 275  *
 276  *      KSTAT_FLAG_PERSISTENT:
 277  *
 278  *              Indicates that this kstat is to be persistent over time.
 279  *              For persistent kstats, kstat_delete() simply marks the
 280  *              kstat as dormant; a subsequent kstat_create() reactivates
 281  *              the kstat.  This feature is provided so that statistics
 282  *              are not lost across driver close/open (e.g., raw disk I/O
 283  *              on a disk with no mounted partitions.)
 284  *              NOTE: Persistent kstats cannot be virtual, since ks_data
 285  *              points to garbage as soon as the driver goes away.
 286  *
 287  * The following flags are maintained by the kstat framework:
 288  *
 289  *      KSTAT_FLAG_DORMANT:
 290  *
 291  *              For persistent kstats, indicates that the kstat is in the
 292  *              dormant state (e.g., the corresponding device is closed).
 293  *
 294  *      KSTAT_FLAG_INVALID:
 295  *
 296  *              This flag is set when a kstat is in a transitional state,
 297  *              e.g. between kstat_create() and kstat_install().
 298  *              kstat clients must not attempt to access the kstat's data
 299  *              if this flag is set.
 300  *
 301  *      KSTAT_FLAG_LONGSTRINGS:
 302  *
 303  *              Indicates that this kstat contains long strings (which
 304  *              are stored outside of the kstat data section). When copied
 305  *              out to user space the string data will be held in the data
 306  *              section provided by the user.
 307  */
 308 
 309 #define KSTAT_FLAG_VIRTUAL              0x01
 310 #define KSTAT_FLAG_VAR_SIZE             0x02
 311 #define KSTAT_FLAG_WRITABLE             0x04
 312 #define KSTAT_FLAG_PERSISTENT           0x08
 313 #define KSTAT_FLAG_DORMANT              0x10
 314 #define KSTAT_FLAG_INVALID              0x20
 315 #define KSTAT_FLAG_LONGSTRINGS          0x40
 316 
 317 /*
 318  * Dynamic update support
 319  *
 320  * The kstat mechanism allows for an optional ks_update function to update
 321  * kstat data.  This is useful for drivers where the underlying device
 322  * keeps cheap hardware stats, but extraction is expensive.  Instead of
 323  * constantly keeping the kstat data section up to date, you can supply a
 324  * ks_update function which updates the kstat's data section on demand.
 325  * To take advantage of this feature, simply set the ks_update field before
 326  * calling kstat_install().
 327  *
 328  * The ks_update function, if supplied, must have the following structure:
 329  *
 *      int
 *      foo_kstat_update(kstat_t *ksp, int rw)
 *      {
 *              if (rw == KSTAT_WRITE) {
 *                      ... update the native stats from ksp->ks_data;
 *                              return EACCES if you don't support this
 *              } else {
 *                      ... update ksp->ks_data from the native stats
 *              }
 *              return (0);
 *      }
 340  *
 341  * The ks_update return codes are: 0 for success, EACCES if you don't allow
 342  * KSTAT_WRITE, and EIO for any other type of error.
 343  *
 344  * In general, the ks_update function may need to refer to provider-private
 345  * data; for example, it may need a pointer to the provider's raw statistics.
 346  * The ks_private field is available for this purpose.  Its use is entirely
 347  * at the provider's discretion.
 348  *
 349  * All variable-size kstats MUST supply a ks_update routine, which computes
 350  * and sets ks_data_size (and ks_ndata if that is meaningful), since these
 351  * are needed to perform kstat snapshots (see below).
 352  *
 353  * No kstat locking should be done inside the ks_update routine.  The caller
 354  * will already be holding the kstat's ks_lock (to ensure consistent data).
 355  */
 356 
 357 #define KSTAT_READ      0
 358 #define KSTAT_WRITE     1
 359 
 360 /*
 361  * Kstat snapshot
 362  *
 363  * In order to get a consistent view of a kstat's data, clients must obey
 364  * the kstat's locking strategy.  However, these clients may need to perform
 365  * operations on the data which could cause a fault (e.g. copyout()), or
 366  * operations which are simply expensive.  Doing so could cause deadlock
 367  * (e.g. if you're holding a disk's kstat lock which is ultimately required
 368  * to resolve a copyout() fault), performance degradation (since the providers'
 369  * activity is serialized at the kstat lock), device timing problems, etc.
 370  *
 371  * To avoid these problems, kstat data is provided via snapshots.  Taking
 372  * a snapshot is a simple process: allocate a wired-down kernel buffer,
 373  * acquire the kstat's data lock, copy the data into the buffer ("take the
 374  * snapshot"), and release the lock.  This ensures that the kstat's data lock
 375  * will be held as briefly as possible, and that no faults will occur while
 376  * the lock is held.
 377  *
 378  * Normally, the snapshot is taken by default_kstat_snapshot(), which
 379  * timestamps the data (sets ks_snaptime), copies it, and does a little
 380  * massaging to deal with incomplete transactions on i/o kstats.  However,
 381  * this routine only works for kstats with contiguous data (the typical case).
 382  * If you create a kstat whose data is, say, a linked list, you must provide
 383  * your own ks_snapshot routine.  The routine you supply must have the
 384  * following prototype (replace "foo" with something appropriate):
 385  *
 386  *      int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
 387  *
 388  * The minimal snapshot routine -- one which copies contiguous data that
 389  * doesn't need any massaging -- would be this:
 390  *
 391  *      ksp->ks_snaptime = gethrtime();
 392  *      if (rw == KSTAT_WRITE)
 393  *              bcopy(buf, ksp->ks_data, ksp->ks_data_size);
 394  *      else
 395  *              bcopy(ksp->ks_data, buf, ksp->ks_data_size);
 396  *      return (0);
 397  *
 398  * A more illuminating example is taking a snapshot of a linked list:
 399  *
 400  *      ksp->ks_snaptime = gethrtime();
 401  *      if (rw == KSTAT_WRITE)
 402  *              return (EACCES);                ... See below ...
 403  *      for (foo = first_foo; foo; foo = foo->next) {
 404  *              bcopy((char *) foo, (char *) buf, sizeof (struct foo));
 405  *              buf = ((struct foo *) buf) + 1;
 406  *      }
 407  *      return (0);
 408  *
 409  * In the example above, we have decided that we don't want to allow
 410  * KSTAT_WRITE access, so we return EACCES if this is attempted.
 411  *
 412  * The key points are:
 413  *
 414  *      (1) ks_snaptime must be set (via gethrtime()) to timestamp the data.
 415  *      (2) Data gets copied from the kstat to the buffer on KSTAT_READ,
 416  *              and from the buffer to the kstat on KSTAT_WRITE.
 417  *      (3) ks_snapshot return values are: 0 for success, EACCES if you
 418  *              don't allow KSTAT_WRITE, and EIO for any other type of error.
 419  *
 420  * Named kstats (see section on "Named statistics" below) containing long
 421  * strings (KSTAT_DATA_STRING) need special handling.  The kstat driver
 422  * assumes that all strings are copied into the buffer after the array of
 423  * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point
 424  * into the copy within the buffer. The default snapshot routine does this,
 425  * but overriding routines should contain at least the following:
 426  *
 * if (rw == KSTAT_READ) {
 *      kstat_named_t *knp = buf;
 *      char *end = (char *)(knp + ksp->ks_ndata);
 *      uint_t i;
 *
 *      ... Do the regular copy ...
 *      bcopy(ksp->ks_data, buf, sizeof (kstat_named_t) * ksp->ks_ndata);
 *
 *      for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 *              if (knp->data_type == KSTAT_DATA_STRING &&
 *                  KSTAT_NAMED_STR_PTR(knp) != NULL) {
 *                      bcopy(KSTAT_NAMED_STR_PTR(knp), end,
 *                          KSTAT_NAMED_STR_BUFLEN(knp));
 *                      KSTAT_NAMED_STR_PTR(knp) = end;
 *                      end += KSTAT_NAMED_STR_BUFLEN(knp);
 *              }
 *      }
 * }
 444  */
 445 
 446 /*
 447  * Named statistics.
 448  *
 449  * List of arbitrary name=value statistics.
 450  */
 451 
 452 typedef struct kstat_named {
 453         char    name[KSTAT_STRLEN];     /* name of counter */
 454         uchar_t data_type;              /* data type */
 455         union {
 456                 char            c[16];  /* enough for 128-bit ints */
 457                 int32_t         i32;
 458                 uint32_t        ui32;
 459                 struct {
 460                         union {
 461                                 char            *ptr;   /* NULL-term string */
 462 #if defined(_KERNEL) && defined(_MULTI_DATAMODEL)
 463                                 caddr32_t       ptr32;
 464 #endif
 465                                 char            __pad[8]; /* 64-bit padding */
 466                         } addr;
 467                         uint32_t        len;    /* # bytes for strlen + '\0' */
 468                 } str;
 469 /*
 470  * The int64_t and uint64_t types are not valid for a maximally conformant
 471  * 32-bit compilation environment (cc -Xc) using compilers prior to the
 472  * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990).
 473  * In these cases, the visibility of i64 and ui64 is only permitted for
 474  * 64-bit compilation environments or 32-bit non-maximally conformant
 475  * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the
 476  * C99 ANSI C compilation environment, the long long type is supported.
 477  * The _INT64_TYPE is defined by the implementation (see sys/int_types.h).
 478  */
 479 #if defined(_INT64_TYPE)
 480                 int64_t         i64;
 481                 uint64_t        ui64;
 482 #endif
 483                 long            l;
 484                 ulong_t         ul;
 485 
 486                 hrtime_t        t;
 487 
 488                 /* These structure members are obsolete */
 489 
 490                 longlong_t      ll;
 491                 u_longlong_t    ull;
 492                 float           f;
 493                 double          d;
 494         } value;                        /* value of counter */
 495 } kstat_named_t;
 496 
 497 #define KSTAT_DATA_CHAR         0
 498 #define KSTAT_DATA_INT32        1
 499 #define KSTAT_DATA_UINT32       2
 500 #define KSTAT_DATA_INT64        3
 501 #define KSTAT_DATA_UINT64       4
 502 
 503 #if !defined(_LP64)
 504 #define KSTAT_DATA_LONG         KSTAT_DATA_INT32
 505 #define KSTAT_DATA_ULONG        KSTAT_DATA_UINT32
 506 #else
 507 #if !defined(_KERNEL)
 508 #define KSTAT_DATA_LONG         KSTAT_DATA_INT64
 509 #define KSTAT_DATA_ULONG        KSTAT_DATA_UINT64
 510 #else
 511 #define KSTAT_DATA_LONG         7       /* only visible to the kernel */
 512 #define KSTAT_DATA_ULONG        8       /* only visible to the kernel */
 513 #endif  /* !_KERNEL */
 514 #endif  /* !_LP64 */
 515 
 516 /*
 517  * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING)
 518  * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof
 519  * (kstat_named_t)).  ks_data_size in these cases is equal to the sum of the
 520  * amount of space required to store the strings (ie, the sum of
 521  * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the
 522  * space required to store the kstat_named_t's.
 523  *
 524  * The default update routine will update ks_data_size automatically for
 525  * variable-length kstats containing long strings (using the default update
 526  * routine only makes sense if the string is the only thing that is changing
 527  * in size, and ks_ndata is constant).  Fixed-length kstats containing long
 528  * strings must explicitly change ks_data_size (after creation but before
 529  * initialization) to reflect the correct amount of space required for the
 530  * long strings and the kstat_named_t's.
 531  */
 532 #define KSTAT_DATA_STRING       9
 533 
 534 /*
 535  * Value is an hrtime_t, in seconds since system boot.
 536  */
 537 #define KSTAT_DATA_TIME         10
 538 
 539 /* These types are obsolete */
 540 
 541 #define KSTAT_DATA_LONGLONG     KSTAT_DATA_INT64
 542 #define KSTAT_DATA_ULONGLONG    KSTAT_DATA_UINT64
 543 #define KSTAT_DATA_FLOAT        5
 544 #define KSTAT_DATA_DOUBLE       6
 545 
 546 #define KSTAT_NAMED_PTR(kptr)   ((kstat_named_t *)(kptr)->ks_data)
 547 
 548 /*
 549  * Retrieve the pointer of the string contained in the given named kstat.
 550  */
 551 #define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.str.addr.ptr)
 552 
 553 /*
 554  * Retrieve the length of the buffer required to store the string in the given
 555  * named kstat.
 556  */
 557 #define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.str.len)
 558 
 559 /*
 560  * Interrupt statistics.
 561  *
 562  * An interrupt is a hard interrupt (sourced from the hardware device
 563  * itself), a soft interrupt (induced by the system via the use of
 564  * some system interrupt source), a watchdog interrupt (induced by
 565  * a periodic timer call), spurious (an interrupt entry point was
 566  * entered but there was no interrupt condition to service),
 567  * or multiple service (an interrupt condition was detected and
 568  * serviced just prior to returning from any of the other types).
 569  *
 570  * Measurement of the spurious class of interrupts is useful for
 571  * autovectored devices in order to pinpoint any interrupt latency
 572  * problems in a particular system configuration.
 573  *
 574  * Devices that have more than one interrupt of the same
 575  * type should use multiple structures.
 576  */
 577 
 578 #define KSTAT_INTR_HARD                 0
 579 #define KSTAT_INTR_SOFT                 1
 580 #define KSTAT_INTR_WATCHDOG             2
 581 #define KSTAT_INTR_SPURIOUS             3
 582 #define KSTAT_INTR_MULTSVC              4
 583 
 584 #define KSTAT_NUM_INTRS                 5
 585 
 586 typedef struct kstat_intr {
 587         uint_t  intrs[KSTAT_NUM_INTRS]; /* interrupt counters */
 588 } kstat_intr_t;
 589 
 590 #define KSTAT_INTR_PTR(kptr)    ((kstat_intr_t *)(kptr)->ks_data)
 591 
 592 /*
 593  * I/O statistics.
 594  */
 595 
 596 typedef struct kstat_io {
 597 
 598         /*
 599          * Basic counters.
 600          *
 601          * The counters should be updated at the end of service
 602          * (e.g., just prior to calling biodone()).
 603          */
 604 
 605         u_longlong_t    nread;          /* number of bytes read */
 606         u_longlong_t    nwritten;       /* number of bytes written */
 607         uint_t          reads;          /* number of read operations */
 608         uint_t          writes;         /* number of write operations */
 609 
 610         /*
 611          * Accumulated time and queue length statistics.
 612          *
 613          * Accumulated time statistics are kept as a running sum
 614          * of "active" time.  Queue length statistics are kept as a
 615          * running sum of the product of queue length and elapsed time
 616          * at that length -- i.e., a Riemann sum for queue length
 617          * integrated against time.  (You can also think of the active time
 618          * as a Riemann sum, for the boolean function (queue_length > 0)
 619          * integrated against time, or you can think of it as the
 620          * Lebesgue measure of the set on which queue_length > 0.)
 621          *
 622          *              ^
 623          *              |                       _________
 624          *              8                       | i4    |
 625          *              |                       |       |
 626          *      Queue   6                       |       |
 627          *      Length  |       _________       |       |
 628          *              4       | i2    |_______|       |
 629          *              |       |           i3          |
 630          *              2_______|                       |
 631          *              |    i1                         |
 632          *              |_______________________________|
 633          *              Time->       t1      t2      t3      t4
 634          *
 635          * At each change of state (entry or exit from the queue),
 636          * we add the elapsed time (since the previous state change)
 637          * to the active time if the queue length was non-zero during
 638          * that interval; and we add the product of the elapsed time
 639          * times the queue length to the running length*time sum.
 640          *
 641          * This method is generalizable to measuring residency
 642          * in any defined system: instead of queue lengths, think
 643          * of "outstanding RPC calls to server X".
 644          *
 645          * A large number of I/O subsystems have at least two basic
 646          * "lists" of transactions they manage: one for transactions
 647          * that have been accepted for processing but for which processing
 648          * has yet to begin, and one for transactions which are actively
 649          * being processed (but not done). For this reason, two cumulative
 650          * time statistics are defined here: wait (pre-service) time,
 651          * and run (service) time.
 652          *
 653          * All times are 64-bit nanoseconds (hrtime_t), as returned by
 654          * gethrtime().
 655          *
 656          * The units of cumulative busy time are accumulated nanoseconds.
 657          * The units of cumulative length*time products are elapsed time
 658          * times queue length.
 659          *
 660          * Updates to the fields below are performed implicitly by calls to
 661          * these five functions:
 662          *
 663          *      kstat_waitq_enter()
 664          *      kstat_waitq_exit()
 665          *      kstat_runq_enter()
 666          *      kstat_runq_exit()
 667          *
 668          *      kstat_waitq_to_runq()           (see below)
 669          *      kstat_runq_back_to_waitq()      (see below)
 670          *
 671          * Since kstat_waitq_exit() is typically followed immediately
 672          * by kstat_runq_enter(), there is a single kstat_waitq_to_runq()
 673          * function which performs both operations.  This is a performance
 674          * win since only one timestamp is required.
 675          *
 676          * In some instances, it may be necessary to move a request from
 677          * the run queue back to the wait queue, e.g. for write throttling.
 678          * For these situations, call kstat_runq_back_to_waitq().
 679          *
 680          * These fields should never be updated by any other means.
 681          */
 682 
 683         hrtime_t wtime;         /* cumulative wait (pre-service) time */
 684         hrtime_t wlentime;      /* cumulative wait length*time product */
 685         hrtime_t wlastupdate;   /* last time wait queue changed */
 686         hrtime_t rtime;         /* cumulative run (service) time */
 687         hrtime_t rlentime;      /* cumulative run length*time product */
 688         hrtime_t rlastupdate;   /* last time run queue changed */
 689 
 690         uint_t  wcnt;           /* count of elements in wait state */
 691         uint_t  rcnt;           /* count of elements in run state */
 692 
 693 } kstat_io_t;
 694 
 695 #define KSTAT_IO_PTR(kptr)      ((kstat_io_t *)(kptr)->ks_data)
 696 
 697 /*
 698  * Event timer statistics - cumulative elapsed time and number of events.
 699  * Every time field is an hrtime_t.  The structure is declared outside the
 700  * kernel-only part of this header.  Updates to these fields are performed
 701  * implicitly by calls to kstat_timer_start() and kstat_timer_stop().
 702  */
 703 
 704 typedef struct kstat_timer {
 705         char            name[KSTAT_STRLEN];     /* event name (KSTAT_STRLEN bytes) */
 706         uchar_t         resv;                   /* reserved */
 707         u_longlong_t    num_events;             /* number of events */
 708         hrtime_t        elapsed_time;           /* cumulative elapsed time */
 709         hrtime_t        min_time;               /* shortest event duration */
 710         hrtime_t        max_time;               /* longest event duration */
 711         hrtime_t        start_time;             /* previous event start time */
 712         hrtime_t        stop_time;              /* previous event stop time */
 713 } kstat_timer_t;
 714 
 715 #define KSTAT_TIMER_PTR(kptr)   ((kstat_timer_t *)(kptr)->ks_data) /* ks_data as kstat_timer_t; unchecked cast */
 716 
 717 #if     defined(_KERNEL) || defined(_FAKE_KERNEL)
 718 
 719 #include <sys/t_lock.h>
 720 
 721 extern kid_t    kstat_chain_id;         /* chain ID, bumped at each state change */
 722 extern void     kstat_init(void);       /* initialize the kstat framework */
 723 
 724 /*
 725  * Adding and deleting kstats.
 726  *
 727  * The typical sequence to add a kstat is:
 728  *
 729  *      ksp = kstat_create(module, instance, name, class, type, ndata, flags);
 730  *      if (ksp) {
 731  *              ... provider initialization, if necessary
 732  *              kstat_install(ksp);
 733  *      }
 734  *
 735  * There are three logically distinct steps here:
 736  *
 737  * Step 1: System Initialization (kstat_create)
 738  *
 739  * kstat_create() performs system initialization.  kstat_create()
 740  * allocates memory for the entire kstat (header plus data), initializes
 741  * all header fields, initializes the data section to all zeroes, assigns
 742  * a unique KID, and puts the kstat onto the system's kstat chain.
 743  * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set),
 744  * because the provider (caller) has not yet had a chance to initialize
 745  * the data section.
 746  *
 747  * By default, kstats are exported to all zones on the system.  A kstat may be
 748  * created via kstat_create_zone() to specify a zone to which the statistics
 749  * should be exported.  kstat_zone_add() may be used to specify additional
 750  * zones to which the statistics are to be exported.
 751  *
 752  * Step 2: Provider Initialization
 753  *
 754  * The provider performs any necessary initialization of the data section,
 755  * e.g. setting the name fields in a KSTAT_TYPE_NAMED.  Virtual kstats set
 756  * the ks_data field at this time.  The provider may also set the ks_update,
 757  * ks_snapshot, ks_private, and ks_lock fields if necessary.
 758  *
 759  * Step 3: Installation (kstat_install)
 760  *
 761  * Once the kstat is completely initialized, kstat_install() clears the
 762  * INVALID flag, thus making the kstat accessible to the outside world.
 763  * kstat_install() also clears the DORMANT flag for persistent kstats.
 764  *
 765  * Removing a kstat from the system
 766  *
 767  * kstat_delete(ksp) removes ksp from the kstat chain and frees all
 768  * associated system resources.  NOTE: When you call kstat_delete(),
 769  * you must NOT be holding that kstat's ks_lock.  Otherwise, you may
 770  * deadlock with a kstat reader.
 771  *
 772  * Persistent kstats
 773  *
 774  * From the provider's point of view, persistence is transparent.  The only
 775  * difference between ephemeral (normal) kstats and persistent kstats
 776  * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create().  Magically,
 777  * this has the effect of making your data visible even when you're
 778  * not home.  Persistence is important to tools like iostat, which want
 779  * to get a meaningful picture of disk activity.  Without persistence,
 780  * raw disk i/o statistics could never accumulate: they would come and
 781  * go with each open/close of the raw device.
 782  *
 783  * The magic of persistence works by slightly altering the behavior of
 784  * kstat_create() and kstat_delete().  The first call to kstat_create()
 785  * creates a new kstat, as usual.  However, kstat_delete() does not
 786  * actually delete the kstat: it performs one final update of the data
 787  * (i.e., calls the ks_update routine), marks the kstat as dormant, and
 788  * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back
 789  * to their default values (since they might otherwise point to garbage,
 790  * e.g. if the provider is going away).  kstat clients can still access
 791  * the dormant kstat just like a live kstat; they just continue to see
 792  * the final data values as long as the kstat remains dormant.
 793  * All subsequent kstat_create() calls simply find the already-existing,
 794  * dormant kstat and return a pointer to it, without altering any fields.
 795  * The provider then performs its usual initialization sequence, and
 796  * calls kstat_install().  kstat_install() uses the old data values to
 797  * initialize the native data (i.e., ks_update is called with KSTAT_WRITE),
 798  * thus making it seem like you were never gone.
 799  */
 800 
 801 extern kstat_t *kstat_create(const char *, int, const char *, const char *,
 802     uchar_t, uint_t, uchar_t);          /* may be NULL; KSTAT_FLAG_INVALID set */
 803 extern kstat_t *kstat_create_zone(const char *, int, const char *,
 804     const char *, uchar_t, uint_t, uchar_t, zoneid_t); /* exported to given zone */
 805 extern void kstat_install(kstat_t *);   /* clears INVALID (and DORMANT) */
 806 extern void kstat_delete(kstat_t *);    /* caller must not hold ks_lock */
 807 extern void kstat_named_setstr(kstat_named_t *knp, const char *src);
 808 extern void kstat_set_string(char *, const char *);
 809 extern void kstat_delete_byname(const char *, int, const char *);
 810 extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t);
 811 extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
 812 extern void kstat_timer_init(kstat_timer_t *, const char *);
 813 extern void kstat_waitq_enter(kstat_io_t *);
 814 extern void kstat_waitq_exit(kstat_io_t *);
 815 extern void kstat_runq_enter(kstat_io_t *);
 816 extern void kstat_runq_exit(kstat_io_t *);
 817 extern void kstat_waitq_to_runq(kstat_io_t *);       /* exit waitq + enter runq, one timestamp */
 818 extern void kstat_runq_back_to_waitq(kstat_io_t *);  /* runq -> waitq, e.g. write throttling */
 819 extern void kstat_timer_start(kstat_timer_t *);      /* implicitly updates kstat_timer_t fields */
 820 extern void kstat_timer_stop(kstat_timer_t *);       /* implicitly updates kstat_timer_t fields */
 821 
 822 extern void kstat_zone_add(kstat_t *, zoneid_t);     /* export to an additional zone */
 823 extern void kstat_zone_remove(kstat_t *, zoneid_t);  /* NOTE(review): presumably undoes kstat_zone_add() -- confirm */
 824 extern int kstat_zone_find(kstat_t *, zoneid_t);     /* NOTE(review): return value meaning not shown here -- confirm */
 825 
 826 extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t);       /* keyed by unique kstat id (kid_t) */
 827 extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t);
 828 extern void kstat_rele(kstat_t *);      /* NOTE(review): presumably releases a kstat_hold_*() result -- confirm */
 829 
 830 #endif  /* defined(_KERNEL) || defined(_FAKE_KERNEL) */
 831 
 832 #ifdef  __cplusplus
 833 }
 834 #endif
 835 
 836 #endif  /* _SYS_KSTAT_H */