1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 26 * Copyright 2016 Garrett D'Amore 27 */ 28 29 #ifndef _SYS_KSTAT_H 30 #define _SYS_KSTAT_H 31 32 /* 33 * Definition of general kernel statistics structures and /dev/kstat ioctls 34 */ 35 36 #include <sys/types.h> 37 #include <sys/time.h> 38 39 #ifdef __cplusplus 40 extern "C" { 41 #endif 42 43 typedef int kid_t; /* unique kstat id */ 44 45 /* 46 * Kernel statistics driver (/dev/kstat) ioctls 47 */ 48 49 #define KSTAT_IOC_BASE ('K' << 8) 50 51 #define KSTAT_IOC_CHAIN_ID KSTAT_IOC_BASE | 0x01 52 #define KSTAT_IOC_READ KSTAT_IOC_BASE | 0x02 53 #define KSTAT_IOC_WRITE KSTAT_IOC_BASE | 0x03 54 55 /* 56 * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor): 57 * 58 * kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL); 59 * kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *); 60 * kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *); 61 */ 62 63 #define KSTAT_STRLEN 31 /* 30 chars + NULL; must be 16 * n - 1 */ 64 65 /* 66 * The generic kstat header 67 */ 68 69 typedef struct kstat { 70 /* 71 * Fields relevant to both kernel and user 72 */ 73 hrtime_t ks_crtime; /* creation time (from gethrtime()) */ 74 struct kstat *ks_next; /* kstat chain linkage */ 75 kid_t ks_kid; /* unique kstat ID */ 76 char ks_module[KSTAT_STRLEN]; /* provider module name */ 77 uchar_t ks_resv; /* reserved, currently just padding */ 78 int ks_instance; /* provider module's instance */ 79 char ks_name[KSTAT_STRLEN]; /* kstat name */ 80 uchar_t ks_type; /* kstat data type */ 81 char ks_class[KSTAT_STRLEN]; /* kstat class */ 82 uchar_t ks_flags; /* kstat flags */ 83 void *ks_data; /* kstat type-specific data */ 84 uint_t ks_ndata; /* # of type-specific data records */ 85 size_t ks_data_size; /* total size of kstat data section */ 86 hrtime_t ks_snaptime; /* time of last data shapshot */ 87 /* 88 * Fields relevant to kernel only 89 */ 90 int (*ks_update)(struct kstat *, int); /* dynamic update */ 91 void *ks_private; /* arbitrary provider-private data */ 92 int (*ks_snapshot)(struct kstat *, void *, int); 93 void *ks_lock; /* protects this kstat's data */ 94 } kstat_t; 95 96 #ifdef _SYSCALL32 97 98 typedef int32_t kid32_t; 99 100 typedef struct kstat32 { 101 /* 102 * Fields relevant to both kernel and user 103 */ 104 hrtime_t ks_crtime; 105 caddr32_t ks_next; /* struct kstat pointer */ 106 kid32_t ks_kid; 107 char ks_module[KSTAT_STRLEN]; 108 uint8_t ks_resv; 109 int32_t ks_instance; 110 char ks_name[KSTAT_STRLEN]; 111 uint8_t ks_type; 112 char ks_class[KSTAT_STRLEN]; 113 uint8_t ks_flags; 114 caddr32_t ks_data; /* type-specific data */ 115 uint32_t ks_ndata; 116 size32_t ks_data_size; 117 hrtime_t ks_snaptime; 118 /* 119 * Fields relevant to kernel only (only needed here for padding) 120 */ 121 int32_t _ks_update; 122 caddr32_t _ks_private; 123 int32_t _ks_snapshot; 124 caddr32_t _ks_lock; 125 } kstat32_t; 126 127 #endif /* _SYSCALL32 */ 128 129 /* 130 * kstat structure and locking strategy 131 * 132 * Each kstat consists of a header section (a kstat_t) and a data section. 133 * The system maintains a set of kstats, protected by kstat_chain_lock. 134 * kstat_chain_lock protects all additions to/deletions from this set, 135 * as well as all changes to kstat headers. kstat data sections are 136 * *optionally* protected by the per-kstat ks_lock. If ks_lock is non-NULL, 137 * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their 138 * operations on that kstat. It is up to the kstat provider to decide whether 139 * guaranteeing consistent data to kstat clients is sufficiently important 140 * to justify the locking cost. Note, however, that most statistic updates 141 * already occur under one of the provider's mutexes, so if the provider sets 142 * ks_lock to point to that mutex, then kstat data locking is free. 143 * 144 * NOTE: variable-size kstats MUST employ kstat data locking, to prevent 145 * data-size races with kstat clients. 146 * 147 * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *) 148 * in the kstat header so that users don't have to be exposed to all of the 149 * kernel's lock-related data structures. 150 */ 151 152 #if defined(_KERNEL) 153 154 #define KSTAT_ENTER(k) \ 155 { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_enter(lp); } 156 157 #define KSTAT_EXIT(k) \ 158 { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_exit(lp); } 159 160 #define KSTAT_UPDATE(k, rw) (*(k)->ks_update)((k), (rw)) 161 162 #define KSTAT_SNAPSHOT(k, buf, rw) (*(k)->ks_snapshot)((k), (buf), (rw)) 163 164 #endif /* defined(_KERNEL) */ 165 166 /* 167 * kstat time 168 * 169 * All times associated with kstats (e.g. creation time, snapshot time, 170 * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values, 171 * as returned by gethrtime(). The accuracy of these timestamps is machine 172 * dependent, but the precision (units) is the same across all platforms. 173 */ 174 175 /* 176 * kstat identity (KID) 177 * 178 * Each kstat is assigned a unique KID (kstat ID) when it is added to the 179 * global kstat chain. The KID is used as a cookie by /dev/kstat to 180 * request information about the corresponding kstat. There is also 181 * an identity associated with the entire kstat chain, kstat_chain_id, 182 * which is bumped each time a kstat is added or deleted. /dev/kstat uses 183 * the chain ID to detect changes in the kstat chain (e.g., a new disk 184 * coming online) between ioctl()s. 185 */ 186 187 /* 188 * kstat module, kstat instance 189 * 190 * ks_module and ks_instance contain the name and instance of the module 191 * that created the kstat. In cases where there can only be one instance, 192 * ks_instance is 0. The kernel proper (/kernel/unix) uses "unix" as its 193 * module name. 194 */ 195 196 /* 197 * kstat name 198 * 199 * ks_name gives a meaningful name to a kstat. The full kstat namespace 200 * is module.instance.name, so the name only need be unique within a 201 * module. kstat_create() will fail if you try to create a kstat with 202 * an already-used (ks_module, ks_instance, ks_name) triplet. Spaces are 203 * allowed in kstat names, but strongly discouraged, since they hinder 204 * awk-style processing at user level. 205 */ 206 207 /* 208 * kstat type 209 * 210 * The kstat mechanism provides several flavors of kstat data, defined 211 * below. The "raw" kstat type is just treated as an array of bytes; you 212 * can use this to export any kind of data you want. 213 * 214 * Some kstat types allow multiple data structures per kstat, e.g. 215 * KSTAT_TYPE_NAMED; others do not. This is part of the spec for each 216 * kstat data type. 217 * 218 * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES. To 219 * get this information, read out the standard system kstat "kstat_types". 220 */ 221 222 #define KSTAT_TYPE_RAW 0 /* can be anything */ 223 /* ks_ndata >= 1 */ 224 #define KSTAT_TYPE_NAMED 1 /* name/value pair */ 225 /* ks_ndata >= 1 */ 226 #define KSTAT_TYPE_INTR 2 /* interrupt statistics */ 227 /* ks_ndata == 1 */ 228 #define KSTAT_TYPE_IO 3 /* I/O statistics */ 229 /* ks_ndata == 1 */ 230 #define KSTAT_TYPE_TIMER 4 /* event timer */ 231 /* ks_ndata >= 1 */ 232 233 #define KSTAT_NUM_TYPES 5 234 235 /* 236 * kstat class 237 * 238 * Each kstat can be characterized as belonging to some broad class 239 * of statistics, e.g. disk, tape, net, vm, streams, etc. This field 240 * can be used as a filter to extract related kstats. The following 241 * values are currently in use: disk, tape, net, controller, vm, kvm, 242 * hat, streams, kstat, and misc. (The kstat class encompasses things 243 * like kstat_types.) 244 */ 245 246 /* 247 * kstat flags 248 * 249 * Any of the following flags may be passed to kstat_create(). They are 250 * all zero by default. 251 * 252 * KSTAT_FLAG_VIRTUAL: 253 * 254 * Tells kstat_create() not to allocate memory for the 255 * kstat data section; instead, you will set the ks_data 256 * field to point to the data you wish to export. This 257 * provides a convenient way to export existing data 258 * structures. 259 * 260 * KSTAT_FLAG_VAR_SIZE: 261 * 262 * The size of the kstat you are creating will vary over time. 263 * For example, you may want to use the kstat mechanism to 264 * export a linked list. NOTE: The kstat framework does not 265 * manage the data section, so all variable-size kstats must be 266 * virtual kstats. Moreover, variable-size kstats MUST employ 267 * kstat data locking to prevent data-size races with kstat 268 * clients. See the section on "kstat snapshot" for details. 269 * 270 * KSTAT_FLAG_WRITABLE: 271 * 272 * Makes the kstat's data section writable by root. 273 * The ks_snapshot routine (see below) does not need to check for 274 * this; permission checking is handled in the kstat driver. 275 * 276 * KSTAT_FLAG_PERSISTENT: 277 * 278 * Indicates that this kstat is to be persistent over time. 279 * For persistent kstats, kstat_delete() simply marks the 280 * kstat as dormant; a subsequent kstat_create() reactivates 281 * the kstat. This feature is provided so that statistics 282 * are not lost across driver close/open (e.g., raw disk I/O 283 * on a disk with no mounted partitions.) 284 * NOTE: Persistent kstats cannot be virtual, since ks_data 285 * points to garbage as soon as the driver goes away. 286 * 287 * The following flags are maintained by the kstat framework: 288 * 289 * KSTAT_FLAG_DORMANT: 290 * 291 * For persistent kstats, indicates that the kstat is in the 292 * dormant state (e.g., the corresponding device is closed). 293 * 294 * KSTAT_FLAG_INVALID: 295 * 296 * This flag is set when a kstat is in a transitional state, 297 * e.g. between kstat_create() and kstat_install(). 298 * kstat clients must not attempt to access the kstat's data 299 * if this flag is set. 300 * 301 * KSTAT_FLAG_LONGSTRINGS: 302 * 303 * Indicates that this kstat contains long strings (which 304 * are stored outside of the kstat data section). When copied 305 * out to user space the string data will be held in the data 306 * section provided by the user. 307 */ 308 309 #define KSTAT_FLAG_VIRTUAL 0x01 310 #define KSTAT_FLAG_VAR_SIZE 0x02 311 #define KSTAT_FLAG_WRITABLE 0x04 312 #define KSTAT_FLAG_PERSISTENT 0x08 313 #define KSTAT_FLAG_DORMANT 0x10 314 #define KSTAT_FLAG_INVALID 0x20 315 #define KSTAT_FLAG_LONGSTRINGS 0x40 316 317 /* 318 * Dynamic update support 319 * 320 * The kstat mechanism allows for an optional ks_update function to update 321 * kstat data. This is useful for drivers where the underlying device 322 * keeps cheap hardware stats, but extraction is expensive. Instead of 323 * constantly keeping the kstat data section up to date, you can supply a 324 * ks_update function which updates the kstat's data section on demand. 325 * To take advantage of this feature, simply set the ks_update field before 326 * calling kstat_install(). 327 * 328 * The ks_update function, if supplied, must have the following structure: 329 * 330 * int 331 * foo_kstat_update(kstat_t *ksp, int rw) 332 * { 333 * if (rw == KSTAT_WRITE) { 334 * ... update the native stats from ksp->ks_data; 335 * return EACCES if you don't support this 336 * } else { 337 * ... update ksp->ks_data from the native stats 338 * } 339 * } 340 * 341 * The ks_update return codes are: 0 for success, EACCES if you don't allow 342 * KSTAT_WRITE, and EIO for any other type of error. 343 * 344 * In general, the ks_update function may need to refer to provider-private 345 * data; for example, it may need a pointer to the provider's raw statistics. 346 * The ks_private field is available for this purpose. Its use is entirely 347 * at the provider's discretion. 348 * 349 * All variable-size kstats MUST supply a ks_update routine, which computes 350 * and sets ks_data_size (and ks_ndata if that is meaningful), since these 351 * are needed to perform kstat snapshots (see below). 352 * 353 * No kstat locking should be done inside the ks_update routine. The caller 354 * will already be holding the kstat's ks_lock (to ensure consistent data). 355 */ 356 357 #define KSTAT_READ 0 358 #define KSTAT_WRITE 1 359 360 /* 361 * Kstat snapshot 362 * 363 * In order to get a consistent view of a kstat's data, clients must obey 364 * the kstat's locking strategy. However, these clients may need to perform 365 * operations on the data which could cause a fault (e.g. copyout()), or 366 * operations which are simply expensive. Doing so could cause deadlock 367 * (e.g. if you're holding a disk's kstat lock which is ultimately required 368 * to resolve a copyout() fault), performance degradation (since the providers' 369 * activity is serialized at the kstat lock), device timing problems, etc. 370 * 371 * To avoid these problems, kstat data is provided via snapshots. Taking 372 * a snapshot is a simple process: allocate a wired-down kernel buffer, 373 * acquire the kstat's data lock, copy the data into the buffer ("take the 374 * snapshot"), and release the lock. This ensures that the kstat's data lock 375 * will be held as briefly as possible, and that no faults will occur while 376 * the lock is held. 377 * 378 * Normally, the snapshot is taken by default_kstat_snapshot(), which 379 * timestamps the data (sets ks_snaptime), copies it, and does a little 380 * massaging to deal with incomplete transactions on i/o kstats. However, 381 * this routine only works for kstats with contiguous data (the typical case). 382 * If you create a kstat whose data is, say, a linked list, you must provide 383 * your own ks_snapshot routine. The routine you supply must have the 384 * following prototype (replace "foo" with something appropriate): 385 * 386 * int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw); 387 * 388 * The minimal snapshot routine -- one which copies contiguous data that 389 * doesn't need any massaging -- would be this: 390 * 391 * ksp->ks_snaptime = gethrtime(); 392 * if (rw == KSTAT_WRITE) 393 * bcopy(buf, ksp->ks_data, ksp->ks_data_size); 394 * else 395 * bcopy(ksp->ks_data, buf, ksp->ks_data_size); 396 * return (0); 397 * 398 * A more illuminating example is taking a snapshot of a linked list: 399 * 400 * ksp->ks_snaptime = gethrtime(); 401 * if (rw == KSTAT_WRITE) 402 * return (EACCES); ... See below ... 403 * for (foo = first_foo; foo; foo = foo->next) { 404 * bcopy((char *) foo, (char *) buf, sizeof (struct foo)); 405 * buf = ((struct foo *) buf) + 1; 406 * } 407 * return (0); 408 * 409 * In the example above, we have decided that we don't want to allow 410 * KSTAT_WRITE access, so we return EACCES if this is attempted. 411 * 412 * The key points are: 413 * 414 * (1) ks_snaptime must be set (via gethrtime()) to timestamp the data. 415 * (2) Data gets copied from the kstat to the buffer on KSTAT_READ, 416 * and from the buffer to the kstat on KSTAT_WRITE. 417 * (3) ks_snapshot return values are: 0 for success, EACCES if you 418 * don't allow KSTAT_WRITE, and EIO for any other type of error. 419 * 420 * Named kstats (see section on "Named statistics" below) containing long 421 * strings (KSTAT_DATA_STRING) need special handling. The kstat driver 422 * assumes that all strings are copied into the buffer after the array of 423 * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point 424 * into the copy within the buffer. The default snapshot routine does this, 425 * but overriding routines should contain at least the following: 426 * 427 * if (rw == KSTAT_READ) { 428 * kstat_named_t *knp = buf; 429 * char *end = knp + ksp->ks_ndata; 430 * uint_t i; 431 * 432 * ... Do the regular copy ... 433 * bcopy(ksp->ks_data, buf, sizeof (kstat_named_t) * ksp->ks_ndata); 434 * 435 * for (i = 0; i < ksp->ks_ndata; i++, knp++) { 436 * if (knp[i].data_type == KSTAT_DATA_STRING && 437 * KSTAT_NAMED_STR_PTR(knp) != NULL) { 438 * bcopy(KSTAT_NAMED_STR_PTR(knp), end, 439 * KSTAT_NAMED_STR_BUFLEN(knp)); 440 * KSTAT_NAMED_STR_PTR(knp) = end; 441 * end += KSTAT_NAMED_STR_BUFLEN(knp); 442 * } 443 * } 444 */ 445 446 /* 447 * Named statistics. 448 * 449 * List of arbitrary name=value statistics. 450 */ 451 452 typedef struct kstat_named { 453 char name[KSTAT_STRLEN]; /* name of counter */ 454 uchar_t data_type; /* data type */ 455 union { 456 char c[16]; /* enough for 128-bit ints */ 457 int32_t i32; 458 uint32_t ui32; 459 struct { 460 union { 461 char *ptr; /* NULL-term string */ 462 #if defined(_KERNEL) && defined(_MULTI_DATAMODEL) 463 caddr32_t ptr32; 464 #endif 465 char __pad[8]; /* 64-bit padding */ 466 } addr; 467 uint32_t len; /* # bytes for strlen + '\0' */ 468 } str; 469 /* 470 * The int64_t and uint64_t types are not valid for a maximally conformant 471 * 32-bit compilation environment (cc -Xc) using compilers prior to the 472 * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990). 473 * In these cases, the visibility of i64 and ui64 is only permitted for 474 * 64-bit compilation environments or 32-bit non-maximally conformant 475 * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the 476 * C99 ANSI C compilation environment, the long long type is supported. 477 * The _INT64_TYPE is defined by the implementation (see sys/int_types.h). 478 */ 479 #if defined(_INT64_TYPE) 480 int64_t i64; 481 uint64_t ui64; 482 #endif 483 long l; 484 ulong_t ul; 485 486 hrtime_t t; 487 488 /* These structure members are obsolete */ 489 490 longlong_t ll; 491 u_longlong_t ull; 492 float f; 493 double d; 494 } value; /* value of counter */ 495 } kstat_named_t; 496 497 #define KSTAT_DATA_CHAR 0 498 #define KSTAT_DATA_INT32 1 499 #define KSTAT_DATA_UINT32 2 500 #define KSTAT_DATA_INT64 3 501 #define KSTAT_DATA_UINT64 4 502 503 #if !defined(_LP64) 504 #define KSTAT_DATA_LONG KSTAT_DATA_INT32 505 #define KSTAT_DATA_ULONG KSTAT_DATA_UINT32 506 #else 507 #if !defined(_KERNEL) 508 #define KSTAT_DATA_LONG KSTAT_DATA_INT64 509 #define KSTAT_DATA_ULONG KSTAT_DATA_UINT64 510 #else 511 #define KSTAT_DATA_LONG 7 /* only visible to the kernel */ 512 #define KSTAT_DATA_ULONG 8 /* only visible to the kernel */ 513 #endif /* !_KERNEL */ 514 #endif /* !_LP64 */ 515 516 /* 517 * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING) 518 * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof 519 * (kstat_named_t)). ks_data_size in these cases is equal to the sum of the 520 * amount of space required to store the strings (ie, the sum of 521 * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the 522 * space required to store the kstat_named_t's. 523 * 524 * The default update routine will update ks_data_size automatically for 525 * variable-length kstats containing long strings (using the default update 526 * routine only makes sense if the string is the only thing that is changing 527 * in size, and ks_ndata is constant). Fixed-length kstats containing long 528 * strings must explicitly change ks_data_size (after creation but before 529 * initialization) to reflect the correct amount of space required for the 530 * long strings and the kstat_named_t's. 531 */ 532 #define KSTAT_DATA_STRING 9 533 534 /* 535 * Value is an hrtime_t, in seconds since system boot. 536 */ 537 #define KSTAT_DATA_TIME 10 538 539 /* These types are obsolete */ 540 541 #define KSTAT_DATA_LONGLONG KSTAT_DATA_INT64 542 #define KSTAT_DATA_ULONGLONG KSTAT_DATA_UINT64 543 #define KSTAT_DATA_FLOAT 5 544 #define KSTAT_DATA_DOUBLE 6 545 546 #define KSTAT_NAMED_PTR(kptr) ((kstat_named_t *)(kptr)->ks_data) 547 548 /* 549 * Retrieve the pointer of the string contained in the given named kstat. 550 */ 551 #define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.str.addr.ptr) 552 553 /* 554 * Retrieve the length of the buffer required to store the string in the given 555 * named kstat. 556 */ 557 #define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.str.len) 558 559 /* 560 * Interrupt statistics. 561 * 562 * An interrupt is a hard interrupt (sourced from the hardware device 563 * itself), a soft interrupt (induced by the system via the use of 564 * some system interrupt source), a watchdog interrupt (induced by 565 * a periodic timer call), spurious (an interrupt entry point was 566 * entered but there was no interrupt condition to service), 567 * or multiple service (an interrupt condition was detected and 568 * serviced just prior to returning from any of the other types). 569 * 570 * Measurement of the spurious class of interrupts is useful for 571 * autovectored devices in order to pinpoint any interrupt latency 572 * problems in a particular system configuration. 573 * 574 * Devices that have more than one interrupt of the same 575 * type should use multiple structures. 576 */ 577 578 #define KSTAT_INTR_HARD 0 579 #define KSTAT_INTR_SOFT 1 580 #define KSTAT_INTR_WATCHDOG 2 581 #define KSTAT_INTR_SPURIOUS 3 582 #define KSTAT_INTR_MULTSVC 4 583 584 #define KSTAT_NUM_INTRS 5 585 586 typedef struct kstat_intr { 587 uint_t intrs[KSTAT_NUM_INTRS]; /* interrupt counters */ 588 } kstat_intr_t; 589 590 #define KSTAT_INTR_PTR(kptr) ((kstat_intr_t *)(kptr)->ks_data) 591 592 /* 593 * I/O statistics. 594 */ 595 596 typedef struct kstat_io { 597 598 /* 599 * Basic counters. 600 * 601 * The counters should be updated at the end of service 602 * (e.g., just prior to calling biodone()). 603 */ 604 605 u_longlong_t nread; /* number of bytes read */ 606 u_longlong_t nwritten; /* number of bytes written */ 607 uint_t reads; /* number of read operations */ 608 uint_t writes; /* number of write operations */ 609 610 /* 611 * Accumulated time and queue length statistics. 612 * 613 * Accumulated time statistics are kept as a running sum 614 * of "active" time. Queue length statistics are kept as a 615 * running sum of the product of queue length and elapsed time 616 * at that length -- i.e., a Riemann sum for queue length 617 * integrated against time. (You can also think of the active time 618 * as a Riemann sum, for the boolean function (queue_length > 0) 619 * integrated against time, or you can think of it as the 620 * Lebesgue measure of the set on which queue_length > 0.) 621 * 622 * ^ 623 * | _________ 624 * 8 | i4 | 625 * | | | 626 * Queue 6 | | 627 * Length | _________ | | 628 * 4 | i2 |_______| | 629 * | | i3 | 630 * 2_______| | 631 * | i1 | 632 * |_______________________________| 633 * Time-> t1 t2 t3 t4 634 * 635 * At each change of state (entry or exit from the queue), 636 * we add the elapsed time (since the previous state change) 637 * to the active time if the queue length was non-zero during 638 * that interval; and we add the product of the elapsed time 639 * times the queue length to the running length*time sum. 640 * 641 * This method is generalizable to measuring residency 642 * in any defined system: instead of queue lengths, think 643 * of "outstanding RPC calls to server X". 644 * 645 * A large number of I/O subsystems have at least two basic 646 * "lists" of transactions they manage: one for transactions 647 * that have been accepted for processing but for which processing 648 * has yet to begin, and one for transactions which are actively 649 * being processed (but not done). For this reason, two cumulative 650 * time statistics are defined here: wait (pre-service) time, 651 * and run (service) time. 652 * 653 * All times are 64-bit nanoseconds (hrtime_t), as returned by 654 * gethrtime(). 655 * 656 * The units of cumulative busy time are accumulated nanoseconds. 657 * The units of cumulative length*time products are elapsed time 658 * times queue length. 659 * 660 * Updates to the fields below are performed implicitly by calls to 661 * these five functions: 662 * 663 * kstat_waitq_enter() 664 * kstat_waitq_exit() 665 * kstat_runq_enter() 666 * kstat_runq_exit() 667 * 668 * kstat_waitq_to_runq() (see below) 669 * kstat_runq_back_to_waitq() (see below) 670 * 671 * Since kstat_waitq_exit() is typically followed immediately 672 * by kstat_runq_enter(), there is a single kstat_waitq_to_runq() 673 * function which performs both operations. This is a performance 674 * win since only one timestamp is required. 675 * 676 * In some instances, it may be necessary to move a request from 677 * the run queue back to the wait queue, e.g. for write throttling. 678 * For these situations, call kstat_runq_back_to_waitq(). 679 * 680 * These fields should never be updated by any other means. 681 */ 682 683 hrtime_t wtime; /* cumulative wait (pre-service) time */ 684 hrtime_t wlentime; /* cumulative wait length*time product */ 685 hrtime_t wlastupdate; /* last time wait queue changed */ 686 hrtime_t rtime; /* cumulative run (service) time */ 687 hrtime_t rlentime; /* cumulative run length*time product */ 688 hrtime_t rlastupdate; /* last time run queue changed */ 689 690 uint_t wcnt; /* count of elements in wait state */ 691 uint_t rcnt; /* count of elements in run state */ 692 693 } kstat_io_t; 694 695 #define KSTAT_IO_PTR(kptr) ((kstat_io_t *)(kptr)->ks_data) 696 697 /* 698 * Event timer statistics - cumulative elapsed time and number of events. 699 * 700 * Updates to these fields are performed implicitly by calls to 701 * kstat_timer_start() and kstat_timer_stop(). 702 */ 703 704 typedef struct kstat_timer { 705 char name[KSTAT_STRLEN]; /* event name */ 706 uchar_t resv; /* reserved */ 707 u_longlong_t num_events; /* number of events */ 708 hrtime_t elapsed_time; /* cumulative elapsed time */ 709 hrtime_t min_time; /* shortest event duration */ 710 hrtime_t max_time; /* longest event duration */ 711 hrtime_t start_time; /* previous event start time */ 712 hrtime_t stop_time; /* previous event stop time */ 713 } kstat_timer_t; 714 715 #define KSTAT_TIMER_PTR(kptr) ((kstat_timer_t *)(kptr)->ks_data) 716 717 #if defined(_KERNEL) || defined(_FAKE_KERNEL) 718 719 #include <sys/t_lock.h> 720 721 extern kid_t kstat_chain_id; /* bumped at each state change */ 722 extern void kstat_init(void); /* initialize kstat framework */ 723 724 /* 725 * Adding and deleting kstats. 726 * 727 * The typical sequence to add a kstat is: 728 * 729 * ksp = kstat_create(module, instance, name, class, type, ndata, flags); 730 * if (ksp) { 731 * ... provider initialization, if necessary 732 * kstat_install(ksp); 733 * } 734 * 735 * There are three logically distinct steps here: 736 * 737 * Step 1: System Initialization (kstat_create) 738 * 739 * kstat_create() performs system initialization. kstat_create() 740 * allocates memory for the entire kstat (header plus data), initializes 741 * all header fields, initializes the data section to all zeroes, assigns 742 * a unique KID, and puts the kstat onto the system's kstat chain. 743 * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set), 744 * because the provider (caller) has not yet had a chance to initialize 745 * the data section. 746 * 747 * By default, kstats are exported to all zones on the system. A kstat may be 748 * created via kstat_create_zone() to specify a zone to which the statistics 749 * should be exported. kstat_zone_add() may be used to specify additional 750 * zones to which the statistics are to be exported. 751 * 752 * Step 2: Provider Initialization 753 * 754 * The provider performs any necessary initialization of the data section, 755 * e.g. setting the name fields in a KSTAT_TYPE_NAMED. Virtual kstats set 756 * the ks_data field at this time. The provider may also set the ks_update, 757 * ks_snapshot, ks_private, and ks_lock fields if necessary. 758 * 759 * Step 3: Installation (kstat_install) 760 * 761 * Once the kstat is completely initialized, kstat_install() clears the 762 * INVALID flag, thus making the kstat accessible to the outside world. 763 * kstat_install() also clears the DORMANT flag for persistent kstats. 764 * 765 * Removing a kstat from the system 766 * 767 * kstat_delete(ksp) removes ksp from the kstat chain and frees all 768 * associated system resources. NOTE: When you call kstat_delete(), 769 * you must NOT be holding that kstat's ks_lock. Otherwise, you may 770 * deadlock with a kstat reader. 771 * 772 * Persistent kstats 773 * 774 * From the provider's point of view, persistence is transparent. The only 775 * difference between ephemeral (normal) kstats and persistent kstats 776 * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create(). Magically, 777 * this has the effect of making your data visible even when you're 778 * not home. Persistence is important to tools like iostat, which want 779 * to get a meaningful picture of disk activity. Without persistence, 780 * raw disk i/o statistics could never accumulate: they would come and 781 * go with each open/close of the raw device. 782 * 783 * The magic of persistence works by slightly altering the behavior of 784 * kstat_create() and kstat_delete(). The first call to kstat_create() 785 * creates a new kstat, as usual. However, kstat_delete() does not 786 * actually delete the kstat: it performs one final update of the data 787 * (i.e., calls the ks_update routine), marks the kstat as dormant, and 788 * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back 789 * to their default values (since they might otherwise point to garbage, 790 * e.g. if the provider is going away). kstat clients can still access 791 * the dormant kstat just like a live kstat; they just continue to see 792 * the final data values as long as the kstat remains dormant. 793 * All subsequent kstat_create() calls simply find the already-existing, 794 * dormant kstat and return a pointer to it, without altering any fields. 795 * The provider then performs its usual initialization sequence, and 796 * calls kstat_install(). kstat_install() uses the old data values to 797 * initialize the native data (i.e., ks_update is called with KSTAT_WRITE), 798 * thus making it seem like you were never gone. 799 */ 800 801 extern kstat_t *kstat_create(const char *, int, const char *, const char *, 802 uchar_t, uint_t, uchar_t); 803 extern kstat_t *kstat_create_zone(const char *, int, const char *, 804 const char *, uchar_t, uint_t, uchar_t, zoneid_t); 805 extern void kstat_install(kstat_t *); 806 extern void kstat_delete(kstat_t *); 807 extern void kstat_named_setstr(kstat_named_t *knp, const char *src); 808 extern void kstat_set_string(char *, const char *); 809 extern void kstat_delete_byname(const char *, int, const char *); 810 extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t); 811 extern void kstat_named_init(kstat_named_t *, const char *, uchar_t); 812 extern void kstat_timer_init(kstat_timer_t *, const char *); 813 extern void kstat_waitq_enter(kstat_io_t *); 814 extern void kstat_waitq_exit(kstat_io_t *); 815 extern void kstat_runq_enter(kstat_io_t *); 816 extern void kstat_runq_exit(kstat_io_t *); 817 extern void kstat_waitq_to_runq(kstat_io_t *); 818 extern void kstat_runq_back_to_waitq(kstat_io_t *); 819 extern void kstat_timer_start(kstat_timer_t *); 820 extern void kstat_timer_stop(kstat_timer_t *); 821 822 extern void kstat_zone_add(kstat_t *, zoneid_t); 823 extern void kstat_zone_remove(kstat_t *, zoneid_t); 824 extern int kstat_zone_find(kstat_t *, zoneid_t); 825 826 extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t); 827 extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t); 828 extern void kstat_rele(kstat_t *); 829 830 #endif /* defined(_KERNEL) */ 831 832 #ifdef __cplusplus 833 } 834 #endif 835 836 #endif /* _SYS_KSTAT_H */