Print this page
codereview and testing fixes.
6558 kstat: desire type for timestamps
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/sys/kstat.h
+++ new/usr/src/uts/common/sys/kstat.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 *
25 25 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
26 + * Copyright 2016 Garrett D'Amore
26 27 */
27 28
28 29 #ifndef _SYS_KSTAT_H
29 30 #define _SYS_KSTAT_H
30 31
31 32 /*
32 33 * Definition of general kernel statistics structures and /dev/kstat ioctls
33 34 */
34 35
35 36 #include <sys/types.h>
36 37 #include <sys/time.h>
37 38
38 39 #ifdef __cplusplus
39 40 extern "C" {
40 41 #endif
41 42
42 43 typedef int kid_t; /* unique kstat id */
43 44
44 45 /*
45 46 * Kernel statistics driver (/dev/kstat) ioctls
46 47 */
47 48
48 49 #define KSTAT_IOC_BASE ('K' << 8)
49 50
50 51 #define KSTAT_IOC_CHAIN_ID KSTAT_IOC_BASE | 0x01
51 52 #define KSTAT_IOC_READ KSTAT_IOC_BASE | 0x02
52 53 #define KSTAT_IOC_WRITE KSTAT_IOC_BASE | 0x03
53 54
54 55 /*
55 56 * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor):
56 57 *
57 58 * kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL);
58 59 * kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *);
59 60 * kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *);
60 61 */
61 62
62 63 #define KSTAT_STRLEN 31 /* 30 chars + NUL; must be 16 * n - 1 */
63 64
64 65 /*
65 66 * The generic kstat header
66 67 */
67 68
68 69 typedef struct kstat {
69 70 /*
70 71 * Fields relevant to both kernel and user
71 72 */
72 73 hrtime_t ks_crtime; /* creation time (from gethrtime()) */
73 74 struct kstat *ks_next; /* kstat chain linkage */
74 75 kid_t ks_kid; /* unique kstat ID */
75 76 char ks_module[KSTAT_STRLEN]; /* provider module name */
76 77 uchar_t ks_resv; /* reserved, currently just padding */
77 78 int ks_instance; /* provider module's instance */
78 79 char ks_name[KSTAT_STRLEN]; /* kstat name */
79 80 uchar_t ks_type; /* kstat data type */
80 81 char ks_class[KSTAT_STRLEN]; /* kstat class */
81 82 uchar_t ks_flags; /* kstat flags */
82 83 void *ks_data; /* kstat type-specific data */
83 84 uint_t ks_ndata; /* # of type-specific data records */
84 85 size_t ks_data_size; /* total size of kstat data section */
85 86 hrtime_t ks_snaptime; /* time of last data snapshot */
86 87 /*
87 88 * Fields relevant to kernel only
88 89 */
89 90 int (*ks_update)(struct kstat *, int); /* dynamic update */
90 91 void *ks_private; /* arbitrary provider-private data */
91 92 int (*ks_snapshot)(struct kstat *, void *, int);
92 93 void *ks_lock; /* protects this kstat's data */
93 94 } kstat_t;
94 95
95 96 #ifdef _SYSCALL32
96 97
97 98 typedef int32_t kid32_t;
98 99
99 100 typedef struct kstat32 {
100 101 /*
101 102 * Fields relevant to both kernel and user
102 103 */
103 104 hrtime_t ks_crtime;
104 105 caddr32_t ks_next; /* struct kstat pointer */
105 106 kid32_t ks_kid;
106 107 char ks_module[KSTAT_STRLEN];
107 108 uint8_t ks_resv;
108 109 int32_t ks_instance;
109 110 char ks_name[KSTAT_STRLEN];
110 111 uint8_t ks_type;
111 112 char ks_class[KSTAT_STRLEN];
112 113 uint8_t ks_flags;
113 114 caddr32_t ks_data; /* type-specific data */
114 115 uint32_t ks_ndata;
115 116 size32_t ks_data_size;
116 117 hrtime_t ks_snaptime;
117 118 /*
118 119 * Fields relevant to kernel only (only needed here for padding)
119 120 */
120 121 int32_t _ks_update;
121 122 caddr32_t _ks_private;
122 123 int32_t _ks_snapshot;
123 124 caddr32_t _ks_lock;
124 125 } kstat32_t;
125 126
126 127 #endif /* _SYSCALL32 */
127 128
128 129 /*
129 130 * kstat structure and locking strategy
130 131 *
131 132 * Each kstat consists of a header section (a kstat_t) and a data section.
132 133 * The system maintains a set of kstats, protected by kstat_chain_lock.
133 134 * kstat_chain_lock protects all additions to/deletions from this set,
134 135 * as well as all changes to kstat headers. kstat data sections are
135 136 * *optionally* protected by the per-kstat ks_lock. If ks_lock is non-NULL,
136 137 * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their
137 138 * operations on that kstat. It is up to the kstat provider to decide whether
138 139 * guaranteeing consistent data to kstat clients is sufficiently important
139 140 * to justify the locking cost. Note, however, that most statistic updates
140 141 * already occur under one of the provider's mutexes, so if the provider sets
141 142 * ks_lock to point to that mutex, then kstat data locking is free.
142 143 *
143 144 * NOTE: variable-size kstats MUST employ kstat data locking, to prevent
144 145 * data-size races with kstat clients.
145 146 *
146 147 * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *)
147 148 * in the kstat header so that users don't have to be exposed to all of the
148 149 * kernel's lock-related data structures.
149 150 */
150 151
151 152 #if defined(_KERNEL)
152 153
153 154 #define KSTAT_ENTER(k) \
154 155 { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_enter(lp); }
155 156
156 157 #define KSTAT_EXIT(k) \
157 158 { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_exit(lp); }
158 159
159 160 #define KSTAT_UPDATE(k, rw) (*(k)->ks_update)((k), (rw))
160 161
161 162 #define KSTAT_SNAPSHOT(k, buf, rw) (*(k)->ks_snapshot)((k), (buf), (rw))
162 163
163 164 #endif /* defined(_KERNEL) */
164 165
165 166 /*
166 167 * kstat time
167 168 *
168 169 * All times associated with kstats (e.g. creation time, snapshot time,
169 170 * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values,
170 171 * as returned by gethrtime(). The accuracy of these timestamps is machine
171 172 * dependent, but the precision (units) is the same across all platforms.
172 173 */
173 174
174 175 /*
175 176 * kstat identity (KID)
176 177 *
177 178 * Each kstat is assigned a unique KID (kstat ID) when it is added to the
178 179 * global kstat chain. The KID is used as a cookie by /dev/kstat to
179 180 * request information about the corresponding kstat. There is also
180 181 * an identity associated with the entire kstat chain, kstat_chain_id,
181 182 * which is bumped each time a kstat is added or deleted. /dev/kstat uses
182 183 * the chain ID to detect changes in the kstat chain (e.g., a new disk
183 184 * coming online) between ioctl()s.
184 185 */
185 186
186 187 /*
187 188 * kstat module, kstat instance
188 189 *
189 190 * ks_module and ks_instance contain the name and instance of the module
190 191 * that created the kstat. In cases where there can only be one instance,
191 192 * ks_instance is 0. The kernel proper (/kernel/unix) uses "unix" as its
192 193 * module name.
193 194 */
194 195
195 196 /*
196 197 * kstat name
197 198 *
198 199 * ks_name gives a meaningful name to a kstat. The full kstat namespace
199 200 * is module.instance.name, so the name only need be unique within a
200 201 * module. kstat_create() will fail if you try to create a kstat with
201 202 * an already-used (ks_module, ks_instance, ks_name) triplet. Spaces are
202 203 * allowed in kstat names, but strongly discouraged, since they hinder
203 204 * awk-style processing at user level.
204 205 */
205 206
206 207 /*
207 208 * kstat type
208 209 *
209 210 * The kstat mechanism provides several flavors of kstat data, defined
210 211 * below. The "raw" kstat type is just treated as an array of bytes; you
211 212 * can use this to export any kind of data you want.
212 213 *
213 214 * Some kstat types allow multiple data structures per kstat, e.g.
214 215 * KSTAT_TYPE_NAMED; others do not. This is part of the spec for each
215 216 * kstat data type.
216 217 *
217 218 * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES. To
218 219 * get this information, read out the standard system kstat "kstat_types".
219 220 */
220 221
221 222 #define KSTAT_TYPE_RAW 0 /* can be anything */
222 223 /* ks_ndata >= 1 */
223 224 #define KSTAT_TYPE_NAMED 1 /* name/value pair */
224 225 /* ks_ndata >= 1 */
225 226 #define KSTAT_TYPE_INTR 2 /* interrupt statistics */
226 227 /* ks_ndata == 1 */
227 228 #define KSTAT_TYPE_IO 3 /* I/O statistics */
228 229 /* ks_ndata == 1 */
229 230 #define KSTAT_TYPE_TIMER 4 /* event timer */
230 231 /* ks_ndata >= 1 */
231 232
232 233 #define KSTAT_NUM_TYPES 5
233 234
234 235 /*
235 236 * kstat class
236 237 *
237 238 * Each kstat can be characterized as belonging to some broad class
238 239 * of statistics, e.g. disk, tape, net, vm, streams, etc. This field
239 240 * can be used as a filter to extract related kstats. The following
240 241 * values are currently in use: disk, tape, net, controller, vm, kvm,
241 242 * hat, streams, kstat, and misc. (The kstat class encompasses things
242 243 * like kstat_types.)
243 244 */
244 245
245 246 /*
246 247 * kstat flags
247 248 *
248 249 * Any of the following flags may be passed to kstat_create(). They are
249 250 * all zero by default.
250 251 *
251 252 * KSTAT_FLAG_VIRTUAL:
252 253 *
253 254 * Tells kstat_create() not to allocate memory for the
254 255 * kstat data section; instead, you will set the ks_data
255 256 * field to point to the data you wish to export. This
256 257 * provides a convenient way to export existing data
257 258 * structures.
258 259 *
259 260 * KSTAT_FLAG_VAR_SIZE:
260 261 *
261 262 * The size of the kstat you are creating will vary over time.
262 263 * For example, you may want to use the kstat mechanism to
263 264 * export a linked list. NOTE: The kstat framework does not
264 265 * manage the data section, so all variable-size kstats must be
265 266 * virtual kstats. Moreover, variable-size kstats MUST employ
266 267 * kstat data locking to prevent data-size races with kstat
267 268 * clients. See the section on "kstat snapshot" for details.
268 269 *
269 270 * KSTAT_FLAG_WRITABLE:
270 271 *
271 272 * Makes the kstat's data section writable by root.
272 273 * The ks_snapshot routine (see below) does not need to check for
273 274 * this; permission checking is handled in the kstat driver.
274 275 *
275 276 * KSTAT_FLAG_PERSISTENT:
276 277 *
277 278 * Indicates that this kstat is to be persistent over time.
278 279 * For persistent kstats, kstat_delete() simply marks the
279 280 * kstat as dormant; a subsequent kstat_create() reactivates
280 281 * the kstat. This feature is provided so that statistics
281 282 * are not lost across driver close/open (e.g., raw disk I/O
282 283 * on a disk with no mounted partitions.)
283 284 * NOTE: Persistent kstats cannot be virtual, since ks_data
284 285 * points to garbage as soon as the driver goes away.
285 286 *
286 287 * The following flags are maintained by the kstat framework:
287 288 *
288 289 * KSTAT_FLAG_DORMANT:
289 290 *
290 291 * For persistent kstats, indicates that the kstat is in the
291 292 * dormant state (e.g., the corresponding device is closed).
292 293 *
293 294 * KSTAT_FLAG_INVALID:
294 295 *
295 296 * This flag is set when a kstat is in a transitional state,
296 297 * e.g. between kstat_create() and kstat_install().
297 298 * kstat clients must not attempt to access the kstat's data
298 299 * if this flag is set.
299 300 *
300 301 * KSTAT_FLAG_LONGSTRINGS:
301 302 *
302 303 * Indicates that this kstat contains long strings (which
303 304 * are stored outside of the kstat data section). When copied
304 305 * out to user space the string data will be held in the data
305 306 * section provided by the user.
306 307 */
307 308
308 309 #define KSTAT_FLAG_VIRTUAL 0x01
309 310 #define KSTAT_FLAG_VAR_SIZE 0x02
310 311 #define KSTAT_FLAG_WRITABLE 0x04
311 312 #define KSTAT_FLAG_PERSISTENT 0x08
312 313 #define KSTAT_FLAG_DORMANT 0x10
313 314 #define KSTAT_FLAG_INVALID 0x20
314 315 #define KSTAT_FLAG_LONGSTRINGS 0x40
315 316
316 317 /*
317 318 * Dynamic update support
318 319 *
319 320 * The kstat mechanism allows for an optional ks_update function to update
320 321 * kstat data. This is useful for drivers where the underlying device
321 322 * keeps cheap hardware stats, but extraction is expensive. Instead of
322 323 * constantly keeping the kstat data section up to date, you can supply a
323 324 * ks_update function which updates the kstat's data section on demand.
324 325 * To take advantage of this feature, simply set the ks_update field before
325 326 * calling kstat_install().
326 327 *
327 328 * The ks_update function, if supplied, must have the following structure:
328 329 *
329 330 * int
330 331 * foo_kstat_update(kstat_t *ksp, int rw)
331 332 * {
332 333 * if (rw == KSTAT_WRITE) {
333 334 * ... update the native stats from ksp->ks_data;
334 335 * return EACCES if you don't support this
335 336 * } else {
336 337 * ... update ksp->ks_data from the native stats
337 338 * }
338 339 * }
339 340 *
340 341 * The ks_update return codes are: 0 for success, EACCES if you don't allow
341 342 * KSTAT_WRITE, and EIO for any other type of error.
342 343 *
343 344 * In general, the ks_update function may need to refer to provider-private
344 345 * data; for example, it may need a pointer to the provider's raw statistics.
345 346 * The ks_private field is available for this purpose. Its use is entirely
346 347 * at the provider's discretion.
347 348 *
348 349 * All variable-size kstats MUST supply a ks_update routine, which computes
349 350 * and sets ks_data_size (and ks_ndata if that is meaningful), since these
350 351 * are needed to perform kstat snapshots (see below).
351 352 *
352 353 * No kstat locking should be done inside the ks_update routine. The caller
353 354 * will already be holding the kstat's ks_lock (to ensure consistent data).
354 355 */
355 356
356 357 #define KSTAT_READ 0
357 358 #define KSTAT_WRITE 1
358 359
359 360 /*
360 361 * Kstat snapshot
361 362 *
362 363 * In order to get a consistent view of a kstat's data, clients must obey
363 364 * the kstat's locking strategy. However, these clients may need to perform
364 365 * operations on the data which could cause a fault (e.g. copyout()), or
365 366 * operations which are simply expensive. Doing so could cause deadlock
366 367 * (e.g. if you're holding a disk's kstat lock which is ultimately required
367 368 * to resolve a copyout() fault), performance degradation (since the providers'
368 369 * activity is serialized at the kstat lock), device timing problems, etc.
369 370 *
370 371 * To avoid these problems, kstat data is provided via snapshots. Taking
371 372 * a snapshot is a simple process: allocate a wired-down kernel buffer,
372 373 * acquire the kstat's data lock, copy the data into the buffer ("take the
373 374 * snapshot"), and release the lock. This ensures that the kstat's data lock
374 375 * will be held as briefly as possible, and that no faults will occur while
375 376 * the lock is held.
376 377 *
377 378 * Normally, the snapshot is taken by default_kstat_snapshot(), which
378 379 * timestamps the data (sets ks_snaptime), copies it, and does a little
379 380 * massaging to deal with incomplete transactions on i/o kstats. However,
380 381 * this routine only works for kstats with contiguous data (the typical case).
381 382 * If you create a kstat whose data is, say, a linked list, you must provide
382 383 * your own ks_snapshot routine. The routine you supply must have the
383 384 * following prototype (replace "foo" with something appropriate):
384 385 *
385 386 * int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
386 387 *
387 388 * The minimal snapshot routine -- one which copies contiguous data that
388 389 * doesn't need any massaging -- would be this:
389 390 *
390 391 * ksp->ks_snaptime = gethrtime();
391 392 * if (rw == KSTAT_WRITE)
392 393 * bcopy(buf, ksp->ks_data, ksp->ks_data_size);
393 394 * else
394 395 * bcopy(ksp->ks_data, buf, ksp->ks_data_size);
395 396 * return (0);
396 397 *
397 398 * A more illuminating example is taking a snapshot of a linked list:
398 399 *
399 400 * ksp->ks_snaptime = gethrtime();
400 401 * if (rw == KSTAT_WRITE)
401 402 * return (EACCES); ... See below ...
402 403 * for (foo = first_foo; foo; foo = foo->next) {
403 404 * bcopy((char *) foo, (char *) buf, sizeof (struct foo));
404 405 * buf = ((struct foo *) buf) + 1;
405 406 * }
406 407 * return (0);
407 408 *
408 409 * In the example above, we have decided that we don't want to allow
409 410 * KSTAT_WRITE access, so we return EACCES if this is attempted.
410 411 *
411 412 * The key points are:
412 413 *
413 414 * (1) ks_snaptime must be set (via gethrtime()) to timestamp the data.
414 415 * (2) Data gets copied from the kstat to the buffer on KSTAT_READ,
415 416 * and from the buffer to the kstat on KSTAT_WRITE.
416 417 * (3) ks_snapshot return values are: 0 for success, EACCES if you
417 418 * don't allow KSTAT_WRITE, and EIO for any other type of error.
418 419 *
419 420 * Named kstats (see section on "Named statistics" below) containing long
420 421 * strings (KSTAT_DATA_STRING) need special handling. The kstat driver
421 422 * assumes that all strings are copied into the buffer after the array of
422 423 * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point
423 424 * into the copy within the buffer. The default snapshot routine does this,
424 425 * but overriding routines should contain at least the following:
425 426 *
426 427 * if (rw == KSTAT_READ) {
427 428 * kstat_named_t *knp = buf;
428 429 * char *end = (char *)(knp + ksp->ks_ndata);
429 430 * uint_t i;
430 431 *
431 432 * ... Do the regular copy ...
432 433 * bcopy(ksp->ks_data, buf, sizeof (kstat_named_t) * ksp->ks_ndata);
433 434 *
434 435 * for (i = 0; i < ksp->ks_ndata; i++, knp++) {
435 436 * if (knp->data_type == KSTAT_DATA_STRING &&
436 437 * KSTAT_NAMED_STR_PTR(knp) != NULL) {
437 438 * bcopy(KSTAT_NAMED_STR_PTR(knp), end,
438 439 * KSTAT_NAMED_STR_BUFLEN(knp));
439 440 * KSTAT_NAMED_STR_PTR(knp) = end;
440 441 * end += KSTAT_NAMED_STR_BUFLEN(knp);
441 442 * }
442 443 * }
443 444 */
444 445
445 446 /*
446 447 * Named statistics.
447 448 *
448 449 * List of arbitrary name=value statistics.
449 450 */
450 451
451 452 typedef struct kstat_named {
452 453 char name[KSTAT_STRLEN]; /* name of counter */
453 454 uchar_t data_type; /* data type */
454 455 union {
455 456 char c[16]; /* enough for 128-bit ints */
456 457 int32_t i32;
457 458 uint32_t ui32;
458 459 struct {
459 460 union {
460 461 char *ptr; /* NULL-term string */
461 462 #if defined(_KERNEL) && defined(_MULTI_DATAMODEL)
462 463 caddr32_t ptr32;
463 464 #endif
464 465 char __pad[8]; /* 64-bit padding */
465 466 } addr;
466 467 uint32_t len; /* # bytes for strlen + '\0' */
467 468 } str;
468 469 /*
469 470 * The int64_t and uint64_t types are not valid for a maximally conformant
470 471 * 32-bit compilation environment (cc -Xc) using compilers prior to the
471 472 * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990).
472 473 * In these cases, the visibility of i64 and ui64 is only permitted for
473 474 * 64-bit compilation environments or 32-bit non-maximally conformant
474 475 * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the
↓ open down ↓ |
439 lines elided |
↑ open up ↑ |
475 476 * C99 ANSI C compilation environment, the long long type is supported.
476 477 * The _INT64_TYPE is defined by the implementation (see sys/int_types.h).
477 478 */
478 479 #if defined(_INT64_TYPE)
479 480 int64_t i64;
480 481 uint64_t ui64;
481 482 #endif
482 483 long l;
483 484 ulong_t ul;
484 485
486 + hrtime_t t;
487 +
485 488 /* These structure members are obsolete */
486 489
487 490 longlong_t ll;
488 491 u_longlong_t ull;
489 492 float f;
490 493 double d;
491 494 } value; /* value of counter */
492 495 } kstat_named_t;
493 496
494 497 #define KSTAT_DATA_CHAR 0
495 498 #define KSTAT_DATA_INT32 1
496 499 #define KSTAT_DATA_UINT32 2
497 500 #define KSTAT_DATA_INT64 3
498 501 #define KSTAT_DATA_UINT64 4
499 502
500 503 #if !defined(_LP64)
501 504 #define KSTAT_DATA_LONG KSTAT_DATA_INT32
502 505 #define KSTAT_DATA_ULONG KSTAT_DATA_UINT32
503 506 #else
504 507 #if !defined(_KERNEL)
505 508 #define KSTAT_DATA_LONG KSTAT_DATA_INT64
506 509 #define KSTAT_DATA_ULONG KSTAT_DATA_UINT64
507 510 #else
508 511 #define KSTAT_DATA_LONG 7 /* only visible to the kernel */
509 512 #define KSTAT_DATA_ULONG 8 /* only visible to the kernel */
510 513 #endif /* !_KERNEL */
511 514 #endif /* !_LP64 */
512 515
513 516 /*
514 517 * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING)
515 518 * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof
516 519 * (kstat_named_t)). ks_data_size in these cases is equal to the sum of the
517 520 * amount of space required to store the strings (i.e., the sum of
518 521 * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the
519 522 * space required to store the kstat_named_t's.
520 523 *
↓ open down ↓ |
26 lines elided |
↑ open up ↑ |
521 524 * The default update routine will update ks_data_size automatically for
522 525 * variable-length kstats containing long strings (using the default update
523 526 * routine only makes sense if the string is the only thing that is changing
524 527 * in size, and ks_ndata is constant). Fixed-length kstats containing long
525 528 * strings must explicitly change ks_data_size (after creation but before
526 529 * initialization) to reflect the correct amount of space required for the
527 530 * long strings and the kstat_named_t's.
528 531 */
529 532 #define KSTAT_DATA_STRING 9
530 533
534 +/*
535 + * Value is an hrtime_t, in nanoseconds since system boot (see gethrtime()).
536 + */
537 +#define KSTAT_DATA_TIME 10
538 +
531 539 /* These types are obsolete */
532 540
533 541 #define KSTAT_DATA_LONGLONG KSTAT_DATA_INT64
534 542 #define KSTAT_DATA_ULONGLONG KSTAT_DATA_UINT64
535 543 #define KSTAT_DATA_FLOAT 5
536 544 #define KSTAT_DATA_DOUBLE 6
537 545
538 546 #define KSTAT_NAMED_PTR(kptr) ((kstat_named_t *)(kptr)->ks_data)
539 547
540 548 /*
541 549 * Retrieve the pointer of the string contained in the given named kstat.
542 550 */
543 551 #define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.str.addr.ptr)
544 552
545 553 /*
546 554 * Retrieve the length of the buffer required to store the string in the given
547 555 * named kstat.
548 556 */
549 557 #define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.str.len)
550 558
551 559 /*
552 560 * Interrupt statistics.
553 561 *
554 562 * An interrupt is a hard interrupt (sourced from the hardware device
555 563 * itself), a soft interrupt (induced by the system via the use of
556 564 * some system interrupt source), a watchdog interrupt (induced by
557 565 * a periodic timer call), spurious (an interrupt entry point was
558 566 * entered but there was no interrupt condition to service),
559 567 * or multiple service (an interrupt condition was detected and
560 568 * serviced just prior to returning from any of the other types).
561 569 *
562 570 * Measurement of the spurious class of interrupts is useful for
563 571 * autovectored devices in order to pinpoint any interrupt latency
564 572 * problems in a particular system configuration.
565 573 *
566 574 * Devices that have more than one interrupt of the same
567 575 * type should use multiple structures.
568 576 */
569 577
570 578 #define KSTAT_INTR_HARD 0
571 579 #define KSTAT_INTR_SOFT 1
572 580 #define KSTAT_INTR_WATCHDOG 2
573 581 #define KSTAT_INTR_SPURIOUS 3
574 582 #define KSTAT_INTR_MULTSVC 4
575 583
576 584 #define KSTAT_NUM_INTRS 5
577 585
578 586 typedef struct kstat_intr {
579 587 uint_t intrs[KSTAT_NUM_INTRS]; /* interrupt counters */
580 588 } kstat_intr_t;
581 589
582 590 #define KSTAT_INTR_PTR(kptr) ((kstat_intr_t *)(kptr)->ks_data)
583 591
584 592 /*
585 593 * I/O statistics.
586 594 */
587 595
588 596 typedef struct kstat_io {
589 597
590 598 /*
591 599 * Basic counters.
592 600 *
593 601 * The counters should be updated at the end of service
594 602 * (e.g., just prior to calling biodone()).
595 603 */
596 604
597 605 u_longlong_t nread; /* number of bytes read */
598 606 u_longlong_t nwritten; /* number of bytes written */
599 607 uint_t reads; /* number of read operations */
600 608 uint_t writes; /* number of write operations */
601 609
602 610 /*
603 611 * Accumulated time and queue length statistics.
604 612 *
605 613 * Accumulated time statistics are kept as a running sum
606 614 * of "active" time. Queue length statistics are kept as a
607 615 * running sum of the product of queue length and elapsed time
608 616 * at that length -- i.e., a Riemann sum for queue length
609 617 * integrated against time. (You can also think of the active time
610 618 * as a Riemann sum, for the boolean function (queue_length > 0)
611 619 * integrated against time, or you can think of it as the
612 620 * Lebesgue measure of the set on which queue_length > 0.)
613 621 *
614 622 * ^
615 623 * | _________
616 624 * 8 | i4 |
617 625 * | | |
618 626 * Queue 6 | |
619 627 * Length | _________ | |
620 628 * 4 | i2 |_______| |
621 629 * | | i3 |
622 630 * 2_______| |
623 631 * | i1 |
624 632 * |_______________________________|
625 633 * Time-> t1 t2 t3 t4
626 634 *
627 635 * At each change of state (entry or exit from the queue),
628 636 * we add the elapsed time (since the previous state change)
629 637 * to the active time if the queue length was non-zero during
630 638 * that interval; and we add the product of the elapsed time
631 639 * times the queue length to the running length*time sum.
632 640 *
633 641 * This method is generalizable to measuring residency
634 642 * in any defined system: instead of queue lengths, think
635 643 * of "outstanding RPC calls to server X".
636 644 *
637 645 * A large number of I/O subsystems have at least two basic
638 646 * "lists" of transactions they manage: one for transactions
639 647 * that have been accepted for processing but for which processing
640 648 * has yet to begin, and one for transactions which are actively
641 649 * being processed (but not done). For this reason, two cumulative
642 650 * time statistics are defined here: wait (pre-service) time,
643 651 * and run (service) time.
644 652 *
645 653 * All times are 64-bit nanoseconds (hrtime_t), as returned by
646 654 * gethrtime().
647 655 *
648 656 * The units of cumulative busy time are accumulated nanoseconds.
649 657 * The units of cumulative length*time products are elapsed time
650 658 * times queue length.
651 659 *
652 660 * Updates to the fields below are performed implicitly by calls to
653 661 * these six functions:
654 662 *
655 663 * kstat_waitq_enter()
656 664 * kstat_waitq_exit()
657 665 * kstat_runq_enter()
658 666 * kstat_runq_exit()
659 667 *
660 668 * kstat_waitq_to_runq() (see below)
661 669 * kstat_runq_back_to_waitq() (see below)
662 670 *
663 671 * Since kstat_waitq_exit() is typically followed immediately
664 672 * by kstat_runq_enter(), there is a single kstat_waitq_to_runq()
665 673 * function which performs both operations. This is a performance
666 674 * win since only one timestamp is required.
667 675 *
668 676 * In some instances, it may be necessary to move a request from
669 677 * the run queue back to the wait queue, e.g. for write throttling.
670 678 * For these situations, call kstat_runq_back_to_waitq().
671 679 *
672 680 * These fields should never be updated by any other means.
673 681 */
674 682
675 683 hrtime_t wtime; /* cumulative wait (pre-service) time */
676 684 hrtime_t wlentime; /* cumulative wait length*time product */
677 685 hrtime_t wlastupdate; /* last time wait queue changed */
678 686 hrtime_t rtime; /* cumulative run (service) time */
679 687 hrtime_t rlentime; /* cumulative run length*time product */
680 688 hrtime_t rlastupdate; /* last time run queue changed */
681 689
682 690 uint_t wcnt; /* count of elements in wait state */
683 691 uint_t rcnt; /* count of elements in run state */
684 692
685 693 } kstat_io_t;
686 694
687 695 #define KSTAT_IO_PTR(kptr) ((kstat_io_t *)(kptr)->ks_data)
688 696
689 697 /*
690 698 * Event timer statistics - cumulative elapsed time and number of events.
691 699 *
692 700 * Updates to these fields are performed implicitly by calls to
693 701 * kstat_timer_start() and kstat_timer_stop().
694 702 */
695 703
696 704 typedef struct kstat_timer {
697 705 char name[KSTAT_STRLEN]; /* event name */
698 706 uchar_t resv; /* reserved */
699 707 u_longlong_t num_events; /* number of events */
700 708 hrtime_t elapsed_time; /* cumulative elapsed time */
701 709 hrtime_t min_time; /* shortest event duration */
702 710 hrtime_t max_time; /* longest event duration */
703 711 hrtime_t start_time; /* previous event start time */
704 712 hrtime_t stop_time; /* previous event stop time */
705 713 } kstat_timer_t;
706 714
707 715 #define KSTAT_TIMER_PTR(kptr) ((kstat_timer_t *)(kptr)->ks_data)
708 716
709 717 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
710 718
711 719 #include <sys/t_lock.h>
712 720
713 721 extern kid_t kstat_chain_id; /* bumped at each state change */
714 722 extern void kstat_init(void); /* initialize kstat framework */
715 723
716 724 /*
717 725 * Adding and deleting kstats.
718 726 *
719 727 * The typical sequence to add a kstat is:
720 728 *
721 729 * ksp = kstat_create(module, instance, name, class, type, ndata, flags);
722 730 * if (ksp) {
723 731 * ... provider initialization, if necessary
724 732 * kstat_install(ksp);
725 733 * }
726 734 *
727 735 * There are three logically distinct steps here:
728 736 *
729 737 * Step 1: System Initialization (kstat_create)
730 738 *
731 739 * kstat_create() performs system initialization. kstat_create()
732 740 * allocates memory for the entire kstat (header plus data), initializes
733 741 * all header fields, initializes the data section to all zeroes, assigns
734 742 * a unique KID, and puts the kstat onto the system's kstat chain.
735 743 * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set),
736 744 * because the provider (caller) has not yet had a chance to initialize
737 745 * the data section.
738 746 *
739 747 * By default, kstats are exported to all zones on the system. A kstat may be
740 748 * created via kstat_create_zone() to specify a zone to which the statistics
741 749 * should be exported. kstat_zone_add() may be used to specify additional
742 750 * zones to which the statistics are to be exported.
743 751 *
744 752 * Step 2: Provider Initialization
745 753 *
746 754 * The provider performs any necessary initialization of the data section,
747 755 * e.g. setting the name fields in a KSTAT_TYPE_NAMED. Virtual kstats set
748 756 * the ks_data field at this time. The provider may also set the ks_update,
749 757 * ks_snapshot, ks_private, and ks_lock fields if necessary.
750 758 *
751 759 * Step 3: Installation (kstat_install)
752 760 *
753 761 * Once the kstat is completely initialized, kstat_install() clears the
754 762 * INVALID flag, thus making the kstat accessible to the outside world.
755 763 * kstat_install() also clears the DORMANT flag for persistent kstats.
756 764 *
757 765 * Removing a kstat from the system
758 766 *
759 767 * kstat_delete(ksp) removes ksp from the kstat chain and frees all
760 768 * associated system resources. NOTE: When you call kstat_delete(),
761 769 * you must NOT be holding that kstat's ks_lock. Otherwise, you may
762 770 * deadlock with a kstat reader.
763 771 *
764 772 * Persistent kstats
765 773 *
766 774 * From the provider's point of view, persistence is transparent. The only
767 775 * difference between ephemeral (normal) kstats and persistent kstats
768 776 * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create(). Magically,
769 777 * this has the effect of making your data visible even when you're
770 778 * not home. Persistence is important to tools like iostat, which want
771 779 * to get a meaningful picture of disk activity. Without persistence,
772 780 * raw disk i/o statistics could never accumulate: they would come and
773 781 * go with each open/close of the raw device.
774 782 *
775 783 * The magic of persistence works by slightly altering the behavior of
776 784 * kstat_create() and kstat_delete(). The first call to kstat_create()
777 785 * creates a new kstat, as usual. However, kstat_delete() does not
778 786 * actually delete the kstat: it performs one final update of the data
779 787 * (i.e., calls the ks_update routine), marks the kstat as dormant, and
780 788 * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back
781 789 * to their default values (since they might otherwise point to garbage,
782 790 * e.g. if the provider is going away). kstat clients can still access
783 791 * the dormant kstat just like a live kstat; they just continue to see
784 792 * the final data values as long as the kstat remains dormant.
785 793 * All subsequent kstat_create() calls simply find the already-existing,
786 794 * dormant kstat and return a pointer to it, without altering any fields.
787 795 * The provider then performs its usual initialization sequence, and
788 796 * calls kstat_install(). kstat_install() uses the old data values to
789 797 * initialize the native data (i.e., ks_update is called with KSTAT_WRITE),
790 798 * thus making it seem like you were never gone.
791 799 */
792 800
/*
 * Kernel-side kstat provider interface.
 *
 * kstat_create() takes, in order: module, instance, name, class, type,
 * ndata, flags.  It allocates the kstat (header plus zeroed data section),
 * assigns a unique KID, links it onto the system kstat chain and returns it
 * with KSTAT_FLAG_INVALID set.  Callers are expected to check the returned
 * pointer before use (see the "if (ksp)" idiom in the sequence above).
 * For a persistent kstat that is currently dormant, the existing kstat is
 * returned instead of a new one.
 *
 * kstat_create_zone() takes the same arguments plus the zone to which the
 * statistics should be exported.
 */
793 801 extern kstat_t *kstat_create(const char *, int, const char *, const char *,
794 802 uchar_t, uint_t, uchar_t);
795 803 extern kstat_t *kstat_create_zone(const char *, int, const char *,
796 804 const char *, uchar_t, uint_t, uchar_t, zoneid_t);
/*
 * kstat_install() clears the INVALID flag (and the DORMANT flag for
 * persistent kstats), making the kstat visible to consumers.
 *
 * kstat_delete() removes the kstat from the chain and frees its resources;
 * a persistent kstat is marked dormant rather than freed.  The caller must
 * NOT hold the kstat's ks_lock when calling kstat_delete().
 */
797 805 extern void kstat_install(kstat_t *);
798 806 extern void kstat_delete(kstat_t *);
/*
 * NOTE(review): the helpers below are not described in this section of the
 * header.  Judging by name and signature only: the string helpers copy a
 * string into a named-kstat value or a name buffer, and the _byname
 * variants delete a kstat identified by (module, instance, name[, zone]).
 * Confirm against the implementation.
 */
799 807 extern void kstat_named_setstr(kstat_named_t *knp, const char *src);
800 808 extern void kstat_set_string(char *, const char *);
801 809 extern void kstat_delete_byname(const char *, int, const char *);
802 810 extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t);
/*
 * Initializers for individual named/timer entries.  Setting the name fields
 * of a KSTAT_TYPE_NAMED kstat is part of provider initialization (step 2).
 * NOTE(review): the trailing uchar_t of kstat_named_init() is presumably
 * the data type of the named entry -- confirm.
 */
803 811 extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
804 812 extern void kstat_timer_init(kstat_timer_t *, const char *);
/*
 * I/O queue accounting on a kstat_io_t.
 * NOTE(review): presumably these record a request entering, leaving, or
 * moving between the wait queue and the run queue; exact semantics are not
 * shown in this section -- confirm against the implementation.
 */
805 813 extern void kstat_waitq_enter(kstat_io_t *);
806 814 extern void kstat_waitq_exit(kstat_io_t *);
807 815 extern void kstat_runq_enter(kstat_io_t *);
808 816 extern void kstat_runq_exit(kstat_io_t *);
809 817 extern void kstat_waitq_to_runq(kstat_io_t *);
810 818 extern void kstat_runq_back_to_waitq(kstat_io_t *);
/*
 * Interval timing on a kstat_timer_t.
 * NOTE(review): presumably start/stop bracket a timed event -- confirm.
 */
811 819 extern void kstat_timer_start(kstat_timer_t *);
812 820 extern void kstat_timer_stop(kstat_timer_t *);
813 821
/*
 * Per-zone visibility of a kstat.  By default a kstat is exported to all
 * zones; kstat_zone_add() names an additional zone to which the statistics
 * are exported.
 * NOTE(review): kstat_zone_remove() presumably undoes kstat_zone_add(), and
 * kstat_zone_find() presumably reports whether the kstat is exported to the
 * given zone; the meaning of its int return is not shown here -- confirm.
 */
814 822 extern void kstat_zone_add(kstat_t *, zoneid_t);
815 823 extern void kstat_zone_remove(kstat_t *, zoneid_t);
816 824 extern int kstat_zone_find(kstat_t *, zoneid_t);
817 825
/*
 * Lookup with hold / release.
 * NOTE(review): not described in this section of the header.  Judging by
 * name and signature, kstat_hold_bykid() looks a kstat up by KID and
 * kstat_hold_byname() by (module, instance, name), both scoped to a zone,
 * and kstat_rele() releases a hold obtained from either.  Whether a failed
 * lookup returns NULL, and what locking a hold implies, must be confirmed
 * against the implementation.
 */
818 826 extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t);
819 827 extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t);
820 828 extern void kstat_rele(kstat_t *);
821 829
822 830 #endif /* defined(_KERNEL) */
823 831
824 832 #ifdef __cplusplus
825 833 }
826 834 #endif
827 835
828 836 #endif /* _SYS_KSTAT_H */
↓ open down ↓ |
288 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX