1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  *
  26  * rewritten from UCB 4.13 83/09/25
  27  * rewritten from SunOS 4.1 SID 1.18 89/10/06
  28  */
  29 /*
  30  * Copyright (c) 2012 by Delphix. All rights reserved.
  31  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  32  * Copyright 2016 James S. Blachly, MD. All rights reserved.
  33  */
  34 
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <stdarg.h>
  38 #include <ctype.h>
  39 #include <unistd.h>
  40 #include <memory.h>
  41 #include <errno.h>
  42 #include <string.h>
  43 #include <signal.h>
  44 #include <sys/types.h>
  45 #include <time.h>
  46 #include <sys/time.h>
  47 #include <sys/sysinfo.h>
  48 #include <inttypes.h>
  49 #include <strings.h>
  50 #include <sys/systeminfo.h>
  51 #include <kstat.h>
  52 #include <locale.h>
  53 
  54 #include "dsr.h"
  55 #include "statcommon.h"
  56 
/*
 * Bits kept in do_disk that select which disk report(s) are produced.
 * In show_disk(), DISK_OLD prints kps/tps/serv, DISK_NEW prints rps/wps and
 * the percentage of time a transaction was running, and DISK_EXTENDED prints
 * the full per-device statistics line. DISK_ERRORS makes show_disk() read the
 * per-device error counters.
 */
#define DISK_OLD                0x0001
#define DISK_NEW                0x0002
#define DISK_EXTENDED           0x0004
#define DISK_ERRORS             0x0008
#define DISK_EXTENDED_ERRORS    0x0010
#define DISK_IOPATH_LI          0x0020  /* LunInitiator */
#define DISK_IOPATH_LTI         0x0040  /* LunTargetInitiator */

/*
 * Convenience masks. DISK_NORMAL covers the modes whose two-line header is
 * written by printhdr(). main() installs the SIGCONT header handler only
 * when none of the PRINT_VERTICAL bits are set.
 */
#define DISK_NORMAL             (DISK_OLD | DISK_NEW)
#define DISK_IO_MASK            (DISK_OLD | DISK_NEW | DISK_EXTENDED)
#define DISK_ERROR_MASK         (DISK_ERRORS | DISK_EXTENDED_ERRORS)
#define PRINT_VERTICAL          (DISK_ERROR_MASK | DISK_EXTENDED)
  69 
  70 #define REPRINT 19
  71 
  72 #define NUMBER_OF_ERR_COUNTERS  3
  73 
  74 /*
  75  * It's really a pseudo-gigabyte. We use 1000000000 bytes so that the disk
  76  * labels don't look bad. 1GB is really 1073741824 bytes.
  77  */
  78 #define DISK_GIGABYTE   1000000000.0
  79 
/*
 * Function descriptor: one node in the singly linked list of output
 * functions (formatter_list). main() walks the list from the head and calls
 * each nfunc in turn for every report it emits.
 */
typedef struct formatter {
        void (*nfunc)(void);            /* output function to invoke */
        struct formatter *next;         /* next node, NULL at the end */
} format_t;
  88 
/*
 * Used to get formatting right when printing tty/cpu
 * data to the right of disk data.
 *
 * show_disk() consults the current mode together with its caller-supplied
 * counter to decide whether a given device is printed on this pass.
 */
enum show_disk_mode {
        SHOW_FIRST_ONLY,        /* skip devices once the counter is non-zero */
        SHOW_SECOND_ONWARDS,    /* skip (and count) while the counter is zero */
        SHOW_ALL                /* no filtering on the counter */
};
  98 
  99 enum show_disk_mode show_disk_mode = SHOW_ALL;
 100 
 101 char *cmdname = "iostat";
 102 int caught_cont = 0;
 103 
 104 static char one_blank[] = " ";
 105 static char two_blanks[] = "  ";
 106 
 107 /*
 108  * count for number of lines to be emitted before a header is
 109  * shown again. Only used for the basic format.
 110  */
 111 static  uint_t  tohdr = 1;
 112 
 113 /*
 114  * If we're in raw format, have we printed a header? We only do it
 115  * once for raw but we emit it every REPRINT lines in non-raw format.
 116  * This applies only for the basic header. The extended header is
 117  * done only once in both formats.
 118  */
 119 static  uint_t  hdr_out;
 120 
 121 /*
 122  * Flags representing arguments from command line
 123  */
 124 static  uint_t  do_tty;                 /* show tty info (-t) */
 125 static  uint_t  do_disk;                /* show disk info per selected */
 126                                         /* format (-d, -D, -e, -E, -x -X -Y) */
 127 static  uint_t  do_cpu;                 /* show cpu info (-c) */
 128 static  uint_t  do_interval;            /* do intervals (-I) */
 129 static  int     do_partitions;          /* per-partition stats (-p) */
 130 static  int     do_partitions_only;     /* per-partition stats only (-P) */
 131                                         /* no per-device stats for disks */
 132 static  uint_t  do_conversions;         /* display disks as cXtYdZ (-n) */
 133 static  uint_t  do_megabytes;           /* display data in MB/sec (-M) */
 134 static  uint_t  do_controller;          /* display controller info (-C) */
 135 static  uint_t  do_raw;                 /* emit raw format (-r) */
 136 static  uint_t  timestamp_fmt = NODATE; /* timestamp  each display (-T) */
 137 static  uint_t  do_devid;               /* -E should show devid */
 138 
 139 /*
 140  * Default number of disk drives to be displayed in basic format
 141  */
 142 #define DEFAULT_LIMIT   4
 143 
 144 struct iodev_filter df;
 145 
 146 static  uint_t  suppress_state;         /* skip state change messages */
 147 static  uint_t  suppress_zero;          /* skip zero valued lines */
 148 static  uint_t  show_mountpts;          /* show mount points */
 149 static  int     interval;               /* interval (seconds) to output */
 150 static  int     iter;                   /* iterations from command line */
 151 
 152 #define SMALL_SCRATCH_BUFLEN    MAXNAMELEN
 153 
 154 static int      iodevs_nl;              /* name field width */
 155 #define IODEVS_NL_MIN           6       /* not too thin for "device" */
 156 #define IODEVS_NL_MAX           24      /* but keep full width under 80 */
 157 
 158 static  char    disk_header[132];
 159 static  uint_t  dh_len;                 /* disk header length for centering */
 160 static  int     lineout;                /* data waiting to be printed? */
 161 
 162 static struct snapshot *newss;
 163 static struct snapshot *oldss;
 164 static  double  getime;                 /* elapsed time */
 165 static  double  percent;                /* 100 / etime */
 166 
 167 /*
 168  * List of functions to be called which will construct the desired output
 169  */
 170 static format_t *formatter_list;
 171 static format_t *formatter_end;
 172 
 173 static u_longlong_t     ull_delta(u_longlong_t, u_longlong_t);
 174 static uint_t   u32_delta(uint_t, uint_t);
 175 static void setup(void (*nfunc)(void));
 176 static void print_tty_hdr1(void);
 177 static void print_tty_hdr2(void);
 178 static void print_cpu_hdr1(void);
 179 static void print_cpu_hdr2(void);
 180 static void print_tty_data(void);
 181 static void print_cpu_data(void);
 182 static void print_err_hdr(void);
 183 static void print_disk_header(void);
 184 static void hdrout(void);
 185 static void disk_errors(void);
 186 static void do_newline(void);
 187 static void push_out(const char *, ...);
 188 static void printhdr(int);
 189 static void printxhdr(void);
 190 static void usage(void);
 191 static void do_args(int, char **);
 192 static void do_format(void);
 193 static void show_all_disks(void);
 194 static void show_first_disk(void);
 195 static void show_other_disks(void);
 196 static void show_disk_errors(void *, void *, void *);
 197 static void write_core_header(void);
 198 static int  fzero(double value);
 199 static int  safe_strtoi(char const *val, char *errmsg);
 200 
/*
 * Program entry point.
 *
 * Parses the command line via do_args(), converts the resulting option flags
 * into a snapshot type mask and an iodev filter (df), builds the formatter
 * list via do_format(), and then loops: run every formatter against the
 * current snapshot, sleep until the next interval, take a fresh snapshot and
 * repeat. The loop ends when the iteration count runs out; with no count
 * (iter == 0) it is treated as "forever".
 *
 * Returns 0 when the loop terminates normally.
 */
int
main(int argc, char **argv)
{
        enum snapshot_types types = SNAP_SYSTEM;
        kstat_ctl_t *kc;
        long hz;
        int forever;
        hrtime_t start_n;
        hrtime_t period_n;

        (void) setlocale(LC_ALL, "");
#if !defined(TEXT_DOMAIN)               /* Should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST"          /* Use this only if it weren't */
#endif
        (void) textdomain(TEXT_DOMAIN);

        do_args(argc, argv);

        /*
         * iostat historically showed CPU changes, even though
         * it doesn't provide much useful information
         */
        types |= SNAP_CPUS;

        /*
         * Translate the option flags into the set of data the snapshot
         * must contain (types) and the device classes that may be
         * reported (df.if_allowed_types).
         */
        if (do_disk)
                types |= SNAP_IODEVS;

        if (do_disk && !do_partitions_only)
                df.if_allowed_types |= IODEV_DISK;
        /*
         * NOTE(review): the LI case below allows IODEV_IOPATH_LTI, the
         * same device type as the LTI case - confirm this is intended.
         */
        if (do_disk & DISK_IOPATH_LI) {
                df.if_allowed_types |= IODEV_IOPATH_LTI;
                types |= SNAP_IOPATHS_LI;
        }
        if (do_disk & DISK_IOPATH_LTI) {
                df.if_allowed_types |= IODEV_IOPATH_LTI;
                types |= SNAP_IOPATHS_LTI;
        }
        if (do_disk & DISK_ERROR_MASK)
                types |= SNAP_IODEV_ERRORS;
        if (do_partitions || do_partitions_only)
                df.if_allowed_types |= IODEV_PARTITION;
        if (do_conversions)
                types |= SNAP_IODEV_PRETTY;
        if (do_devid)
                types |= SNAP_IODEV_DEVID;
        if (do_controller) {
                /*
                 * Controller output needs one of the vertical formats,
                 * and is rejected together with DISK_EXTENDED_ERRORS.
                 */
                if (!(do_disk & PRINT_VERTICAL) ||
                    (do_disk & DISK_EXTENDED_ERRORS))
                        fail(0, "-C can only be used with -e or -x.");
                types |= SNAP_CONTROLLERS;
                df.if_allowed_types |= IODEV_CONTROLLER;
        }

        /* clock ticks per second, used to scale the cpu tick delta */
        hz = sysconf(_SC_CLK_TCK);

        /*
         * Undocumented behavior - sending a SIGCONT will result
         * in a new header being emitted. Used only if we're not
         * doing extended headers. This is a historical
         * artifact.
         */
        if (!(do_disk & PRINT_VERTICAL))
                (void) signal(SIGCONT, printhdr);

        /*
         * period_n and start_n are assigned only when an interval was
         * given; they are read below only when interval > 0.
         */
        if (interval)
                period_n = (hrtime_t)interval * NANOSEC;

        kc = open_kstat();
        if (interval)
                start_n = gethrtime();
        newss = acquire_snapshot(kc, types, &df);

        /* compute width of "device" field */
        iodevs_nl = newss->s_iodevs_is_name_maxlen;
        iodevs_nl = (iodevs_nl < IODEVS_NL_MIN) ?
            IODEVS_NL_MIN : iodevs_nl;
        iodevs_nl = (iodevs_nl > IODEVS_NL_MAX) ?
            IODEVS_NL_MAX : iodevs_nl;

        do_format();

        /* an iteration count of zero means no count was requested */
        forever = (iter == 0);
        do {
                if (do_conversions && show_mountpts)
                        do_mnttab();

                if (do_tty || do_cpu) {
                        kstat_t *oldks;
                        /* no previous snapshot: delta is taken against NULL */
                        oldks = oldss ? &oldss->s_sys.ss_agg_sys : NULL;
                        getime = cpu_ticks_delta(oldks,
                            &newss->s_sys.ss_agg_sys);
                        percent = (getime > 0.0) ? 100.0 / getime : 0.0;
                        /* ticks summed over cpus -> seconds per cpu */
                        getime = (getime / nr_active_cpus(newss)) / hz;
                        /* fall back to the interval, then to 1, if zero */
                        if (getime == 0.0)
                                getime = (double)interval;
                        if (getime == 0.0 || do_interval)
                                getime = 1.0;
                }

                if (formatter_list) {
                        format_t *tmp;
                        tmp = formatter_list;

                        if (timestamp_fmt != NODATE)
                                print_timestamp(timestamp_fmt);

                        /* run every registered output function in order */
                        while (tmp) {
                                (tmp->nfunc)();
                                tmp = tmp->next;
                        }
                        (void) fflush(stdout);
                }

                /*
                 * only remaining/doing a single iteration, we are done;
                 * the continue jumps to the --iter test, which then
                 * evaluates to zero and ends the loop without sleeping
                 * or taking another snapshot.
                 */
                if (iter == 1)
                        continue;

                if (interval > 0)
                        /* Have a kip */
                        sleep_until(&start_n, period_n, forever, &caught_cont);

                /* rotate snapshots: current becomes previous */
                free_snapshot(oldss);
                oldss = newss;
                newss = acquire_snapshot(kc, types, &df);
                /* the name column may grow but never shrinks between passes */
                iodevs_nl = (newss->s_iodevs_is_name_maxlen > iodevs_nl) ?
                    newss->s_iodevs_is_name_maxlen : iodevs_nl;
                iodevs_nl = (iodevs_nl < IODEVS_NL_MIN) ?
                    IODEVS_NL_MIN : iodevs_nl;
                iodevs_nl = (iodevs_nl > IODEVS_NL_MAX) ?
                    IODEVS_NL_MAX : iodevs_nl;

                if (!suppress_state)
                        snapshot_report_changes(oldss, newss);

                /* if config changed, show stats from boot */
                if (snapshot_has_changed(oldss, newss)) {
                        free_snapshot(oldss);
                        oldss = NULL;
                }

        } while (--iter);

        free_snapshot(oldss);
        free_snapshot(newss);
        (void) kstat_close(kc);
        free(df.if_names);
        return (0);
}
 349 
 350 /*
 351  * Some magic numbers used in header formatting.
 352  *
 353  * DISK_LEN = length of either "kps tps serv" or "wps rps util"
 354  *            using 0 as the first position
 355  *
 356  * DISK_ERROR_LEN = length of "s/w h/w trn tot" with one space on
 357  *              either side. Does not use zero as first pos.
 358  *
 359  * DEVICE_LEN = length of "device" + 1 character.
 360  */
 361 
 362 #define DISK_LEN        11
 363 #define DISK_ERROR_LEN  16
 364 #define DEVICE_LEN      7
 365 
 366 /*ARGSUSED*/
 367 static void
 368 show_disk_name(void *v1, void *v2, void *data)
 369 {
 370         struct iodev_snapshot *dev = (struct iodev_snapshot *)v2;
 371         size_t slen;
 372         char *name;
 373         char fbuf[SMALL_SCRATCH_BUFLEN];
 374 
 375         if (dev == NULL)
 376                 return;
 377 
 378         name = do_conversions ? dev->is_pretty : dev->is_name;
 379         name = name ? name : dev->is_name;
 380 
 381         if (!do_raw) {
 382                 uint_t width;
 383 
 384                 slen = strlen(name);
 385                 /*
 386                  * The length is less
 387                  * than the section
 388                  * which will be displayed
 389                  * on the next line.
 390                  * Center the entry.
 391                  */
 392 
 393                 width = (DISK_LEN + 1)/2 + (slen / 2);
 394                 (void) snprintf(fbuf, sizeof (fbuf),
 395                     "%*s", width, name);
 396                 name = fbuf;
 397                 push_out("%-13.13s ", name);
 398         } else {
 399                 push_out(name);
 400         }
 401 }
 402 
 403 /*ARGSUSED*/
 404 static void
 405 show_disk_header(void *v1, void *v2, void *data)
 406 {
 407         push_out(disk_header);
 408 }
 409 
 410 /*
 411  * Write out a two line header. What is written out depends on the flags
 412  * selected but in the worst case consists of a tty header, a disk header
 413  * providing information for 4 disks and a cpu header.
 414  *
 415  * The tty header consists of the word "tty" on the first line above the
 416  * words "tin tout" on the next line. If present the tty portion consumes
 417  * the first 10 characters of each line since "tin tout" is surrounded
 418  * by single spaces.
 419  *
 420  * Each of the disk sections is a 14 character "block" in which the name of
 421  * the disk is centered in the first 12 characters of the first line.
 422  *
 423  * The cpu section is an 11 character block with "cpu" centered over the
 424  * section.
 425  *
 426  * The worst case should look as follows:
 427  *
 428  * 0---------1--------2---------3---------4---------5---------6---------7-------
 429  *    tty        sd0           sd1           sd2           sd3           cpu
 430  *  tin tout kps tps serv  kps tps serv  kps tps serv  kps tps serv  us sy dt id
 431  *  NNN NNNN NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NN NN NN NN
 432  *
 433  * When -D is specified, the disk header looks as follows (worst case):
 434  *
 435  * 0---------1--------2---------3---------4---------5---------6---------7-------
 436  *     tty        sd0           sd1             sd2          sd3          cpu
 437  *   tin tout rps wps util  rps wps util  rps wps util  rps wps util us sy dt id
 438  *   NNN NNNN NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN NN NN NN NN
 439  */
 440 static void
 441 printhdr(int sig)
 442 {
 443         /*
 444          * If we're here because a signal fired, reenable the
 445          * signal.
 446          */
 447         if (sig)
 448                 (void) signal(SIGCONT, printhdr);
 449         if (sig == SIGCONT)
 450                 caught_cont = 1;
 451         /*
 452          * Horizontal mode headers
 453          *
 454          * First line
 455          */
 456         if (do_tty)
 457                 print_tty_hdr1();
 458 
 459         if (do_disk & DISK_NORMAL) {
 460                 (void) snapshot_walk(SNAP_IODEVS, NULL, newss,
 461                     show_disk_name, NULL);
 462         }
 463 
 464         if (do_cpu)
 465                 print_cpu_hdr1();
 466         do_newline();
 467 
 468         /*
 469          * Second line
 470          */
 471         if (do_tty)
 472                 print_tty_hdr2();
 473 
 474         if (do_disk & DISK_NORMAL) {
 475                 (void) snapshot_walk(SNAP_IODEVS, NULL, newss,
 476                     show_disk_header, NULL);
 477         }
 478 
 479         if (do_cpu)
 480                 print_cpu_hdr2();
 481         do_newline();
 482 
 483         tohdr = REPRINT;
 484 }
 485 
 486 /*
 487  * Write out the extended header centered over the core information.
 488  */
 489 static void
 490 write_core_header(void)
 491 {
 492         char *edev = "extended device statistics";
 493         uint_t lead_space_ct;
 494         uint_t follow_space_ct;
 495         size_t edevlen;
 496 
 497         if (do_raw == 0) {
 498                 /*
 499                  * The things we do to look nice...
 500                  *
 501                  * Center the core output header. Make sure we have the
 502                  * right number of trailing spaces for follow-on headers
 503                  * (i.e., cpu and/or tty and/or errors).
 504                  */
 505                 edevlen = strlen(edev);
 506                 lead_space_ct = dh_len - edevlen;
 507                 lead_space_ct /= 2;
 508                 if (lead_space_ct > 0) {
 509                         follow_space_ct = dh_len - (lead_space_ct + edevlen);
 510                         if (do_disk & DISK_ERRORS)
 511                                 follow_space_ct -= DISK_ERROR_LEN;
 512                         if ((do_disk & DISK_EXTENDED) && do_conversions)
 513                                 follow_space_ct -= DEVICE_LEN;
 514 
 515                         push_out("%1$*2$.*2$s%3$s%4$*5$.*5$s", one_blank,
 516                             lead_space_ct, edev, one_blank, follow_space_ct);
 517                 } else
 518                         push_out("%56s", edev);
 519         } else
 520                 push_out(edev);
 521 }
 522 
 523 /*
 524  * In extended mode headers, we don't want to reprint the header on
 525  * signals as they are printed every time anyways.
 526  */
 527 static void
 528 printxhdr(void)
 529 {
 530 
 531         /*
 532          * Vertical mode headers
 533          */
 534         if (do_disk & DISK_EXTENDED)
 535                 setup(write_core_header);
 536         if (do_disk & DISK_ERRORS)
 537                 setup(print_err_hdr);
 538 
 539         if (do_conversions) {
 540                 setup(do_newline);
 541                 if (do_disk & (DISK_EXTENDED | DISK_ERRORS))
 542                         setup(print_disk_header);
 543                 setup(do_newline);
 544         } else {
 545                 if (do_tty)
 546                         setup(print_tty_hdr1);
 547                 if (do_cpu)
 548                         setup(print_cpu_hdr1);
 549                 setup(do_newline);
 550 
 551                 if (do_disk & (DISK_EXTENDED | DISK_ERRORS))
 552                         setup(print_disk_header);
 553                 if (do_tty)
 554                         setup(print_tty_hdr2);
 555                 if (do_cpu)
 556                         setup(print_cpu_hdr2);
 557                 setup(do_newline);
 558         }
 559 }
 560 
 561 /*
 562  * Write out a line for this disk - note that show_disk writes out
 563  * full lines or blocks for each selected disk.
 564  */
 565 static void
 566 show_disk(void *v1, void *v2, void *data)
 567 {
 568         uint32_t err_counters[NUMBER_OF_ERR_COUNTERS];
 569         boolean_t display_err_counters = do_disk & DISK_ERRORS;
 570         struct iodev_snapshot *old = (struct iodev_snapshot *)v1;
 571         struct iodev_snapshot *new = (struct iodev_snapshot *)v2;
 572         int *count = (int *)data;
 573         double rps, wps, tps, mtps, krps, kwps, kps, avw, avr, w_pct, r_pct;
 574         double wserv, rserv, serv;
 575         double iosize;  /* kb/sec or MB/sec */
 576         double etime, hr_etime;
 577         char *disk_name;
 578         u_longlong_t ldeltas;
 579         uint_t udeltas;
 580         uint64_t t_delta;
 581         uint64_t w_delta;
 582         uint64_t r_delta;
 583         int doit = 1;
 584         uint_t toterrs;
 585         char *fstr;
 586 
 587         if (new == NULL)
 588                 return;
 589 
 590         switch (show_disk_mode) {
 591         case SHOW_FIRST_ONLY:
 592                 if (count != NULL && *count)
 593                         return;
 594                 break;
 595 
 596         case SHOW_SECOND_ONWARDS:
 597                 if (count != NULL && !*count) {
 598                         (*count)++;
 599                         return;
 600                 }
 601                 break;
 602 
 603         default:
 604                 break;
 605         }
 606 
 607         disk_name = do_conversions ? new->is_pretty : new->is_name;
 608         disk_name = disk_name ? disk_name : new->is_name;
 609 
 610         /*
 611          * Only do if we want IO stats - Avoids errors traveling this
 612          * section if that's all we want to see.
 613          */
 614         if (do_disk & DISK_IO_MASK) {
 615                 if (old) {
 616                         t_delta = hrtime_delta(old->is_snaptime,
 617                             new->is_snaptime);
 618                 } else {
 619                         t_delta = hrtime_delta(new->is_crtime,
 620                             new->is_snaptime);
 621                 }
 622 
 623                 if (new->is_nr_children) {
 624                         if (new->is_type == IODEV_CONTROLLER) {
 625                                 t_delta /= new->is_nr_children;
 626                         } else if ((new->is_type == IODEV_IOPATH_LT) ||
 627                             (new->is_type == IODEV_IOPATH_LI)) {
 628                                 /* synthetic path */
 629                                 if (!old) {
 630                                         t_delta = new->is_crtime;
 631                                 }
 632                                 t_delta /= new->is_nr_children;
 633                         }
 634                 }
 635 
 636                 hr_etime = (double)t_delta;
 637                 if (hr_etime == 0.0)
 638                         hr_etime = (double)NANOSEC;
 639                 etime = hr_etime / (double)NANOSEC;
 640 
 641                 /* reads per second */
 642                 udeltas = u32_delta(old ? old->is_stats.reads : 0,
 643                     new->is_stats.reads);
 644                 rps = (double)udeltas;
 645                 rps /= etime;
 646 
 647                 /* writes per second */
 648                 udeltas = u32_delta(old ? old->is_stats.writes : 0,
 649                     new->is_stats.writes);
 650                 wps = (double)udeltas;
 651                 wps /= etime;
 652 
 653                 tps = rps + wps;
 654                         /* transactions per second */
 655 
 656                 /*
 657                  * report throughput as either kb/sec or MB/sec
 658                  */
 659 
 660                 if (!do_megabytes)
 661                         iosize = 1024.0;
 662                 else
 663                         iosize = 1048576.0;
 664 
 665                 ldeltas = ull_delta(old ? old->is_stats.nread : 0,
 666                     new->is_stats.nread);
 667                 if (ldeltas) {
 668                         krps = (double)ldeltas;
 669                         krps /= etime;
 670                         krps /= iosize;
 671                 } else
 672                         krps = 0.0;
 673 
 674                 ldeltas = ull_delta(old ? old->is_stats.nwritten : 0,
 675                     new->is_stats.nwritten);
 676                 if (ldeltas) {
 677                         kwps = (double)ldeltas;
 678                         kwps /= etime;
 679                         kwps /= iosize;
 680                 } else
 681                         kwps = 0.0;
 682 
 683                 /*
 684                  * Blocks transferred per second
 685                  */
 686                 kps = krps + kwps;
 687 
 688                 /*
 689                  * Average number of wait transactions waiting
 690                  */
 691                 w_delta = hrtime_delta((u_longlong_t)
 692                     (old ? old->is_stats.wlentime : 0),
 693                     new->is_stats.wlentime);
 694                 if (w_delta) {
 695                         avw = (double)w_delta;
 696                         avw /= hr_etime;
 697                 } else
 698                         avw = 0.0;
 699 
 700                 /*
 701                  * Average number of run transactions waiting
 702                  */
 703                 r_delta = hrtime_delta(old ? old->is_stats.rlentime : 0,
 704                     new->is_stats.rlentime);
 705                 if (r_delta) {
 706                         avr = (double)r_delta;
 707                         avr /= hr_etime;
 708                 } else
 709                         avr = 0.0;
 710 
 711                 /*
 712                  * Average wait service time in milliseconds
 713                  */
 714                 if (tps > 0.0 && (avw != 0.0 || avr != 0.0)) {
 715                         mtps = 1000.0 / tps;
 716                         if (avw != 0.0)
 717                                 wserv = avw * mtps;
 718                         else
 719                                 wserv = 0.0;
 720 
 721                         if (avr != 0.0)
 722                                 rserv = avr * mtps;
 723                         else
 724                                 rserv = 0.0;
 725                         serv = rserv + wserv;
 726                 } else {
 727                         rserv = 0.0;
 728                         wserv = 0.0;
 729                         serv = 0.0;
 730                 }
 731 
 732                 /* % of time there is a transaction waiting for service */
 733                 t_delta = hrtime_delta(old ? old->is_stats.wtime : 0,
 734                     new->is_stats.wtime);
 735                 if (t_delta) {
 736                         w_pct = (double)t_delta;
 737                         w_pct /= hr_etime;
 738                         w_pct *= 100.0;
 739 
 740                         /*
 741                          * Average the wait queue utilization over the
 742                          * the controller's devices, if this is a controller.
 743                          */
 744                         if (new->is_type == IODEV_CONTROLLER)
 745                                 w_pct /= new->is_nr_children;
 746                 } else
 747                         w_pct = 0.0;
 748 
 749                 /* % of time there is a transaction running */
 750                 t_delta = hrtime_delta(old ? old->is_stats.rtime : 0,
 751                     new->is_stats.rtime);
 752                 if (t_delta) {
 753                         r_pct = (double)t_delta;
 754                         r_pct /= hr_etime;
 755                         r_pct *= 100.0;
 756 
 757                         /*
 758                          * Average the percent busy over the controller's
 759                          * devices, if this is a controller.
 760                          */
 761                         if (new->is_type == IODEV_CONTROLLER)
 762                                 w_pct /= new->is_nr_children;
 763                 } else {
 764                         r_pct = 0.0;
 765                 }
 766 
 767                 /* % of time there is a transaction running */
 768                 if (do_interval) {
 769                         rps     *= etime;
 770                         wps     *= etime;
 771                         tps     *= etime;
 772                         krps    *= etime;
 773                         kwps    *= etime;
 774                         kps     *= etime;
 775                 }
 776         }
 777 
 778         if (do_disk & (DISK_EXTENDED | DISK_ERRORS)) {
 779                 if ((!do_conversions) && ((suppress_zero == 0) ||
 780                     ((do_disk & DISK_EXTENDED) == 0))) {
 781                         if (do_raw == 0) {
 782                                 push_out("%-*.*s",
 783                                     iodevs_nl, iodevs_nl, disk_name);
 784                         } else {
 785                                 push_out(disk_name);
 786                         }
 787                 }
 788         }
 789 
 790         /*
 791          * The error counters are read first (if asked for and if they are
 792          * available).
 793          */
 794         bzero(err_counters, sizeof (err_counters));
 795         toterrs = 0;
 796         if (display_err_counters && (new->is_errors.ks_data != NULL)) {
 797                 kstat_named_t   *knp;
 798                 int             i;
 799 
 800                 knp = KSTAT_NAMED_PTR(&new->is_errors);
 801                 for (i = 0; i < NUMBER_OF_ERR_COUNTERS; i++) {
 802                         switch (knp[i].data_type) {
 803                                 case KSTAT_DATA_ULONG:
 804                                 case KSTAT_DATA_ULONGLONG:
 805                                         err_counters[i] = knp[i].value.ui32;
 806                                         toterrs += knp[i].value.ui32;
 807                                         break;
 808                                 default:
 809                                         break;
 810                         }
 811                 }
 812         }
 813 
 814         switch (do_disk & DISK_IO_MASK) {
 815         case DISK_OLD:
 816                 if (do_raw == 0)
 817                         fstr = "%3.0f %3.0f %4.0f  ";
 818                 else
 819                         fstr = "%.0f,%.0f,%.0f";
 820                 push_out(fstr, kps, tps, serv);
 821                 break;
 822         case DISK_NEW:
 823                 if (do_raw == 0)
 824                         fstr = "%3.0f %3.0f %4.1f  ";
 825                 else
 826                         fstr = "%.0f,%.0f,%.1f";
 827                 push_out(fstr, rps, wps, r_pct);
 828                 break;
 829         case DISK_EXTENDED:
 830                 if (suppress_zero) {
 831                         if (fzero(rps) && fzero(wps) && fzero(krps) &&
 832                             fzero(kwps) && fzero(avw) && fzero(avr) &&
 833                             fzero(serv) && fzero(w_pct) && fzero(r_pct) &&
 834                             (toterrs == 0)) {
 835                                 doit = 0;
 836                                 display_err_counters = B_FALSE;
 837                         } else if (do_conversions == 0) {
 838                                 if (do_raw == 0) {
 839                                         push_out("%-*.*s",
 840                                             iodevs_nl, iodevs_nl, disk_name);
 841                                 } else {
 842                                         push_out(disk_name);
 843                                 }
 844                         }
 845                 }
 846                 if (doit) {
 847                         if (!do_conversions) {
 848                                 if (do_raw == 0) {
 849                                         fstr = " %6.1f %6.1f %6.1f %6.1f "
 850                                             "%4.1f %4.1f %6.1f %3.0f "
 851                                             "%3.0f ";
 852                                 } else {
 853                                         fstr = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,"
 854                                             "%.1f,%.0f,%.0f";
 855                                 }
 856                                 push_out(fstr, rps, wps, krps, kwps, avw, avr,
 857                                     serv, w_pct, r_pct);
 858                         } else {
 859                                 if (do_raw == 0) {
 860                                         fstr = " %6.1f %6.1f %6.1f %6.1f "
 861                                             "%4.1f %4.1f %6.1f %6.1f "
 862                                             "%3.0f %3.0f ";
 863                                 } else {
 864                                         fstr = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,"
 865                                             "%.1f,%.1f,%.0f,%.0f";
 866                                 }
 867                                 push_out(fstr, rps, wps, krps, kwps, avw, avr,
 868                                     wserv, rserv, w_pct, r_pct);
 869                         }
 870                 }
 871                 break;
 872         }
 873 
 874         if (display_err_counters) {
 875                 char    *efstr;
 876                 int     i;
 877 
 878                 if (do_raw == 0) {
 879                         if (do_disk == DISK_ERRORS)
 880                                 push_out(two_blanks);
 881                         efstr = "%3u ";
 882                 } else {
 883                         efstr = "%u";
 884                 }
 885 
 886                 for (i = 0; i < NUMBER_OF_ERR_COUNTERS; i++)
 887                         push_out(efstr, err_counters[i]);
 888 
 889                 push_out(efstr, toterrs);
 890         }
 891 
 892         if (suppress_zero == 0 || doit == 1) {
 893                 if ((do_disk & (DISK_EXTENDED | DISK_ERRORS)) &&
 894                     do_conversions) {
 895                         push_out("%s", disk_name);
 896                         if (show_mountpts && new->is_dname) {
 897                                 mnt_t *mount_pt;
 898                                 char *lu;
 899                                 char *dnlu;
 900                                 char lub[SMALL_SCRATCH_BUFLEN];
 901 
 902                                 lu = strrchr(new->is_dname, '/');
 903                                 if (lu) {
 904                                         /* only the part after a possible '/' */
 905                                         dnlu = strrchr(disk_name, '/');
 906                                         if (dnlu != NULL &&
 907                                             strcmp(dnlu, lu) == 0)
 908                                                 lu = new->is_dname;
 909                                         else {
 910                                                 *lu = 0;
 911                                                 (void) strcpy(lub,
 912                                                     new->is_dname);
 913                                                 *lu = '/';
 914                                                 (void) strcat(lub, "/");
 915                                                 (void) strcat(lub,
 916                                                     disk_name);
 917                                                 lu = lub;
 918                                         }
 919                                 } else
 920                                         lu = disk_name;
 921                                 mount_pt = lookup_mntent_byname(lu);
 922                                 if (mount_pt) {
 923                                         if (do_raw == 0)
 924                                                 push_out(" (%s)",
 925                                                     mount_pt->mount_point);
 926                                         else
 927                                                 push_out("(%s)",
 928                                                     mount_pt->mount_point);
 929                                 }
 930                         }
 931                 }
 932         }
 933 
 934         if ((do_disk & PRINT_VERTICAL) && show_disk_mode != SHOW_FIRST_ONLY)
 935                 do_newline();
 936 
 937         if (count != NULL)
 938                 (*count)++;
 939 }
 940 
 941 static void
 942 usage(void)
 943 {
 944         (void) fprintf(stderr,
 945             "Usage: iostat [-cCdDeEiImMnpPrstxXYz] "
 946             " [-l n] [-T d|u] [disk ...] [interval [count]]\n"
 947             "\t\t-c:    report percentage of time system has spent\n"
 948             "\t\t\tin user/system/dtrace/idle mode\n"
 949             "\t\t-C:    report disk statistics by controller\n"
 950             "\t\t-d:    display disk Kb/sec, transfers/sec, avg. \n"
 951             "\t\t\tservice time in milliseconds  \n"
 952             "\t\t-D:    display disk reads/sec, writes/sec, \n"
 953             "\t\t\tpercentage disk utilization \n"
 954             "\t\t-e:    report device error summary statistics\n"
 955             "\t\t-E:    report extended device error statistics\n"
 956             "\t\t-i:    show device IDs for -E output\n"
 957             "\t\t-I:    report the counts in each interval,\n"
 958             "\t\t\tinstead of rates, where applicable\n"
 959             "\t\t-l n:  Limit the number of disks to n\n"
 960             "\t\t-m:    Display mount points (most useful with -p)\n"
 961             "\t\t-M:    Display data throughput in MB/sec "
 962             "instead of Kb/sec\n"
 963             "\t\t-n:    convert device names to cXdYtZ format\n"
 964             "\t\t-p:    report per-partition disk statistics\n"
 965             "\t\t-P:    report per-partition disk statistics only,\n"
 966             "\t\t\tno per-device disk statistics\n"
 967             "\t\t-r:    Display data in comma separated format\n"
 968             "\t\t-s:    Suppress state change messages\n"
 969             "\t\t-T d|u Display a timestamp in date (d) or unix "
 970             "time_t (u)\n"
 971             "\t\t-t:    display chars read/written to terminals\n"
 972             "\t\t-x:    display extended disk statistics\n"
 973             "\t\t-X:    display I/O path statistics\n"
 974             "\t\t-Y:    display I/O path (I/T/L) statistics\n"
 975             "\t\t-z:    Suppress entries with all zero values\n");
 976         exit(1);
 977 }
 978 
 979 /*ARGSUSED*/
 980 static void
 981 show_disk_errors(void *v1, void *v2, void *d)
 982 {
 983         struct iodev_snapshot *disk = (struct iodev_snapshot *)v2;
 984         kstat_named_t *knp;
 985         size_t  col;
 986         int     i, len;
 987         char    *dev_name;
 988 
 989         if (disk->is_errors.ks_ndata == 0)
 990                 return;
 991         if (disk->is_type == IODEV_CONTROLLER)
 992                 return;
 993 
 994         dev_name = do_conversions ? disk->is_pretty : disk->is_name;
 995         dev_name = dev_name ? dev_name : disk->is_name;
 996 
 997         len = strlen(dev_name);
 998         if (len > 20)
 999                 push_out("%s ", dev_name);
1000         else if (len > 16)
1001                 push_out("%-20.20s ", dev_name);
1002         else {
1003                 if (do_conversions)
1004                         push_out("%-16.16s ", dev_name);
1005                 else
1006                         push_out("%-9.9s ", dev_name);
1007         }
1008         col = 0;
1009 
1010         knp = KSTAT_NAMED_PTR(&disk->is_errors);
1011         for (i = 0; i < disk->is_errors.ks_ndata; i++) {
1012                 /* skip kstats that the driver did not kstat_named_init */
1013                 if (knp[i].name[0] == 0)
1014                         continue;
1015 
1016                 col += strlen(knp[i].name);
1017 
1018                 switch (knp[i].data_type) {
1019                         case KSTAT_DATA_CHAR:
1020                         case KSTAT_DATA_STRING:
1021                                 if ((strcmp(knp[i].name, "Serial No") == 0) &&
1022                                     do_devid) {
1023                                         if (disk->is_devid) {
1024                                                 push_out("Device Id: %s ",
1025                                                     disk->is_devid);
1026                                                 col += strlen(disk->is_devid);
1027                                         } else {
1028                                                 push_out("Device Id: ");
1029                                         }
1030 
1031                                         break;
1032                                 }
1033                                 if (knp[i].data_type == KSTAT_DATA_CHAR) {
1034                                         push_out("%s: %-.16s ", knp[i].name,
1035                                             &knp[i].value.c[0]);
1036                                         col += strnlen(&knp[i].value.c[0], 16);
1037                                 } else {
1038                                         push_out("%s: %s ", knp[i].name,
1039                                             KSTAT_NAMED_STR_PTR(&knp[i]));
1040                                         col +=
1041                                             KSTAT_NAMED_STR_BUFLEN(&knp[i]) - 1;
1042                                 }
1043                                 break;
1044                         case KSTAT_DATA_ULONG:
1045                                 push_out("%s: %u ", knp[i].name,
1046                                     knp[i].value.ui32);
1047                                 col += 4;
1048                                 break;
1049                         case KSTAT_DATA_ULONGLONG:
1050                                 if (strcmp(knp[i].name, "Size") == 0) {
1051                                         do_newline();
1052                                         push_out("%s: %2.2fGB <%llu bytes>",
1053                                             knp[i].name,
1054                                             (float)knp[i].value.ui64 /
1055                                             DISK_GIGABYTE,
1056                                             knp[i].value.ui64);
1057                                         do_newline();
1058                                         col = 0;
1059                                         break;
1060                                 }
1061                                 push_out("%s: %u ", knp[i].name,
1062                                     knp[i].value.ui32);
1063                                 col += 4;
1064                                 break;
1065                         }
1066                 if ((col >= 62) || (i == 2)) {
1067                         do_newline();
1068                         col = 0;
1069                 }
1070         }
1071         if (col > 0) {
1072                 do_newline();
1073         }
1074         do_newline();
1075 }
1076 
1077 void
1078 do_args(int argc, char **argv)
1079 {
1080         int             c;
1081         int             errflg = 0;
1082         extern char     *optarg;
1083         extern int      optind;
1084 
1085         while ((c = getopt(argc, argv, "tdDxXYCciIpPnmMeEszrT:l:")) != EOF)
1086                 switch (c) {
1087                 case 't':
1088                         do_tty++;
1089                         break;
1090                 case 'd':
1091                         do_disk |= DISK_OLD;
1092                         break;
1093                 case 'D':
1094                         do_disk |= DISK_NEW;
1095                         break;
1096                 case 'x':
1097                         do_disk |= DISK_EXTENDED;
1098                         break;
1099                 case 'X':
1100                         if (do_disk & DISK_IOPATH_LTI)
1101                                 errflg++;       /* -Y already used */
1102                         else
1103                                 do_disk |= DISK_IOPATH_LI;
1104                         break;
1105                 case 'Y':
1106                         if (do_disk & DISK_IOPATH_LI)
1107                                 errflg++;       /* -X already used */
1108                         else
1109                                 do_disk |= DISK_IOPATH_LTI;
1110                         break;
1111                 case 'C':
1112                         do_controller++;
1113                         break;
1114                 case 'c':
1115                         do_cpu++;
1116                         break;
1117                 case 'I':
1118                         do_interval++;
1119                         break;
1120                 case 'p':
1121                         do_partitions++;
1122                         break;
1123                 case 'P':
1124                         do_partitions_only++;
1125                         break;
1126                 case 'n':
1127                         do_conversions++;
1128                         break;
1129                 case 'M':
1130                         do_megabytes++;
1131                         break;
1132                 case 'e':
1133                         do_disk |= DISK_ERRORS;
1134                         break;
1135                 case 'E':
1136                         do_disk |= DISK_EXTENDED_ERRORS;
1137                         break;
1138                 case 'i':
1139                         do_devid = 1;
1140                         break;
1141                 case 's':
1142                         suppress_state = 1;
1143                         break;
1144                 case 'z':
1145                         suppress_zero = 1;
1146                         break;
1147                 case 'm':
1148                         show_mountpts = 1;
1149                         break;
1150                 case 'T':
1151                         if (optarg) {
1152                                 if (*optarg == 'u')
1153                                         timestamp_fmt = UDATE;
1154                                 else if (*optarg == 'd')
1155                                         timestamp_fmt = DDATE;
1156                                 else
1157                                         errflg++;
1158                         } else {
1159                                 errflg++;
1160                         }
1161                         break;
1162                 case 'r':
1163                         do_raw = 1;
1164                         break;
1165                 case 'l':
1166                         df.if_max_iodevs = safe_strtoi(optarg, "invalid limit");
1167                         if (df.if_max_iodevs < 1)
1168                                 usage();
1169                         break;
1170                 case '?':
1171                         errflg++;
1172         }
1173 
1174         if ((do_disk & DISK_OLD) && (do_disk & DISK_NEW)) {
1175                 (void) fprintf(stderr, "-d and -D are incompatible.\n");
1176                 usage();
1177         }
1178 
1179         if (errflg) {
1180                 usage();
1181         }
1182 
1183         /* if no output classes explicity specified, use defaults */
1184         if (do_tty == 0 && do_disk == 0 && do_cpu == 0)
1185                 do_tty = do_cpu = 1, do_disk = DISK_OLD;
1186 
1187         /*
1188          * multi-path options (-X, -Y) without a specific vertical
1189          * output format (-x, -e, -E) imply extended -x format
1190          */
1191         if ((do_disk & (DISK_IOPATH_LI | DISK_IOPATH_LTI)) &&
1192             !(do_disk & PRINT_VERTICAL))
1193                 do_disk |= DISK_EXTENDED;
1194 
1195         /*
1196          * If conflicting options take the preferred
1197          * -D and -x result in -x
1198          * -d or -D and -e or -E gives only whatever -d or -D was specified
1199          */
1200         if ((do_disk & DISK_EXTENDED) && (do_disk & DISK_NORMAL))
1201                 do_disk &= ~DISK_NORMAL;
1202         if ((do_disk & DISK_NORMAL) && (do_disk & DISK_ERROR_MASK))
1203                 do_disk &= ~DISK_ERROR_MASK;
1204 
1205         /* nfs, tape, always shown */
1206         df.if_allowed_types = IODEV_NFS | IODEV_TAPE;
1207 
1208         /*
1209          * If limit == 0 then no command line limit was set, else if any of
1210          * the flags that cause unlimited disks were not set,
1211          * use the default of 4
1212          */
1213         if (df.if_max_iodevs == 0) {
1214                 df.if_max_iodevs = DEFAULT_LIMIT;
1215                 df.if_skip_floppy = 1;
1216                 if (do_disk & (DISK_EXTENDED | DISK_ERRORS |
1217                     DISK_EXTENDED_ERRORS)) {
1218                         df.if_max_iodevs = UNLIMITED_IODEVS;
1219                         df.if_skip_floppy = 0;
1220                 }
1221         }
1222         if (do_disk) {
1223                 size_t count = 0;
1224                 size_t i = optind;
1225 
1226                 while (i < argc && !isdigit(argv[i][0])) {
1227                         count++;
1228                         i++;
1229                 }
1230 
1231                 /*
1232                  * "Note:  disks  explicitly  requested
1233                  * are not subject to this disk limit"
1234                  */
1235                 if ((count > df.if_max_iodevs) ||
1236                     (count && (df.if_max_iodevs == UNLIMITED_IODEVS)))
1237                         df.if_max_iodevs = count;
1238 
1239                 df.if_names = safe_alloc(count * sizeof (char *));
1240                 (void) memset(df.if_names, 0, count * sizeof (char *));
1241 
1242                 df.if_nr_names = 0;
1243                 while (optind < argc && !isdigit(argv[optind][0]))
1244                         df.if_names[df.if_nr_names++] = argv[optind++];
1245         }
1246         if (optind < argc) {
1247                 interval = safe_strtoi(argv[optind], "invalid interval");
1248                 if (interval < 1)
1249                         fail(0, "invalid interval");
1250                 optind++;
1251 
1252                 if (optind < argc) {
1253                         iter = safe_strtoi(argv[optind], "invalid count");
1254                         if (iter < 1)
1255                                 fail(0, "invalid count");
1256                         optind++;
1257                 }
1258         }
1259         if (interval == 0)
1260                 iter = 1;
1261         if (optind < argc)
1262                 usage();
1263 }
1264 
1265 /*
1266  * Driver for doing the extended header formatting. Will produce
1267  * the function stack needed to output an extended header based
1268  * on the options selected.
1269  */
1270 
1271 void
1272 do_format(void)
1273 {
1274         char    header[SMALL_SCRATCH_BUFLEN] = {0};
1275         char    ch;
1276         char    iosz;
1277         const char    *fstr;
1278 
1279         disk_header[0] = 0;
1280         ch = (do_interval ? 'i' : 's');
1281         iosz = (do_megabytes ? 'M' : 'k');
1282         if (do_disk & DISK_ERRORS) {
1283                 if (do_raw == 0) {
1284                         (void) sprintf(header, "s/w h/w trn tot ");
1285                 } else
1286                         (void) sprintf(header, "s/w,h/w,trn,tot");
1287         }
1288         switch (do_disk & DISK_IO_MASK) {
1289                 case DISK_OLD:
1290                         if (do_raw == 0)
1291                                 fstr = "%cp%c tp%c serv  ";
1292                         else
1293                                 fstr = "%cp%c,tp%c,serv";
1294                         (void) snprintf(disk_header, sizeof (disk_header),
1295                             fstr, iosz, ch, ch);
1296                         break;
1297                 case DISK_NEW:
1298                         if (do_raw == 0)
1299                                 fstr = "rp%c wp%c util  ";
1300                         else
1301                                 fstr = "%rp%c,wp%c,util";
1302                         (void) snprintf(disk_header, sizeof (disk_header),
1303                             fstr, ch, ch);
1304                         break;
1305                 case DISK_EXTENDED:
1306                         /* This is -x option */
1307                         if (!do_conversions) {
1308                                 /* without -n option */
1309                                 if (do_raw == 0) {
1310                                         /* without -r option */
1311                                         (void) snprintf(disk_header,
1312                                             sizeof (disk_header),
1313                                             "%-*.*s    r/%c    w/%c   "
1314                                             "%cr/%c   %cw/%c wait actv  "
1315                                             "svc_t  %%%%w  %%%%b %s",
1316                                             iodevs_nl, iodevs_nl, "device",
1317                                             ch, ch, iosz, ch, iosz, ch, header);
1318                                 } else {
1319                                         /* with -r option */
1320                                         (void) snprintf(disk_header,
1321                                             sizeof (disk_header),
1322                                             "device,r/%c,w/%c,%cr/%c,%cw/%c,"
1323                                             "wait,actv,svc_t,%%%%w,"
1324                                             "%%%%b%s%s",
1325                                             ch, ch, iosz, ch, iosz, ch,
1326                                             *header == '\0' ? "" : ",",
1327                                             header);
1328                                         /*
1329                                          * if no -e flag, header == '\0...'
1330                                          * Ternary operator above is to prevent
1331                                          * trailing comma in full disk_header
1332                                          */
1333                                 }
1334                         } else {
1335                                 /* with -n option */
1336                                 if (do_raw == 0) {
1337                                         fstr = "    r/%c    w/%c   %cr/%c   "
1338                                             "%cw/%c wait actv wsvc_t asvc_t  "
1339                                             "%%%%w  %%%%b %sdevice";
1340                                 } else {
1341                                         fstr = "r/%c,w/%c,%cr/%c,%cw/%c,"
1342                                             "wait,actv,wsvc_t,asvc_t,"
1343                                             "%%%%w,%%%%b,%sdevice";
1344                                         /*
1345                                          * if -rnxe, "tot" (from -e) and
1346                                          * "device" are run together
1347                                          * due to lack of trailing comma
1348                                          * in 'header'. However, adding
1349                                          * trailing comma to header at
1350                                          * its definition leads to prob-
1351                                          * lems elsewhere so it's added
1352                                          * here in this edge case -rnxe
1353                                          */
1354                                         if (*header != '\0')
1355                                                 (void) strcat(header, ",");
1356                                 }
1357                                 (void) snprintf(disk_header,
1358                                     sizeof (disk_header),
1359                                     fstr, ch, ch, iosz, ch, iosz,
1360                                     ch, header);
1361                         }
1362                         break;
1363                 default:
1364                         break;
1365         }
1366 
1367         /* do DISK_ERRORS header (already added above for DISK_EXTENDED) */
1368         if ((do_disk & DISK_ERRORS) &&
1369             ((do_disk & DISK_IO_MASK) != DISK_EXTENDED)) {
1370                 if (!do_conversions) {
1371                         if (do_raw == 0)
1372                                 (void) snprintf(disk_header,
1373                                     sizeof (disk_header), "%-*.*s  %s",
1374                                     iodevs_nl, iodevs_nl, "device", header);
1375                         else
1376                                 (void) snprintf(disk_header,
1377                                     sizeof (disk_header), "device,%s", header);
1378                 } else {
1379                         if (do_raw == 0) {
1380                                 (void) snprintf(disk_header,
1381                                     sizeof (disk_header),
1382                                     "  %sdevice", header);
1383                         } else {
1384                                 (void) snprintf(disk_header,
1385                                     sizeof (disk_header),
1386                                     "%s,device", header);
1387                         }
1388                 }
1389         } else {
1390                 /*
1391                  * Need to subtract two characters for the % escape in
1392                  * the string.
1393                  */
1394                 dh_len = strlen(disk_header) - 2;
1395         }
1396 
1397         /*
1398          * -n *and* (-E *or* -e *or* -x)
1399          */
1400         if (do_conversions && (do_disk & PRINT_VERTICAL)) {
1401                 if (do_tty)
1402                         setup(print_tty_hdr1);
1403                 if (do_cpu)
1404                         setup(print_cpu_hdr1);
1405                 if (do_tty || do_cpu)
1406                         setup(do_newline);
1407                 if (do_tty)
1408                         setup(print_tty_hdr2);
1409                 if (do_cpu)
1410                         setup(print_cpu_hdr2);
1411                 if (do_tty || do_cpu)
1412                         setup(do_newline);
1413                 if (do_tty)
1414                         setup(print_tty_data);
1415                 if (do_cpu)
1416                         setup(print_cpu_data);
1417                 if (do_tty || do_cpu)
1418                         setup(do_newline);
1419                 printxhdr();
1420 
1421                 setup(show_all_disks);
1422         } else {
1423                 /*
1424                  * These unholy gymnastics are necessary to place CPU/tty
1425                  * data to the right of the disks/errors for the first
1426                  * line in vertical mode.
1427                  */
1428                 if (do_disk & PRINT_VERTICAL) {
1429                         printxhdr();
1430 
1431                         setup(show_first_disk);
1432                         if (do_tty)
1433                                 setup(print_tty_data);
1434                         if (do_cpu)
1435                                 setup(print_cpu_data);
1436                         setup(do_newline);
1437 
1438                         setup(show_other_disks);
1439                 } else {
1440                         setup(hdrout);
1441                         if (do_tty)
1442                                 setup(print_tty_data);
1443                         setup(show_all_disks);
1444                         if (do_cpu)
1445                                 setup(print_cpu_data);
1446                 }
1447 
1448                 setup(do_newline);
1449         }
1450         if (do_disk & DISK_EXTENDED_ERRORS)
1451                 setup(disk_errors);
1452 }
1453 
1454 /*
1455  * Add a new function to the list of functions
1456  * for this invocation. Once on the stack the
1457  * function is never removed nor does its place
1458  * change.
1459  */
1460 void
1461 setup(void (*nfunc)(void))
1462 {
1463         format_t *tmp;
1464 
1465         tmp = safe_alloc(sizeof (format_t));
1466         tmp->nfunc = nfunc;
1467         tmp->next = 0;
1468         if (formatter_end)
1469                 formatter_end->next = tmp;
1470         else
1471                 formatter_list = tmp;
1472         formatter_end = tmp;
1473 
1474 }
1475 
1476 /*
1477  * The functions after this comment are devoted to printing
1478  * various parts of the header. They are selected based on the
1479  * options provided when the program was invoked. The functions
1480  * are either directly invoked in printhdr() or are indirectly
1481  * invoked by being placed on the list of functions used when
1482  * extended headers are used.
1483  */
1484 void
1485 print_tty_hdr1(void)
1486 {
1487         char *fstr;
1488         char *dstr;
1489 
1490         if (do_raw == 0) {
1491                 fstr = "%10.10s";
1492                 dstr = "tty    ";
1493         } else {
1494                 fstr = "%s";
1495                 dstr = "tty";
1496         }
1497         push_out(fstr, dstr);
1498 }
1499 
1500 void
1501 print_tty_hdr2(void)
1502 {
1503         if (do_raw == 0)
1504                 push_out("%-10.10s", " tin tout");
1505         else
1506                 push_out("tin,tout");
1507 }
1508 
1509 void
1510 print_cpu_hdr1(void)
1511 {
1512         char *dstr;
1513 
1514         if (do_raw == 0)
1515                 dstr = "     cpu";
1516         else
1517                 dstr = "cpu";
1518         push_out(dstr);
1519 }
1520 
1521 void
1522 print_cpu_hdr2(void)
1523 {
1524         char *dstr;
1525 
1526         if (do_raw == 0)
1527                 dstr = " us sy dt id";
1528         else
1529                 dstr = "us,sy,dt,id";
1530         push_out(dstr);
1531 }
1532 
1533 /*
1534  * Assumption is that tty data is always first - no need for raw mode leading
1535  * comma.
1536  */
1537 void
1538 print_tty_data(void)
1539 {
1540         char *fstr;
1541         uint64_t deltas;
1542         double raw;
1543         double outch;
1544         kstat_t *oldks = NULL;
1545 
1546         if (oldss)
1547                 oldks = &oldss->s_sys.ss_agg_sys;
1548 
1549         if (do_raw == 0)
1550                 fstr = " %3.0f %4.0f ";
1551         else
1552                 fstr = "%.0f,%.0f";
1553         deltas = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "rawch");
1554         raw = deltas;
1555         raw /= getime;
1556         deltas = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "outch");
1557         outch = deltas;
1558         outch /= getime;
1559         push_out(fstr, raw, outch);
1560 }
1561 
1562 /*
1563  * Write out CPU data
1564  */
1565 void
1566 print_cpu_data(void)
1567 {
1568         char *fstr;
1569         uint64_t idle;
1570         uint64_t user;
1571         uint64_t kern;
1572         uint64_t dtrace;
1573         uint64_t nsec_elapsed;
1574         kstat_t *oldks = NULL;
1575 
1576         if (oldss)
1577                 oldks = &oldss->s_sys.ss_agg_sys;
1578 
1579         if (do_raw == 0)
1580                 fstr = " %2.0f %2.0f %2.0f %2.0f";
1581         else
1582                 fstr = "%.0f,%.0f,%.0f,%.0f";
1583 
1584         idle = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_idle");
1585         user = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_user");
1586         kern = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_kernel");
1587         dtrace = kstat_delta(oldks, &newss->s_sys.ss_agg_sys,
1588             "cpu_nsec_dtrace");
1589         nsec_elapsed = newss->s_sys.ss_agg_sys.ks_snaptime -
1590             (oldks == NULL ? 0 : oldks->ks_snaptime);
1591         push_out(fstr, user * percent, kern * percent,
1592             dtrace * 100.0 / nsec_elapsed / newss->s_nr_active_cpus,
1593             idle * percent);
1594 }
1595 
1596 /*
1597  * Emit the appropriate header.
1598  */
1599 void
1600 hdrout(void)
1601 {
1602         if (do_raw == 0) {
1603                 if (--tohdr == 0)
1604                         printhdr(0);
1605         } else if (hdr_out == 0) {
1606                 printhdr(0);
1607                 hdr_out = 1;
1608         }
1609 }
1610 
1611 /*
1612  * Write out disk errors when -E is specified.
1613  */
1614 void
1615 disk_errors(void)
1616 {
1617         (void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk_errors, NULL);
1618 }
1619 
1620 void
1621 show_first_disk(void)
1622 {
1623         int count = 0;
1624 
1625         show_disk_mode = SHOW_FIRST_ONLY;
1626 
1627         (void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1628 }
1629 
1630 void
1631 show_other_disks(void)
1632 {
1633         int count = 0;
1634 
1635         show_disk_mode = SHOW_SECOND_ONWARDS;
1636 
1637         (void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1638 }
1639 
1640 void
1641 show_all_disks(void)
1642 {
1643         int count = 0;
1644 
1645         show_disk_mode = SHOW_ALL;
1646 
1647         (void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1648 }
1649 
1650 /*
1651  * Write a newline out and clear the lineout flag.
1652  */
1653 static void
1654 do_newline(void)
1655 {
1656         if (lineout) {
1657                 (void) putchar('\n');
1658                 lineout = 0;
1659         }
1660 }
1661 
1662 /*
1663  * Generalized printf function that determines what extra
1664  * to print out if we're in raw mode. At this time we
1665  * don't care about errors.
1666  */
1667 static void
1668 push_out(const char *message, ...)
1669 {
1670         va_list args;
1671 
1672         va_start(args, message);
1673         if (do_raw && lineout == 1)
1674                 (void) putchar(',');
1675         (void) vprintf(message, args);
1676         va_end(args);
1677         lineout = 1;
1678 }
1679 
1680 /*
1681  * Emit the header string when -e is specified.
1682  */
1683 static void
1684 print_err_hdr(void)
1685 {
1686         char obuf[SMALL_SCRATCH_BUFLEN];
1687 
1688         if (do_raw) {
1689                 push_out("errors");
1690                 return;
1691         }
1692 
1693         if (do_conversions == 0) {
1694                 if (!(do_disk & DISK_EXTENDED)) {
1695                         (void) snprintf(obuf, sizeof (obuf),
1696                             "%11s", one_blank);
1697                         push_out(obuf);
1698                 }
1699         } else if (do_disk == DISK_ERRORS)
1700                 push_out(two_blanks);
1701         else
1702                 push_out(one_blank);
1703         push_out("---- errors --- ");
1704 }
1705 
1706 /*
1707  * Emit the header string when -e is specified.
1708  */
1709 static void
1710 print_disk_header(void)
1711 {
1712         push_out(disk_header);
1713 }
1714 
/*
 * Return the amount by which an unsigned long long counter advanced
 * from "old" to "new".  If "new" is smaller than "old" the counter is
 * taken to have wrapped once past UINT64_MAX.
 *
 * UINT64_MAX is used on purpose: per the original authors UINTMAX_MAX
 * is not the right thing here because its definition depends on
 * whether _LP64 is defined.  Should u_longlong_t ever become wider
 * than 64 bits there is no ready-made MAX constant to drop in, so this
 * code would need revisiting.
 */

u_longlong_t
ull_delta(u_longlong_t old, u_longlong_t new)
{
        if (new < old) {
                /* counter wrapped: distance to the top, plus the rest */
                return ((UINT64_MAX - old) + new + 1);
        }
        return (new - old);
}
1734 
/*
 * Return the amount by which an unsigned 32 bit counter advanced from
 * "old" to "new".  If "new" is smaller than "old" the counter is taken
 * to have overflowed once past UINT32_MAX.
 */
uint_t
u32_delta(uint_t old, uint_t new)
{
        if (new < old) {
                /* counter wrapped: distance to the top, plus the rest */
                return ((UINT32_MAX - old) + new + 1);
        }
        return (new - old);
}
1748 
/*
 * Report whether "value" is close enough to zero for standard iostat
 * output: returns 1 when 0.0 <= value < EPSILON and 0 otherwise
 * (negative values are never treated as zero).
 *
 * This is exactly what is needed for standard iostat output, but make
 * sure to use it only for that.
 */
#define EPSILON (0.1)
static int
fzero(double value)
{
        if (value < 0.0)
                return (0);
        return (value < EPSILON);
}
1759 
/*
 * Convert the decimal string "val" to an int.
 *
 *   val     string to parse (base 10; strtol() skips leading white space)
 *   errmsg  text placed in front of the offending value in the failure
 *           message
 *
 * Returns the parsed value.  fail() is invoked with "<errmsg> <val>"
 * when the string holds no digits at all (e.g. it is empty), when it
 * has trailing characters after the number, when strtol() reports an
 * error through errno (such as ERANGE), or when the value does not fit
 * in an int.
 */
static int
safe_strtoi(char const *val, char *errmsg)
{
        char *end;
        long parsed;
        int result;

        errno = 0;
        parsed = strtol(val, &end, 10);
        result = (int)parsed;

        /*
         * end == val means no conversion was performed; without this
         * check an empty string would be silently accepted as 0.  The
         * round trip through int catches values that fit in a long but
         * would be truncated by the narrowing conversion.
         */
        if (end == val || *end != '\0' || errno != 0 ||
            (long)result != parsed)
                fail(0, "%s %s", errmsg, val);

        return (result);
}