1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2017 Joyent, Inc.
  24  */
  25 
  26 #include <mdb/mdb_param.h>
  27 #include <mdb/mdb_modapi.h>
  28 #include <mdb/mdb_ks.h>
  29 #include <sys/types.h>
  30 #include <sys/memlist.h>
  31 #include <sys/swap.h>
  32 #include <sys/systm.h>
  33 #include <sys/thread.h>
  34 #include <vm/anon.h>
  35 #include <vm/as.h>
  36 #include <vm/page.h>
  37 #include <sys/thread.h>
  38 #include <sys/swap.h>
  39 #include <sys/memlist.h>
  40 #include <sys/vnode.h>
  41 #include <vm/seg_map.h>
  42 #include <vm/seg_vn.h>
  43 #include <vm/seg_hole.h>
  44 
  45 #include "avl.h"
  46 #include "memory.h"
  47 
  48 /*
  49  * Page walker.
  50  * By default, this will walk all pages in the system.  If given an
  51  * address, it will walk all pages belonging to the vnode at that
  52  * address.
  53  */
  54 
  55 /*
  56  * page_walk_data
  57  *
  58  * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
  59  * number of hash locations remaining in the page hash table when
  60  * walking all pages.
  61  *
  62  * The astute reader will notice that pw_hashloc is only used when
  63  * reading all pages (to hold a pointer to our location in the page
  64  * hash table), and that pw_first is only used when reading the pages
  65  * belonging to a particular vnode (to hold a pointer to the first
  66  * page).  While these could be combined to be a single pointer, they
  67  * are left separate for clarity.
  68  */
  69 typedef struct page_walk_data {
  70         long            pw_hashleft;
  71         void            **pw_hashloc;
  72         uintptr_t       pw_first;
  73 } page_walk_data_t;
  74 
  75 int
  76 page_walk_init(mdb_walk_state_t *wsp)
  77 {
  78         page_walk_data_t        *pwd;
  79         void    **ptr;
  80         size_t  hashsz;
  81         vnode_t vn;
  82 
  83         if (wsp->walk_addr == NULL) {
  84 
  85                 /*
  86                  * Walk all pages
  87                  */
  88 
  89                 if ((mdb_readvar(&ptr, "page_hash") == -1) ||
  90                     (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
  91                     (ptr == NULL) || (hashsz == 0)) {
  92                         mdb_warn("page_hash, page_hashsz not found or invalid");
  93                         return (WALK_ERR);
  94                 }
  95 
  96                 /*
  97                  * Since we are walking all pages, initialize hashleft
  98                  * to be the remaining number of entries in the page
  99                  * hash.  hashloc is set the start of the page hash
 100                  * table.  Setting the walk address to 0 indicates that
 101                  * we aren't currently following a hash chain, and that
 102                  * we need to scan the page hash table for a page.
 103                  */
 104                 pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
 105                 pwd->pw_hashleft = hashsz;
 106                 pwd->pw_hashloc = ptr;
 107                 wsp->walk_addr = 0;
 108         } else {
 109 
 110                 /*
 111                  * Walk just this vnode
 112                  */
 113 
 114                 if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
 115                         mdb_warn("unable to read vnode_t at %#lx",
 116                             wsp->walk_addr);
 117                         return (WALK_ERR);
 118                 }
 119 
 120                 /*
 121                  * We set hashleft to -1 to indicate that we are
 122                  * walking a vnode, and initialize first to 0 (it is
 123                  * used to terminate the walk, so it must not be set
 124                  * until after we have walked the first page).  The
 125                  * walk address is set to the first page.
 126                  */
 127                 pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
 128                 pwd->pw_hashleft = -1;
 129                 pwd->pw_first = 0;
 130 
 131                 wsp->walk_addr = (uintptr_t)vn.v_pages;
 132         }
 133 
 134         wsp->walk_data = pwd;
 135 
 136         return (WALK_NEXT);
 137 }
 138 
 139 int
 140 page_walk_step(mdb_walk_state_t *wsp)
 141 {
 142         page_walk_data_t        *pwd = wsp->walk_data;
 143         page_t          page;
 144         uintptr_t       pp;
 145 
 146         pp = wsp->walk_addr;
 147 
 148         if (pwd->pw_hashleft < 0) {
 149 
 150                 /* We're walking a vnode's pages */
 151 
 152                 /*
 153                  * If we don't have any pages to walk, we have come
 154                  * back around to the first one (we finished), or we
 155                  * can't read the page we're looking at, we are done.
 156                  */
 157                 if (pp == NULL || pp == pwd->pw_first)
 158                         return (WALK_DONE);
 159                 if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
 160                         mdb_warn("unable to read page_t at %#lx", pp);
 161                         return (WALK_ERR);
 162                 }
 163 
 164                 /*
 165                  * Set the walk address to the next page, and if the
 166                  * first page hasn't been set yet (i.e. we are on the
 167                  * first page), set it.
 168                  */
 169                 wsp->walk_addr = (uintptr_t)page.p_vpnext;
 170                 if (pwd->pw_first == NULL)
 171                         pwd->pw_first = pp;
 172 
 173         } else if (pwd->pw_hashleft > 0) {
 174 
 175                 /* We're walking all pages */
 176 
 177                 /*
 178                  * If pp (the walk address) is NULL, we scan through
 179                  * the page hash table until we find a page.
 180                  */
 181                 if (pp == NULL) {
 182 
 183                         /*
 184                          * Iterate through the page hash table until we
 185                          * find a page or reach the end.
 186                          */
 187                         do {
 188                                 if (mdb_vread(&pp, sizeof (uintptr_t),
 189                                     (uintptr_t)pwd->pw_hashloc) == -1) {
 190                                         mdb_warn("unable to read from %#p",
 191                                             pwd->pw_hashloc);
 192                                         return (WALK_ERR);
 193                                 }
 194                                 pwd->pw_hashleft--;
 195                                 pwd->pw_hashloc++;
 196                         } while (pwd->pw_hashleft && (pp == NULL));
 197 
 198                         /*
 199                          * We've reached the end; exit.
 200                          */
 201                         if (pp == NULL)
 202                                 return (WALK_DONE);
 203                 }
 204 
 205                 if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
 206                         mdb_warn("unable to read page_t at %#lx", pp);
 207                         return (WALK_ERR);
 208                 }
 209 
 210                 /*
 211                  * Set the walk address to the next page.
 212                  */
 213                 wsp->walk_addr = (uintptr_t)page.p_hash;
 214 
 215         } else {
 216                 /* We've finished walking all pages. */
 217                 return (WALK_DONE);
 218         }
 219 
 220         return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
 221 }
 222 
 223 void
 224 page_walk_fini(mdb_walk_state_t *wsp)
 225 {
 226         mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
 227 }
 228 
 229 /*
 230  * allpages walks all pages in the system in order they appear in
 231  * the memseg structure
 232  */
 233 
 234 #define PAGE_BUFFER     128
 235 
 236 int
 237 allpages_walk_init(mdb_walk_state_t *wsp)
 238 {
 239         if (wsp->walk_addr != 0) {
 240                 mdb_warn("allpages only supports global walks.\n");
 241                 return (WALK_ERR);
 242         }
 243 
 244         if (mdb_layered_walk("memseg", wsp) == -1) {
 245                 mdb_warn("couldn't walk 'memseg'");
 246                 return (WALK_ERR);
 247         }
 248 
 249         wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
 250         return (WALK_NEXT);
 251 }
 252 
 253 int
 254 allpages_walk_step(mdb_walk_state_t *wsp)
 255 {
 256         const struct memseg *msp = wsp->walk_layer;
 257         page_t *buf = wsp->walk_data;
 258         size_t pg_read, i;
 259         size_t pg_num = msp->pages_end - msp->pages_base;
 260         const page_t *pg_addr = msp->pages;
 261 
 262         while (pg_num > 0) {
 263                 pg_read = MIN(pg_num, PAGE_BUFFER);
 264 
 265                 if (mdb_vread(buf, pg_read * sizeof (page_t),
 266                     (uintptr_t)pg_addr) == -1) {
 267                         mdb_warn("can't read page_t's at %#lx", pg_addr);
 268                         return (WALK_ERR);
 269                 }
 270                 for (i = 0; i < pg_read; i++) {
 271                         int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
 272                             &buf[i], wsp->walk_cbdata);
 273 
 274                         if (ret != WALK_NEXT)
 275                                 return (ret);
 276                 }
 277                 pg_num -= pg_read;
 278                 pg_addr += pg_read;
 279         }
 280 
 281         return (WALK_NEXT);
 282 }
 283 
 284 void
 285 allpages_walk_fini(mdb_walk_state_t *wsp)
 286 {
 287         mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
 288 }
 289 
/*
 * Hash table + LRU queue.
 * This table is used to cache recently read vnodes for the memstat
 * command, to reduce the number of mdb_vread calls.  This greatly
 * speeds the memstat command on live, large CPU count systems.
 */
 296 
 297 #define VN_SMALL        401
 298 #define VN_LARGE        10007
 299 #define VN_HTABLE_KEY(p, hp)    ((p) % ((hp)->vn_htable_buckets))
 300 
 301 struct vn_htable_list {
 302         uint_t vn_flag;                         /* v_flag from vnode    */
 303         uintptr_t vn_ptr;                       /* pointer to vnode     */
 304         struct vn_htable_list *vn_q_next;       /* queue next pointer   */
 305         struct vn_htable_list *vn_q_prev;       /* queue prev pointer   */
 306         struct vn_htable_list *vn_h_next;       /* hash table pointer   */
 307 };
 308 
 309 /*
 310  * vn_q_first        -> points to to head of queue: the vnode that was most
 311  *                      recently used
 312  * vn_q_last         -> points to the oldest used vnode, and is freed once a new
 313  *                      vnode is read.
 314  * vn_htable         -> hash table
 315  * vn_htable_buf     -> contains htable objects
 316  * vn_htable_size    -> total number of items in the hash table
 317  * vn_htable_buckets -> number of buckets in the hash table
 318  */
 319 typedef struct vn_htable {
 320         struct vn_htable_list  *vn_q_first;
 321         struct vn_htable_list  *vn_q_last;
 322         struct vn_htable_list **vn_htable;
 323         struct vn_htable_list  *vn_htable_buf;
 324         int vn_htable_size;
 325         int vn_htable_buckets;
 326 } vn_htable_t;
 327 
 328 
 329 /* allocate memory, initilize hash table and LRU queue */
 330 static void
 331 vn_htable_init(vn_htable_t *hp, size_t vn_size)
 332 {
 333         int i;
 334         int htable_size = MAX(vn_size, VN_LARGE);
 335 
 336         if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
 337             * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
 338                 htable_size = VN_SMALL;
 339                 hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
 340                     * htable_size, UM_SLEEP|UM_GC);
 341         }
 342 
 343         hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
 344             * htable_size, UM_SLEEP|UM_GC);
 345 
 346         hp->vn_q_first  = &hp->vn_htable_buf[0];
 347         hp->vn_q_last   = &hp->vn_htable_buf[htable_size - 1];
 348         hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
 349         hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];
 350 
 351         for (i = 1; i < (htable_size-1); i++) {
 352                 hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
 353                 hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
 354         }
 355 
 356         hp->vn_htable_size = htable_size;
 357         hp->vn_htable_buckets = htable_size;
 358 }
 359 
 360 
 361 /*
 362  * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
 363  * The function tries to find needed information in the following order:
 364  *
 365  * 1. check if ptr is the first in queue
 366  * 2. check if ptr is in hash table (if so move it to the top of queue)
 367  * 3. do mdb_vread, remove last queue item from queue and hash table.
 368  *    Insert new information to freed object, and put this object in to the
 369  *    top of the queue.
 370  */
 371 static int
 372 vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
 373 {
 374         int hkey;
 375         struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
 376         struct vn_htable_list  *q_first = hp->vn_q_first;
 377 
 378         /* 1. vnode ptr is the first in queue, just get v_flag and return */
 379         if (q_first->vn_ptr == ptr) {
 380                 vp->v_flag = q_first->vn_flag;
 381 
 382                 return (0);
 383         }
 384 
 385         /* 2. search the hash table for this ptr */
 386         hkey = VN_HTABLE_KEY(ptr, hp);
 387         hent = hp->vn_htable[hkey];
 388         while (hent && (hent->vn_ptr != ptr))
 389                 hent = hent->vn_h_next;
 390 
 391         /* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
 392         if (hent == NULL) {
 393                 struct vnode vn;
 394 
 395                 if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
 396                         mdb_warn("unable to read vnode_t at %#lx", ptr);
 397                         return (-1);
 398                 }
 399 
 400                 /* we will insert read data into the last element in queue */
 401                 hent = hp->vn_q_last;
 402 
 403                 /* remove last hp->vn_q_last object from hash table */
 404                 if (hent->vn_ptr) {
 405                         htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
 406                         while (*htmp != hent)
 407                                 htmp = &(*htmp)->vn_h_next;
 408                         *htmp = hent->vn_h_next;
 409                 }
 410 
 411                 /* insert data into new free object */
 412                 hent->vn_ptr  = ptr;
 413                 hent->vn_flag = vn.v_flag;
 414 
 415                 /* insert new object into hash table */
 416                 hent->vn_h_next = hp->vn_htable[hkey];
 417                 hp->vn_htable[hkey] = hent;
 418         }
 419 
 420         /* Remove from queue. hent is not first, vn_q_prev is not NULL */
 421         q_next = hent->vn_q_next;
 422         q_prev = hent->vn_q_prev;
 423         if (q_next == NULL)
 424                 hp->vn_q_last = q_prev;
 425         else
 426                 q_next->vn_q_prev = q_prev;
 427         q_prev->vn_q_next = q_next;
 428 
 429         /* Add to the front of queue */
 430         hent->vn_q_prev = NULL;
 431         hent->vn_q_next = q_first;
 432         q_first->vn_q_prev = hent;
 433         hp->vn_q_first = hent;
 434 
 435         /* Set v_flag in vnode pointer from hent */
 436         vp->v_flag = hent->vn_flag;
 437 
 438         return (0);
 439 }
 440 
 441 /* Summary statistics of pages */
 442 typedef struct memstat {
 443         struct vnode    *ms_kvp;        /* Cached address of kernel vnode */
 444         struct vnode    *ms_unused_vp;  /* Unused pages vnode pointer     */
 445         struct vnode    *ms_zvp;        /* Cached address of zio vnode    */
 446         uint64_t        ms_kmem;        /* Pages of kernel memory         */
 447         uint64_t        ms_zfs_data;    /* Pages of zfs data              */
 448         uint64_t        ms_anon;        /* Pages of anonymous memory      */
 449         uint64_t        ms_vnode;       /* Pages of named (vnode) memory  */
 450         uint64_t        ms_exec;        /* Pages of exec/library memory   */
 451         uint64_t        ms_cachelist;   /* Pages on the cachelist (free)  */
 452         uint64_t        ms_bootpages;   /* Pages on the bootpages list    */
 453         uint64_t        ms_total;       /* Pages on page hash             */
 454         vn_htable_t     *ms_vn_htable;  /* Pointer to hash table          */
 455         struct vnode    ms_vn;          /* vnode buffer                   */
 456 } memstat_t;
 457 
 458 #define MS_PP_ISKAS(pp, stats)                          \
 459         ((pp)->p_vnode == (stats)->ms_kvp)
 460 
 461 #define MS_PP_ISZFS_DATA(pp, stats)                     \
 462         (((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))
 463 
 464 /*
 465  * Summarize pages by type and update stat information
 466  */
 467 
 468 /* ARGSUSED */
 469 static int
 470 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
 471 {
 472         struct vnode *vp = &stats->ms_vn;
 473 
 474         if (PP_ISBOOTPAGES(pp))
 475                 stats->ms_bootpages++;
 476         else if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
 477                 return (WALK_NEXT);
 478         else if (MS_PP_ISKAS(pp, stats))
 479                 stats->ms_kmem++;
 480         else if (MS_PP_ISZFS_DATA(pp, stats))
 481                 stats->ms_zfs_data++;
 482         else if (PP_ISFREE(pp))
 483                 stats->ms_cachelist++;
 484         else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
 485                 return (WALK_ERR);
 486         else if (IS_SWAPFSVP(vp))
 487                 stats->ms_anon++;
 488         else if ((vp->v_flag & VVMEXEC) != 0)
 489                 stats->ms_exec++;
 490         else
 491                 stats->ms_vnode++;
 492 
 493         stats->ms_total++;
 494 
 495         return (WALK_NEXT);
 496 }
 497 
 498 /* ARGSUSED */
 499 int
 500 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 501 {
 502         pgcnt_t total_pages, physmem;
 503         ulong_t freemem;
 504         memstat_t stats;
 505         GElf_Sym sym;
 506         vn_htable_t ht;
 507         struct vnode *kvps;
 508         uintptr_t vn_size = 0;
 509 
 510         bzero(&stats, sizeof (memstat_t));
 511 
 512         /*
 513          * -s size, is an internal option. It specifies the size of vn_htable.
 514          * Hash table size is set in the following order:
 515          * If user has specified the size that is larger than VN_LARGE: try it,
 516          * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if
 517          * failed to allocate default to VN_SMALL.
 518          * For a better efficiency of hash table it is highly recommended to
 519          * set size to a prime number.
 520          */
 521         if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
 522             's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
 523                 return (DCMD_USAGE);
 524 
 525         /* Initialize vnode hash list and queue */
 526         vn_htable_init(&ht, vn_size);
 527         stats.ms_vn_htable = &ht;
 528 
 529         /* Total physical memory */
 530         if (mdb_readvar(&total_pages, "total_pages") == -1) {
 531                 mdb_warn("unable to read total_pages");
 532                 return (DCMD_ERR);
 533         }
 534 
 535         /* Artificially limited memory */
 536         if (mdb_readvar(&physmem, "physmem") == -1) {
 537                 mdb_warn("unable to read physmem");
 538                 return (DCMD_ERR);
 539         }
 540 
 541         /* read kernel vnode array pointer */
 542         if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps",
 543             (GElf_Sym *)&sym) == -1) {
 544                 mdb_warn("unable to read kvps");
 545                 return (DCMD_ERR);
 546         }
 547         kvps = (struct vnode *)(uintptr_t)sym.st_value;
 548         stats.ms_kvp =  &kvps[KV_KVP];
 549 
 550         /*
 551          * Read the zio vnode pointer.
 552          */
 553         stats.ms_zvp = &kvps[KV_ZVP];
 554 
 555         /*
 556          * If physmem != total_pages, then the administrator has limited the
 557          * number of pages available in the system.  Excluded pages are
 558          * associated with the unused pages vnode.  Read this vnode so the
 559          * pages can be excluded in the page accounting.
 560          */
 561         if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
 562             (GElf_Sym *)&sym) == -1) {
 563                 mdb_warn("unable to read unused_pages_vp");
 564                 return (DCMD_ERR);
 565         }
 566         stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;
 567 
 568         /* walk all pages, collect statistics */
 569         if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback,
 570             &stats) == -1) {
 571                 mdb_warn("can't walk memseg");
 572                 return (DCMD_ERR);
 573         }
 574 
 575 #define MS_PCT_TOTAL(x) ((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
 576                 ((physmem) * 10)))
 577 
 578         mdb_printf("Page Summary                Pages                MB"
 579             "  %%Tot\n");
 580         mdb_printf("------------     ----------------  ----------------"
 581             "  ----\n");
 582         mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
 583             stats.ms_kmem,
 584             (uint64_t)stats.ms_kmem * PAGESIZE / (1024 * 1024),
 585             MS_PCT_TOTAL(stats.ms_kmem));
 586 
 587         if (stats.ms_bootpages != 0) {
 588                 mdb_printf("Boot pages       %16llu  %16llu  %3lu%%\n",
 589                     stats.ms_bootpages,
 590                     (uint64_t)stats.ms_bootpages * PAGESIZE / (1024 * 1024),
 591                     MS_PCT_TOTAL(stats.ms_bootpages));
 592         }
 593 
 594         if (stats.ms_zfs_data != 0) {
 595                 mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
 596                     stats.ms_zfs_data,
 597                     (uint64_t)stats.ms_zfs_data * PAGESIZE / (1024 * 1024),
 598                     MS_PCT_TOTAL(stats.ms_zfs_data));
 599         }
 600 
 601         mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
 602             stats.ms_anon,
 603             (uint64_t)stats.ms_anon * PAGESIZE / (1024 * 1024),
 604             MS_PCT_TOTAL(stats.ms_anon));
 605         mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
 606             stats.ms_exec,
 607             (uint64_t)stats.ms_exec * PAGESIZE / (1024 * 1024),
 608             MS_PCT_TOTAL(stats.ms_exec));
 609         mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
 610             stats.ms_vnode,
 611             (uint64_t)stats.ms_vnode * PAGESIZE / (1024 * 1024),
 612             MS_PCT_TOTAL(stats.ms_vnode));
 613         mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
 614             stats.ms_cachelist,
 615             (uint64_t)stats.ms_cachelist * PAGESIZE / (1024 * 1024),
 616             MS_PCT_TOTAL(stats.ms_cachelist));
 617 
 618         /*
 619          * occasionally, we double count pages above.  To avoid printing
 620          * absurdly large values for freemem, we clamp it at zero.
 621          */
 622         if (physmem > stats.ms_total)
 623                 freemem = physmem - stats.ms_total;
 624         else
 625                 freemem = 0;
 626 
 627         mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
 628             (uint64_t)freemem * PAGESIZE / (1024 * 1024),
 629             MS_PCT_TOTAL(freemem));
 630 
 631         mdb_printf("\nTotal            %16lu  %16lu\n",
 632             physmem,
 633             (uint64_t)physmem * PAGESIZE / (1024 * 1024));
 634 
 635         if (physmem != total_pages) {
 636                 mdb_printf("Physical         %16lu  %16lu\n",
 637                     total_pages,
 638                     (uint64_t)total_pages * PAGESIZE / (1024 * 1024));
 639         }
 640 
 641 #undef MS_PCT_TOTAL
 642 
 643         return (DCMD_OK);
 644 }
 645 
 646 void
 647 pagelookup_help(void)
 648 {
 649         mdb_printf(
 650             "Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n"
 651             "\n"
 652             "Can be invoked three different ways:\n\n"
 653             "    ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n"
 654             "    %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n"
 655             "    %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n"
 656             "\n"
 657             "The latter two forms are useful in pipelines.\n");
 658 }
 659 
 660 int
 661 pagelookup(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 662 {
 663         uintptr_t vp = -(uintptr_t)1;
 664         uint64_t offset = -(uint64_t)1;
 665 
 666         uintptr_t pageaddr;
 667         int hasaddr = (flags & DCMD_ADDRSPEC);
 668         int usedaddr = 0;
 669 
 670         if (mdb_getopts(argc, argv,
 671             'v', MDB_OPT_UINTPTR, &vp,
 672             'o', MDB_OPT_UINT64, &offset,
 673             0) != argc) {
 674                 return (DCMD_USAGE);
 675         }
 676 
 677         if (vp == -(uintptr_t)1) {
 678                 if (offset == -(uint64_t)1) {
 679                         mdb_warn(
 680                             "pagelookup: at least one of -v vp or -o offset "
 681                             "required.\n");
 682                         return (DCMD_USAGE);
 683                 }
 684                 vp = addr;
 685                 usedaddr = 1;
 686         } else if (offset == -(uint64_t)1) {
 687                 offset = mdb_get_dot();
 688                 usedaddr = 1;
 689         }
 690         if (usedaddr && !hasaddr) {
 691                 mdb_warn("pagelookup: address required\n");
 692                 return (DCMD_USAGE);
 693         }
 694         if (!usedaddr && hasaddr) {
 695                 mdb_warn(
 696                     "pagelookup: address specified when both -v and -o were "
 697                     "passed");
 698                 return (DCMD_USAGE);
 699         }
 700 
 701         pageaddr = mdb_page_lookup(vp, offset);
 702         if (pageaddr == 0) {
 703                 mdb_warn("pagelookup: no page for {vp = %p, offset = %llp)\n",
 704                     vp, offset);
 705                 return (DCMD_OK);
 706         }
 707         mdb_printf("%#lr\n", pageaddr);         /* this is PIPE_OUT friendly */
 708         return (DCMD_OK);
 709 }
 710 
 711 /*ARGSUSED*/
 712 int
 713 page_num2pp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 714 {
 715         uintptr_t pp;
 716 
 717         if (argc != 0 || !(flags & DCMD_ADDRSPEC)) {
 718                 return (DCMD_USAGE);
 719         }
 720 
 721         pp = mdb_pfn2page((pfn_t)addr);
 722         if (pp == 0) {
 723                 return (DCMD_ERR);
 724         }
 725 
 726         if (flags & DCMD_PIPE_OUT) {
 727                 mdb_printf("%#lr\n", pp);
 728         } else {
 729                 mdb_printf("%lx has page_t at %#lx\n", (pfn_t)addr, pp);
 730         }
 731 
 732         return (DCMD_OK);
 733 }
 734 
 735 int
 736 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 737 {
 738         page_t  p;
 739 
 740         if (!(flags & DCMD_ADDRSPEC)) {
 741                 if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
 742                         mdb_warn("can't walk pages");
 743                         return (DCMD_ERR);
 744                 }
 745                 return (DCMD_OK);
 746         }
 747 
 748         if (DCMD_HDRSPEC(flags)) {
 749                 mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
 750                     "PAGE", "VNODE", "OFFSET", "SELOCK",
 751                     "LCT", "COW", "IO", "FS", "ST");
 752         }
 753 
 754         if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
 755                 mdb_warn("can't read page_t at %#lx", addr);
 756                 return (DCMD_ERR);
 757         }
 758 
 759         mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
 760             addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
 761             p.p_iolock_state, p.p_fsdata, p.p_state);
 762 
 763         return (DCMD_OK);
 764 }
 765 
 766 int
 767 swap_walk_init(mdb_walk_state_t *wsp)
 768 {
 769         void    *ptr;
 770 
 771         if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
 772                 mdb_warn("swapinfo not found or invalid");
 773                 return (WALK_ERR);
 774         }
 775 
 776         wsp->walk_addr = (uintptr_t)ptr;
 777 
 778         return (WALK_NEXT);
 779 }
 780 
 781 int
 782 swap_walk_step(mdb_walk_state_t *wsp)
 783 {
 784         uintptr_t       sip;
 785         struct swapinfo si;
 786 
 787         sip = wsp->walk_addr;
 788 
 789         if (sip == NULL)
 790                 return (WALK_DONE);
 791 
 792         if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
 793                 mdb_warn("unable to read swapinfo at %#lx", sip);
 794                 return (WALK_ERR);
 795         }
 796 
 797         wsp->walk_addr = (uintptr_t)si.si_next;
 798 
 799         return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
 800 }
 801 
 802 int
 803 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 804 {
 805         struct swapinfo si;
 806         char            *name;
 807 
 808         if (!(flags & DCMD_ADDRSPEC)) {
 809                 if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
 810                         mdb_warn("can't walk swapinfo");
 811                         return (DCMD_ERR);
 812                 }
 813                 return (DCMD_OK);
 814         }
 815 
 816         if (DCMD_HDRSPEC(flags)) {
 817                 mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
 818                     "ADDR", "VNODE", "PAGES", "FREE", "NAME");
 819         }
 820 
 821         if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
 822                 mdb_warn("can't read swapinfo at %#lx", addr);
 823                 return (DCMD_ERR);
 824         }
 825 
 826         name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
 827         if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
 828                 name = "*error*";
 829 
 830         mdb_printf("%0?lx %?p %9d %9d %s\n",
 831             addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
 832 
 833         return (DCMD_OK);
 834 }
 835 
 836 int
 837 memlist_walk_step(mdb_walk_state_t *wsp)
 838 {
 839         uintptr_t       mlp;
 840         struct memlist  ml;
 841 
 842         mlp = wsp->walk_addr;
 843 
 844         if (mlp == NULL)
 845                 return (WALK_DONE);
 846 
 847         if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
 848                 mdb_warn("unable to read memlist at %#lx", mlp);
 849                 return (WALK_ERR);
 850         }
 851 
 852         wsp->walk_addr = (uintptr_t)ml.ml_next;
 853 
 854         return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
 855 }
 856 
 857 int
 858 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 859 {
 860         struct memlist  ml;
 861 
 862         if (!(flags & DCMD_ADDRSPEC)) {
 863                 uintptr_t ptr;
 864                 uint_t list = 0;
 865                 int i;
 866                 static const char *lists[] = {
 867                         "phys_install",
 868                         "phys_avail",
 869                         "virt_avail"
 870                 };
 871 
 872                 if (mdb_getopts(argc, argv,
 873                     'i', MDB_OPT_SETBITS, (1 << 0), &list,
 874                     'a', MDB_OPT_SETBITS, (1 << 1), &list,
 875                     'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
 876                         return (DCMD_USAGE);
 877 
 878                 if (!list)
 879                         list = 1;
 880 
 881                 for (i = 0; list; i++, list >>= 1) {
 882                         if (!(list & 1))
 883                                 continue;
 884                         if ((mdb_readvar(&ptr, lists[i]) == -1) ||
 885                             (ptr == NULL)) {
 886                                 mdb_warn("%s not found or invalid", lists[i]);
 887                                 return (DCMD_ERR);
 888                         }
 889 
 890                         mdb_printf("%s:\n", lists[i]);
 891                         if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
 892                             ptr) == -1) {
 893                                 mdb_warn("can't walk memlist");
 894                                 return (DCMD_ERR);
 895                         }
 896                 }
 897                 return (DCMD_OK);
 898         }
 899 
 900         if (DCMD_HDRSPEC(flags))
 901                 mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
 902 
 903         if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
 904                 mdb_warn("can't read memlist at %#lx", addr);
 905                 return (DCMD_ERR);
 906         }
 907 
 908         mdb_printf("%0?lx %16llx %16llx\n", addr, ml.ml_address, ml.ml_size);
 909 
 910         return (DCMD_OK);
 911 }
 912 
 913 int
 914 seg_walk_init(mdb_walk_state_t *wsp)
 915 {
 916         if (wsp->walk_addr == NULL) {
 917                 mdb_warn("seg walk must begin at struct as *\n");
 918                 return (WALK_ERR);
 919         }
 920 
 921         /*
 922          * this is really just a wrapper to AVL tree walk
 923          */
 924         wsp->walk_addr = (uintptr_t)&((struct as *)wsp->walk_addr)->a_segtree;
 925         return (avl_walk_init(wsp));
 926 }
 927 
 928 /*ARGSUSED*/
 929 int
 930 seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 931 {
 932         struct seg s;
 933 
 934         if (argc != 0)
 935                 return (DCMD_USAGE);
 936 
 937         if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
 938                 mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
 939                     "SEG", "BASE", "SIZE", "DATA", "OPS");
 940         }
 941 
 942         if (mdb_vread(&s, sizeof (s), addr) == -1) {
 943                 mdb_warn("failed to read seg at %p", addr);
 944                 return (DCMD_ERR);
 945         }
 946 
 947         mdb_printf("%?p %?p %?lx %?p %a\n",
 948             addr, s.s_base, s.s_size, s.s_data, s.s_ops);
 949 
 950         return (DCMD_OK);
 951 }
 952 
/*
 * Addresses of the segment ops vectors that the ::pmap callbacks compare
 * against a segment's s_ops pointer.  ::pmap zero-initializes the structure
 * and fills in a field only when the matching symbol lookup succeeds.
 */
typedef struct pmap_walk_types {
        uintptr_t pwt_segvn;            /* address of "segvn_ops" */
        uintptr_t pwt_seghole;          /* address of "seghole_ops" */
} pmap_walk_types_t;
 957 
 958 /*ARGSUSED*/
 959 static int
 960 pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
 961 {
 962         pgcnt_t *nres = out;
 963 
 964         (*nres)++;
 965 
 966         return (WALK_NEXT);
 967 }
 968 
 969 static int
 970 pmap_walk_seg(uintptr_t addr, const struct seg *seg,
 971     const pmap_walk_types_t *types)
 972 {
 973         const uintptr_t ops = (uintptr_t)seg->s_ops;
 974 
 975         mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
 976 
 977         if (ops == types->pwt_segvn && seg->s_data != NULL) {
 978                 struct segvn_data svn;
 979                 pgcnt_t nres = 0;
 980 
 981                 svn.vp = NULL;
 982                 (void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);
 983 
 984                 /*
 985                  * Use the segvn_pages walker to find all of the in-core pages
 986                  * for this mapping.
 987                  */
 988                 if (mdb_pwalk("segvn_pages", pmap_walk_count_pages, &nres,
 989                     (uintptr_t)seg->s_data) == -1) {
 990                         mdb_warn("failed to walk segvn_pages (s_data=%p)",
 991                             seg->s_data);
 992                 }
 993                 mdb_printf(" %7ldk", (nres * PAGESIZE) / 1024);
 994 
 995                 if (svn.vp != NULL) {
 996                         char buf[29];
 997 
 998                         mdb_vnode2path((uintptr_t)svn.vp, buf, sizeof (buf));
 999                         mdb_printf(" %s", buf);
1000                 } else {
1001                         mdb_printf(" [ anon ]");
1002                 }
1003         } else if (ops == types->pwt_seghole && seg->s_data != NULL) {
1004                 seghole_data_t shd;
1005                 char name[16];
1006 
1007                 (void) mdb_vread(&shd, sizeof (shd), (uintptr_t)seg->s_data);
1008                 if (shd.shd_name == NULL || mdb_readstr(name, sizeof (name),
1009                     (uintptr_t)shd.shd_name) == 0) {
1010                         name[0] = '\0';
1011                 }
1012 
1013                 mdb_printf(" %8s [ hole%s%s ]", "-",
1014                     name[0] == '0' ? "" : ":", name);
1015         } else {
1016                 mdb_printf(" %8s [ &%a ]", "?", seg->s_ops);
1017         }
1018 
1019         mdb_printf("\n");
1020         return (WALK_NEXT);
1021 }
1022 
1023 static int
1024 pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg,
1025     const pmap_walk_types_t *types)
1026 {
1027         const uintptr_t ops = (uintptr_t)seg->s_ops;
1028 
1029         mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
1030 
1031         if (ops == types->pwt_segvn && seg->s_data != NULL) {
1032                 struct segvn_data svn;
1033 
1034                 svn.vp = NULL;
1035                 (void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);
1036 
1037                 if (svn.vp != NULL) {
1038                         mdb_printf(" %0?p", svn.vp);
1039                 } else {
1040                         mdb_printf(" [ anon ]");
1041                 }
1042         } else {
1043                 mdb_printf(" [ &%a ]", seg->s_ops);
1044         }
1045 
1046         mdb_printf("\n");
1047         return (WALK_NEXT);
1048 }
1049 
1050 /*ARGSUSED*/
1051 int
1052 pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1053 {
1054         proc_t proc;
1055         uint_t quick = FALSE;
1056         mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg;
1057         pmap_walk_types_t wtypes = { 0 };
1058 
1059         GElf_Sym sym;
1060 
1061         if (!(flags & DCMD_ADDRSPEC))
1062                 return (DCMD_USAGE);
1063 
1064         if (mdb_getopts(argc, argv,
1065             'q', MDB_OPT_SETBITS, TRUE, &quick, NULL) != argc)
1066                 return (DCMD_USAGE);
1067 
1068         if (mdb_vread(&proc, sizeof (proc), addr) == -1) {
1069                 mdb_warn("failed to read proc at %p", addr);
1070                 return (DCMD_ERR);
1071         }
1072 
1073         if (mdb_lookup_by_name("segvn_ops", &sym) == 0)
1074                 wtypes.pwt_segvn = (uintptr_t)sym.st_value;
1075         if (mdb_lookup_by_name("seghole_ops", &sym) == 0)
1076                 wtypes.pwt_seghole = (uintptr_t)sym.st_value;
1077 
1078         mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE");
1079 
1080         if (quick) {
1081                 mdb_printf("VNODE\n");
1082                 cb = (mdb_walk_cb_t)pmap_walk_seg_quick;
1083         } else {
1084                 mdb_printf("%8s %s\n", "RES", "PATH");
1085         }
1086 
1087         if (mdb_pwalk("seg", cb, (void *)&wtypes, (uintptr_t)proc.p_as) == -1) {
1088                 mdb_warn("failed to walk segments of as %p", proc.p_as);
1089                 return (DCMD_ERR);
1090         }
1091 
1092         return (DCMD_OK);
1093 }
1094 
/*
 * Private state of the anon walkers.
 *
 * The walker keeps a local copy of the anon header's first-level array.
 * When aw_levtwo is NULL that array holds the anon pointers themselves;
 * otherwise each first-level entry is the address of a chunk of
 * ANON_CHUNK_SIZE anon pointers, and aw_levtwo buffers the chunk currently
 * being walked.
 */
typedef struct anon_walk_data {
        uintptr_t *aw_levone;           /* local copy of first-level array */
        uintptr_t *aw_levtwo;           /* current chunk, or NULL if 1-level */
        size_t aw_minslot;              /* first slot to report */
        size_t aw_maxslot;              /* walk stops at this slot */
        pgcnt_t aw_nlevone;             /* number of entries in aw_levone */
        pgcnt_t aw_levone_ndx;          /* current index into aw_levone */
        size_t aw_levtwo_ndx;           /* current index into aw_levtwo */
        struct anon_map *aw_ampp;       /* target address of the anon_map */
        struct anon_map aw_amp;         /* local copy of the anon_map */
        struct anon_hdr aw_ahp;         /* local copy of its anon_hdr */
        int             aw_all; /* report all anon pointers, even NULLs */
} anon_walk_data_t;
1108 
1109 int
1110 anon_walk_init_common(mdb_walk_state_t *wsp, ulong_t minslot, ulong_t maxslot)
1111 {
1112         anon_walk_data_t *aw;
1113 
1114         if (wsp->walk_addr == NULL) {
1115                 mdb_warn("anon walk doesn't support global walks\n");
1116                 return (WALK_ERR);
1117         }
1118 
1119         aw = mdb_alloc(sizeof (anon_walk_data_t), UM_SLEEP);
1120         aw->aw_ampp = (struct anon_map *)wsp->walk_addr;
1121 
1122         if (mdb_vread(&aw->aw_amp, sizeof (aw->aw_amp), wsp->walk_addr) == -1) {
1123                 mdb_warn("failed to read anon map at %p", wsp->walk_addr);
1124                 mdb_free(aw, sizeof (anon_walk_data_t));
1125                 return (WALK_ERR);
1126         }
1127 
1128         if (mdb_vread(&aw->aw_ahp, sizeof (aw->aw_ahp),
1129             (uintptr_t)(aw->aw_amp.ahp)) == -1) {
1130                 mdb_warn("failed to read anon hdr ptr at %p", aw->aw_amp.ahp);
1131                 mdb_free(aw, sizeof (anon_walk_data_t));
1132                 return (WALK_ERR);
1133         }
1134 
1135         /* update min and maxslot with the given constraints */
1136         maxslot = MIN(maxslot, aw->aw_ahp.size);
1137         minslot = MIN(minslot, maxslot);
1138 
1139         if (aw->aw_ahp.size <= ANON_CHUNK_SIZE ||
1140             (aw->aw_ahp.flags & ANON_ALLOC_FORCE)) {
1141                 aw->aw_nlevone = maxslot;
1142                 aw->aw_levone_ndx = minslot;
1143                 aw->aw_levtwo = NULL;
1144         } else {
1145                 aw->aw_nlevone =
1146                     (maxslot + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
1147                 aw->aw_levone_ndx = 0;
1148                 aw->aw_levtwo =
1149                     mdb_zalloc(ANON_CHUNK_SIZE * sizeof (uintptr_t), UM_SLEEP);
1150         }
1151 
1152         aw->aw_levone =
1153             mdb_alloc(aw->aw_nlevone * sizeof (uintptr_t), UM_SLEEP);
1154         aw->aw_all = (wsp->walk_arg == ANON_WALK_ALL);
1155 
1156         mdb_vread(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t),
1157             (uintptr_t)aw->aw_ahp.array_chunk);
1158 
1159         aw->aw_levtwo_ndx = 0;
1160         aw->aw_minslot = minslot;
1161         aw->aw_maxslot = maxslot;
1162 
1163 out:
1164         wsp->walk_data = aw;
1165         return (0);
1166 }
1167 
1168 int
1169 anon_walk_step(mdb_walk_state_t *wsp)
1170 {
1171         anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1172         struct anon anon;
1173         uintptr_t anonptr;
1174         ulong_t slot;
1175 
1176         /*
1177          * Once we've walked through level one, we're done.
1178          */
1179         if (aw->aw_levone_ndx >= aw->aw_nlevone) {
1180                 return (WALK_DONE);
1181         }
1182 
1183         if (aw->aw_levtwo == NULL) {
1184                 anonptr = aw->aw_levone[aw->aw_levone_ndx];
1185                 aw->aw_levone_ndx++;
1186         } else {
1187                 if (aw->aw_levtwo_ndx == 0) {
1188                         uintptr_t levtwoptr;
1189 
1190                         /* The first time through, skip to our first index. */
1191                         if (aw->aw_levone_ndx == 0) {
1192                                 aw->aw_levone_ndx =
1193                                     aw->aw_minslot / ANON_CHUNK_SIZE;
1194                                 aw->aw_levtwo_ndx =
1195                                     aw->aw_minslot % ANON_CHUNK_SIZE;
1196                         }
1197 
1198                         levtwoptr = (uintptr_t)aw->aw_levone[aw->aw_levone_ndx];
1199 
1200                         if (levtwoptr == NULL) {
1201                                 if (!aw->aw_all) {
1202                                         aw->aw_levtwo_ndx = 0;
1203                                         aw->aw_levone_ndx++;
1204                                         return (WALK_NEXT);
1205                                 }
1206                                 bzero(aw->aw_levtwo,
1207                                     ANON_CHUNK_SIZE * sizeof (uintptr_t));
1208 
1209                         } else if (mdb_vread(aw->aw_levtwo,
1210                             ANON_CHUNK_SIZE * sizeof (uintptr_t), levtwoptr) ==
1211                             -1) {
1212                                 mdb_warn("unable to read anon_map %p's "
1213                                     "second-level map %d at %p",
1214                                     aw->aw_ampp, aw->aw_levone_ndx,
1215                                     levtwoptr);
1216                                 return (WALK_ERR);
1217                         }
1218                 }
1219                 slot = aw->aw_levone_ndx * ANON_CHUNK_SIZE + aw->aw_levtwo_ndx;
1220                 anonptr = aw->aw_levtwo[aw->aw_levtwo_ndx];
1221 
1222                 /* update the indices for next time */
1223                 aw->aw_levtwo_ndx++;
1224                 if (aw->aw_levtwo_ndx == ANON_CHUNK_SIZE) {
1225                         aw->aw_levtwo_ndx = 0;
1226                         aw->aw_levone_ndx++;
1227                 }
1228 
1229                 /* make sure the slot # is in the requested range */
1230                 if (slot >= aw->aw_maxslot) {
1231                         return (WALK_DONE);
1232                 }
1233         }
1234 
1235         if (anonptr != NULL) {
1236                 mdb_vread(&anon, sizeof (anon), anonptr);
1237                 return (wsp->walk_callback(anonptr, &anon, wsp->walk_cbdata));
1238         }
1239         if (aw->aw_all) {
1240                 return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
1241         }
1242         return (WALK_NEXT);
1243 }
1244 
1245 void
1246 anon_walk_fini(mdb_walk_state_t *wsp)
1247 {
1248         anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1249 
1250         if (aw->aw_levtwo != NULL)
1251                 mdb_free(aw->aw_levtwo, ANON_CHUNK_SIZE * sizeof (uintptr_t));
1252 
1253         mdb_free(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t));
1254         mdb_free(aw, sizeof (anon_walk_data_t));
1255 }
1256 
1257 int
1258 anon_walk_init(mdb_walk_state_t *wsp)
1259 {
1260         return (anon_walk_init_common(wsp, 0, ULONG_MAX));
1261 }
1262 
1263 int
1264 segvn_anon_walk_init(mdb_walk_state_t *wsp)
1265 {
1266         const uintptr_t         svd_addr = wsp->walk_addr;
1267         uintptr_t               amp_addr;
1268         uintptr_t               seg_addr;
1269         struct segvn_data       svd;
1270         struct anon_map         amp;
1271         struct seg              seg;
1272 
1273         if (svd_addr == NULL) {
1274                 mdb_warn("segvn_anon walk doesn't support global walks\n");
1275                 return (WALK_ERR);
1276         }
1277         if (mdb_vread(&svd, sizeof (svd), svd_addr) == -1) {
1278                 mdb_warn("segvn_anon walk: unable to read segvn_data at %p",
1279                     svd_addr);
1280                 return (WALK_ERR);
1281         }
1282         if (svd.amp == NULL) {
1283                 mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n",
1284                     svd_addr);
1285                 return (WALK_ERR);
1286         }
1287         amp_addr = (uintptr_t)svd.amp;
1288         if (mdb_vread(&amp, sizeof (amp), amp_addr) == -1) {
1289                 mdb_warn("segvn_anon walk: unable to read amp %p for "
1290                     "segvn_data %p", amp_addr, svd_addr);
1291                 return (WALK_ERR);
1292         }
1293         seg_addr = (uintptr_t)svd.seg;
1294         if (mdb_vread(&seg, sizeof (seg), seg_addr) == -1) {
1295                 mdb_warn("segvn_anon walk: unable to read seg %p for "
1296                     "segvn_data %p", seg_addr, svd_addr);
1297                 return (WALK_ERR);
1298         }
1299         if ((seg.s_size + (svd.anon_index << PAGESHIFT)) > amp.size) {
1300                 mdb_warn("anon map %p is too small for segment %p\n",
1301                     amp_addr, seg_addr);
1302                 return (WALK_ERR);
1303         }
1304 
1305         wsp->walk_addr = amp_addr;
1306         return (anon_walk_init_common(wsp,
1307             svd.anon_index, svd.anon_index + (seg.s_size >> PAGESHIFT)));
1308 }
1309 
1310 
/*
 * One entry of the sparse page array built by the segvn_pages walker: an
 * in-core page of the backing vnode, recorded by its vnode offset.
 */
typedef struct {
        u_offset_t              svs_offset;     /* page's p_offset */
        uintptr_t               svs_page;       /* address of the page_t */
} segvn_sparse_t;
/* Number of entries that fit in a 128K sparse array. */
#define SEGVN_MAX_SPARSE        ((128 * 1024) / sizeof (segvn_sparse_t))
1316 
/*
 * Private state of the segvn_pages walker.
 */
typedef struct {
        uintptr_t               svw_svdp;       /* address of segvn_data */
        struct segvn_data       svw_svd;        /* local copy of segvn_data */
        struct seg              svw_seg;        /* local copy of its seg */
        size_t                  svw_walkoff;    /* byte offset of next step */
        /* NOTE(review): only ever set to 0 in the visible code -- confirm */
        ulong_t                 svw_anonskip;
        segvn_sparse_t          *svw_sparse;    /* sorted in-core pages/NULL */
        size_t                  svw_sparse_idx; /* next entry to report */
        size_t                  svw_sparse_count; /* entries filled in */
        size_t                  svw_sparse_size; /* capacity, in entries */
        uint8_t                 svw_sparse_overflow; /* array was too small */
        uint8_t                 svw_all;        /* also report absent pages */
} segvn_walk_data_t;
1330 
1331 static int
1332 segvn_sparse_fill(uintptr_t addr, const void *pp_arg, void *arg)
1333 {
1334         segvn_walk_data_t       *const  svw = arg;
1335         const page_t            *const  pp = pp_arg;
1336         const u_offset_t                offset = pp->p_offset;
1337         segvn_sparse_t          *const  cur =
1338             &svw->svw_sparse[svw->svw_sparse_count];
1339 
1340         /* See if the page is of interest */
1341         if ((u_offset_t)(offset - svw->svw_svd.offset) >= svw->svw_seg.s_size) {
1342                 return (WALK_NEXT);
1343         }
1344         /* See if we have space for the new entry, then add it. */
1345         if (svw->svw_sparse_count >= svw->svw_sparse_size) {
1346                 svw->svw_sparse_overflow = 1;
1347                 return (WALK_DONE);
1348         }
1349         svw->svw_sparse_count++;
1350         cur->svs_offset = offset;
1351         cur->svs_page = addr;
1352         return (WALK_NEXT);
1353 }
1354 
1355 static int
1356 segvn_sparse_cmp(const void *lp, const void *rp)
1357 {
1358         const segvn_sparse_t *const     l = lp;
1359         const segvn_sparse_t *const     r = rp;
1360 
1361         if (l->svs_offset < r->svs_offset) {
1362                 return (-1);
1363         }
1364         if (l->svs_offset > r->svs_offset) {
1365                 return (1);
1366         }
1367         return (0);
1368 }
1369 
/*
 * Initialize the segvn_pages walker, which walks the resident pages of a
 * segvn_data structure.  The walk must start at the address of a struct
 * segvn_data.
 *
 * For segvn_datas without an anon structure, it just looks up pages in the
 * vnode (using a pre-built sorted array of in-core pages when the segment
 * is large).  For segvn_datas with an anon structure, it layers over the
 * "segvn_anon" or "segvn_anon_all" walker; NULL slots pass through to the
 * vnode, and non-null slots are checked for residency.
 *
 * Returns WALK_NEXT with the state stored in wsp->walk_data, or WALK_ERR
 * (with the state freed) if the segvn_data or seg cannot be read or the
 * layered walk cannot be set up.
 */
int
segvn_pages_walk_init(mdb_walk_state_t *wsp)
{
        segvn_walk_data_t       *svw;
        struct segvn_data       *svd;

        if (wsp->walk_addr == NULL) {
                mdb_warn("segvn walk doesn't support global walks\n");
                return (WALK_ERR);
        }

        svw = mdb_zalloc(sizeof (*svw), UM_SLEEP);
        svw->svw_svdp = wsp->walk_addr;
        svw->svw_anonskip = 0;
        svw->svw_sparse_idx = 0;
        svw->svw_walkoff = 0;
        svw->svw_all = (wsp->walk_arg == SEGVN_PAGES_ALL);

        if (mdb_vread(&svw->svw_svd, sizeof (svw->svw_svd), wsp->walk_addr) ==
            -1) {
                mdb_warn("failed to read segvn_data at %p", wsp->walk_addr);
                mdb_free(svw, sizeof (*svw));
                return (WALK_ERR);
        }

        svd = &svw->svw_svd;
        if (mdb_vread(&svw->svw_seg, sizeof (svw->svw_seg),
            (uintptr_t)svd->seg) == -1) {
                /* Report both the seg pointer and where it was read from. */
                mdb_warn("failed to read seg at %p (from %p)",
                    svd->seg, &((struct segvn_data *)(wsp->walk_addr))->seg);
                mdb_free(svw, sizeof (*svw));
                return (WALK_ERR);
        }

        if (svd->amp == NULL && svd->vp == NULL) {
                /* make the walk terminate immediately;  no pages */
                svw->svw_walkoff = svw->svw_seg.s_size;

        } else if (svd->amp == NULL &&
            (svw->svw_seg.s_size >> PAGESHIFT) >= SEGVN_MAX_SPARSE) {
                /*
                 * If we don't have an anon pointer, and the segment is large,
                 * we try to load the in-memory pages into a fixed-size array,
                 * which is then sorted and reported directly.  This is much
                 * faster than doing a mdb_page_lookup() for each possible
                 * offset.
                 *
                 * If the allocation fails, or there are too many pages
                 * in-core, we fall back to looking up the pages individually.
                 */
                svw->svw_sparse = mdb_alloc(
                    SEGVN_MAX_SPARSE * sizeof (*svw->svw_sparse), UM_NOSLEEP);
                if (svw->svw_sparse != NULL) {
                        svw->svw_sparse_size = SEGVN_MAX_SPARSE;

                        if (mdb_pwalk("page", segvn_sparse_fill, svw,
                            (uintptr_t)svd->vp) == -1 ||
                            svw->svw_sparse_overflow) {
                                /* Unusable array: drop it and fall back. */
                                mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
                                    sizeof (*svw->svw_sparse));
                                svw->svw_sparse = NULL;
                        } else {
                                qsort(svw->svw_sparse, svw->svw_sparse_count,
                                    sizeof (*svw->svw_sparse),
                                    segvn_sparse_cmp);
                        }
                }

        } else if (svd->amp != NULL) {
                const char *const layer = (!svw->svw_all && svd->vp == NULL) ?
                    "segvn_anon" : "segvn_anon_all";
                /*
                 * If we're not printing all offsets, and the segvn_data has
                 * no backing VP, we can use the "segvn_anon" walker, which
                 * efficiently skips NULL slots.
                 *
                 * Otherwise, we layer over the "segvn_anon_all" walker
                 * (which reports all anon slots, even NULL ones), so that
                 * segvn_pages_walk_step() knows the precise offset for each
                 * element.  It uses that offset information to look up the
                 * backing pages for NULL anon slots.
                 */
                if (mdb_layered_walk(layer, wsp) == -1) {
                        mdb_warn("segvn_pages: failed to layer \"%s\" "
                            "for segvn_data %p", layer, svw->svw_svdp);
                        mdb_free(svw, sizeof (*svw));
                        return (WALK_ERR);
                }
        }

        wsp->walk_data = svw;
        return (WALK_NEXT);
}
1469 
/*
 * Step function for the segvn_pages walker.
 *
 * Each call determines the page (if any) for the current step, advances
 * svw_walkoff by PAGESIZE, and then either passes the page_t to the
 * callback, passes (NULL, NULL) when no page was found and svw_all is set,
 * or returns WALK_NEXT.  The page comes from one of three sources:
 *   - the sorted sparse array, when one was built at init time;
 *   - a <vp, offset> lookup, when there is no anon map or the layered
 *     anon walker reported a NULL slot;
 *   - the struct anon supplied by the layered anon walker.
 *
 * Returns WALK_DONE when the walk offset reaches the segment size (or, for
 * a sparse array without svw_all, when the array is exhausted), and
 * WALK_ERR if a page_t cannot be read.
 */
int
segvn_pages_walk_step(mdb_walk_state_t *wsp)
{
        segvn_walk_data_t       *const  svw = wsp->walk_data;
        struct seg              *const  seg = &svw->svw_seg;
        struct segvn_data       *const  svd = &svw->svw_svd;
        uintptr_t               pp;
        page_t                  page;

        /* If we've walked off the end of the segment, we're done. */
        if (svw->svw_walkoff >= seg->s_size) {
                return (WALK_DONE);
        }

        /*
         * If we've got a sparse page array, just send it directly.
         */
        if (svw->svw_sparse != NULL) {
                u_offset_t off;

                if (svw->svw_sparse_idx >= svw->svw_sparse_count) {
                        /* Array exhausted: only NULLs remain to report. */
                        pp = NULL;
                        if (!svw->svw_all) {
                                return (WALK_DONE);
                        }
                } else {
                        segvn_sparse_t  *const svs =
                            &svw->svw_sparse[svw->svw_sparse_idx];
                        off = svs->svs_offset - svd->offset;
                        /*
                         * When reporting every offset, hold the next entry
                         * back until the walk offset catches up with it.
                         */
                        if (svw->svw_all && svw->svw_walkoff != off) {
                                pp = NULL;
                        } else {
                                pp = svs->svs_page;
                                svw->svw_sparse_idx++;
                        }
                }

        } else if (svd->amp == NULL || wsp->walk_addr == NULL) {
                /*
                 * If there's no anon, or the anon slot is NULL, look up
                 * <vp, offset>.
                 */
                if (svd->vp != NULL) {
                        pp = mdb_page_lookup((uintptr_t)svd->vp,
                            svd->offset + svw->svw_walkoff);
                } else {
                        pp = NULL;
                }

        } else {
                const struct anon       *const  anon = wsp->walk_layer;

                /*
                 * We have a "struct anon"; if it's not swapped out,
                 * look up the page.
                 */
                if (anon->an_vp != NULL || anon->an_off != 0) {
                        pp = mdb_page_lookup((uintptr_t)anon->an_vp,
                            anon->an_off);
                        /* Only warn when the target is not running. */
                        if (pp == 0 && mdb_get_state() != MDB_STATE_RUNNING) {
                                mdb_warn("walk segvn_pages: segvn_data %p "
                                    "offset %ld, anon page <%p, %llx> not "
                                    "found.\n", svw->svw_svdp, svw->svw_walkoff,
                                    anon->an_vp, anon->an_off);
                        }
                } else {
                        if (anon->an_pvp == NULL) {
                                mdb_warn("walk segvn_pages: useless struct "
                                    "anon at %p\n", wsp->walk_addr);
                        }
                        pp = NULL;      /* nothing at this offset */
                }
        }

        svw->svw_walkoff += PAGESIZE;        /* Update for the next call */
        if (pp != NULL) {
                if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
                        mdb_warn("unable to read page_t at %#lx", pp);
                        return (WALK_ERR);
                }
                return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
        }
        if (svw->svw_all) {
                return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
        }
        return (WALK_NEXT);
}
1557 
1558 void
1559 segvn_pages_walk_fini(mdb_walk_state_t *wsp)
1560 {
1561         segvn_walk_data_t       *const  svw = wsp->walk_data;
1562 
1563         if (svw->svw_sparse != NULL) {
1564                 mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1565                     sizeof (*svw->svw_sparse));
1566         }
1567         mdb_free(svw, sizeof (*svw));
1568 }
1569 
/*
 * Hash a <vnode address, offset> pair to an index into the smap hash table.
 *
 * Note that the macro masks its result with a variable named smd_hashmsk,
 * which is not a parameter: it must be in scope wherever the macro is
 * expanded (vnode2smap() reads it from the target into a local of that
 * name).
 *
 * NOTE(review): presumably this duplicates a hash function private to the
 * kernel's segmap implementation and must be kept in sync with it -- confirm.
 */
#define SMAP_HASHFUNC(vp, off)  \
        ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
        ((off) >> MAXBSHIFT)) & smd_hashmsk)
1576 
1577 int
1578 vnode2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1579 {
1580         long smd_hashmsk;
1581         int hash;
1582         uintptr_t offset = 0;
1583         struct smap smp;
1584         uintptr_t saddr, kaddr;
1585         uintptr_t smd_hash, smd_smap;
1586         struct seg seg;
1587 
1588         if (!(flags & DCMD_ADDRSPEC))
1589                 return (DCMD_USAGE);
1590 
1591         if (mdb_readvar(&smd_hashmsk, "smd_hashmsk") == -1) {
1592                 mdb_warn("failed to read smd_hashmsk");
1593                 return (DCMD_ERR);
1594         }
1595 
1596         if (mdb_readvar(&smd_hash, "smd_hash") == -1) {
1597                 mdb_warn("failed to read smd_hash");
1598                 return (DCMD_ERR);
1599         }
1600 
1601         if (mdb_readvar(&smd_smap, "smd_smap") == -1) {
1602                 mdb_warn("failed to read smd_hash");
1603                 return (DCMD_ERR);
1604         }
1605 
1606         if (mdb_readvar(&kaddr, "segkmap") == -1) {
1607                 mdb_warn("failed to read segkmap");
1608                 return (DCMD_ERR);
1609         }
1610 
1611         if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1612                 mdb_warn("failed to read segkmap at %p", kaddr);
1613                 return (DCMD_ERR);
1614         }
1615 
1616         if (argc != 0) {
1617                 const mdb_arg_t *arg = &argv[0];
1618 
1619                 if (arg->a_type == MDB_TYPE_IMMEDIATE)
1620                         offset = arg->a_un.a_val;
1621                 else
1622                         offset = (uintptr_t)mdb_strtoull(arg->a_un.a_str);
1623         }
1624 
1625         hash = SMAP_HASHFUNC(addr, offset);
1626 
1627         if (mdb_vread(&saddr, sizeof (saddr),
1628             smd_hash + hash * sizeof (uintptr_t)) == -1) {
1629                 mdb_warn("couldn't read smap at %p",
1630                     smd_hash + hash * sizeof (uintptr_t));
1631                 return (DCMD_ERR);
1632         }
1633 
1634         do {
1635                 if (mdb_vread(&smp, sizeof (smp), saddr) == -1) {
1636                         mdb_warn("couldn't read smap at %p", saddr);
1637                         return (DCMD_ERR);
1638                 }
1639 
1640                 if ((uintptr_t)smp.sm_vp == addr && smp.sm_off == offset) {
1641                         mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n",
1642                             addr, offset, saddr, ((saddr - smd_smap) /
1643                             sizeof (smp)) * MAXBSIZE + seg.s_base);
1644                         return (DCMD_OK);
1645                 }
1646 
1647                 saddr = (uintptr_t)smp.sm_hash;
1648         } while (saddr != NULL);
1649 
1650         mdb_printf("no smap for vnode %p, offs %p\n", addr, offset);
1651         return (DCMD_OK);
1652 }
1653 
1654 /*ARGSUSED*/
1655 int
1656 addr2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1657 {
1658         uintptr_t kaddr;
1659         struct seg seg;
1660         struct segmap_data sd;
1661 
1662         if (!(flags & DCMD_ADDRSPEC))
1663                 return (DCMD_USAGE);
1664 
1665         if (mdb_readvar(&kaddr, "segkmap") == -1) {
1666                 mdb_warn("failed to read segkmap");
1667                 return (DCMD_ERR);
1668         }
1669 
1670         if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1671                 mdb_warn("failed to read segkmap at %p", kaddr);
1672                 return (DCMD_ERR);
1673         }
1674 
1675         if (mdb_vread(&sd, sizeof (sd), (uintptr_t)seg.s_data) == -1) {
1676                 mdb_warn("failed to read segmap_data at %p", seg.s_data);
1677                 return (DCMD_ERR);
1678         }
1679 
1680         mdb_printf("%p is smap %p\n", addr,
1681             ((addr - (uintptr_t)seg.s_base) >> MAXBSHIFT) *
1682             sizeof (struct smap) + (uintptr_t)sd.smd_sm);
1683 
1684         return (DCMD_OK);
1685 }