/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2017 Joyent, Inc.
 */

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ks.h>
#include <sys/types.h>
#include <sys/memlist.h>
#include <sys/swap.h>
#include <sys/systm.h>
#include <sys/thread.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/vnode.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/seg_hole.h>
#if defined(__i386) || defined(__amd64)
#include <sys/balloon_impl.h>
#endif

#include "avl.h"
#include "memory.h"

/*
 * Page walker.
 * By default, this will walk all pages in the system.  If given an
 * address, it will walk all pages belonging to the vnode at that
 * address.
 */
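/*
 * Illustrative usage (the vnode address below is hypothetical):
 *
 *     > ::walk page                     walk every page in the system
 *     > ffffff01d2e0b700::walk page     walk the pages of one vnode
 */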

/*
 * page_walk_data
 *
 * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
 * number of hash locations remaining in the page hash table when
 * walking all pages.
 *
 * The astute reader will notice that pw_hashloc is only used when
 * reading all pages (to hold a pointer to our location in the page
 * hash table), and that pw_first is only used when reading the pages
 * belonging to a particular vnode (to hold a pointer to the first
 * page).  While these could be combined to be a single pointer, they
 * are left separate for clarity.
 */
typedef struct page_walk_data {
        long            pw_hashleft;
        void            **pw_hashloc;
        uintptr_t       pw_first;
} page_walk_data_t;

int
page_walk_init(mdb_walk_state_t *wsp)
{
        page_walk_data_t        *pwd;
        void    **ptr;
        size_t  hashsz;
        vnode_t vn;

        if (wsp->walk_addr == NULL) {

                /*
                 * Walk all pages
                 */

                if ((mdb_readvar(&ptr, "page_hash") == -1) ||
                    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
                    (ptr == NULL) || (hashsz == 0)) {
                        mdb_warn("page_hash, page_hashsz not found or invalid");
                        return (WALK_ERR);
                }

                /*
                 * Since we are walking all pages, initialize hashleft
                 * to be the remaining number of entries in the page
                 * hash.  hashloc is set to the start of the page hash
                 * table.  Setting the walk address to 0 indicates that
                 * we aren't currently following a hash chain, and that
                 * we need to scan the page hash table for a page.
                 */
                pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
                pwd->pw_hashleft = hashsz;
                pwd->pw_hashloc = ptr;
                wsp->walk_addr = 0;
        } else {

                /*
                 * Walk just this vnode
                 */

                if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
                        mdb_warn("unable to read vnode_t at %#lx",
                            wsp->walk_addr);
                        return (WALK_ERR);
                }

                /*
                 * We set hashleft to -1 to indicate that we are
                 * walking a vnode, and initialize first to 0 (it is
                 * used to terminate the walk, so it must not be set
                 * until after we have walked the first page).  The
                 * walk address is set to the first page.
                 */
                pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
                pwd->pw_hashleft = -1;
                pwd->pw_first = 0;

                wsp->walk_addr = (uintptr_t)vn.v_pages;
        }

        wsp->walk_data = pwd;

        return (WALK_NEXT);
}

int
page_walk_step(mdb_walk_state_t *wsp)
{
        page_walk_data_t        *pwd = wsp->walk_data;
        page_t          page;
        uintptr_t       pp;

        pp = wsp->walk_addr;

        if (pwd->pw_hashleft < 0) {

                /* We're walking a vnode's pages */

                /*
                 * If there are no pages left to walk, or we have come
                 * back around to the first page, we are done.  If we
                 * can't read the page we're looking at, return an error.
                 */
                if (pp == NULL || pp == pwd->pw_first)
                        return (WALK_DONE);
                if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
                        mdb_warn("unable to read page_t at %#lx", pp);
                        return (WALK_ERR);
                }

                /*
                 * Set the walk address to the next page, and if the
                 * first page hasn't been set yet (i.e. we are on the
                 * first page), set it.
                 */
                wsp->walk_addr = (uintptr_t)page.p_vpnext;
                if (pwd->pw_first == NULL)
                        pwd->pw_first = pp;

        } else if (pwd->pw_hashleft > 0) {

                /* We're walking all pages */

                /*
                 * If pp (the walk address) is NULL, we scan through
                 * the page hash table until we find a page.
                 */
                if (pp == NULL) {

                        /*
                         * Iterate through the page hash table until we
                         * find a page or reach the end.
                         */
                        do {
                                if (mdb_vread(&pp, sizeof (uintptr_t),
                                    (uintptr_t)pwd->pw_hashloc) == -1) {
                                        mdb_warn("unable to read from %#p",
                                            pwd->pw_hashloc);
                                        return (WALK_ERR);
                                }
                                pwd->pw_hashleft--;
                                pwd->pw_hashloc++;
                        } while (pwd->pw_hashleft && (pp == NULL));

                        /*
                         * We've reached the end; exit.
                         */
                        if (pp == NULL)
                                return (WALK_DONE);
                }

                if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
                        mdb_warn("unable to read page_t at %#lx", pp);
                        return (WALK_ERR);
                }

                /*
                 * Set the walk address to the next page.
                 */
                wsp->walk_addr = (uintptr_t)page.p_hash;

        } else {
                /* We've finished walking all pages. */
                return (WALK_DONE);
        }

        return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
}

void
page_walk_fini(mdb_walk_state_t *wsp)
{
        mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
}

/*
 * allpages walks all pages in the system, in the order they appear in
 * the memseg structures.
 */
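/*
 * Illustrative usage: "> ::walk allpages | ::page" prints every page_t,
 * in memseg order.
 */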

#define PAGE_BUFFER     128

int
allpages_walk_init(mdb_walk_state_t *wsp)
{
        if (wsp->walk_addr != 0) {
                mdb_warn("allpages only supports global walks.\n");
                return (WALK_ERR);
        }

        if (mdb_layered_walk("memseg", wsp) == -1) {
                mdb_warn("couldn't walk 'memseg'");
                return (WALK_ERR);
        }

        wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
        return (WALK_NEXT);
}

int
allpages_walk_step(mdb_walk_state_t *wsp)
{
        const struct memseg *msp = wsp->walk_layer;
        page_t *buf = wsp->walk_data;
        size_t pg_read, i;
        size_t pg_num = msp->pages_end - msp->pages_base;
        const page_t *pg_addr = msp->pages;

        while (pg_num > 0) {
                pg_read = MIN(pg_num, PAGE_BUFFER);

                if (mdb_vread(buf, pg_read * sizeof (page_t),
                    (uintptr_t)pg_addr) == -1) {
                        mdb_warn("can't read page_t's at %#lx", pg_addr);
                        return (WALK_ERR);
                }
                for (i = 0; i < pg_read; i++) {
                        int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
                            &buf[i], wsp->walk_cbdata);

                        if (ret != WALK_NEXT)
                                return (ret);
                }
                pg_num -= pg_read;
                pg_addr += pg_read;
        }

        return (WALK_NEXT);
}

void
allpages_walk_fini(mdb_walk_state_t *wsp)
{
        mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
}

/*
 * Hash table + LRU queue.
 * This table is used to cache recently read vnodes for the memstat
 * command, to reduce the number of mdb_vread calls.  This greatly
 * speeds up the memstat command on live, large-CPU-count systems.
 */

#define VN_SMALL        401
#define VN_LARGE        10007
#define VN_HTABLE_KEY(p, hp)    ((p) % ((hp)->vn_htable_buckets))

struct vn_htable_list {
        uint_t vn_flag;                         /* v_flag from vnode    */
        uintptr_t vn_ptr;                       /* pointer to vnode     */
        struct vn_htable_list *vn_q_next;       /* queue next pointer   */
        struct vn_htable_list *vn_q_prev;       /* queue prev pointer   */
        struct vn_htable_list *vn_h_next;       /* hash table pointer   */
};

/*
 * vn_q_first        -> points to the head of the queue: the vnode that was
 *                      most recently used
 * vn_q_last         -> points to the least recently used vnode; it is
 *                      recycled once a new vnode is read
 * vn_htable         -> hash table
 * vn_htable_buf     -> contains htable objects
 * vn_htable_size    -> total number of items in the hash table
 * vn_htable_buckets -> number of buckets in the hash table
 */
typedef struct vn_htable {
        struct vn_htable_list  *vn_q_first;
        struct vn_htable_list  *vn_q_last;
        struct vn_htable_list **vn_htable;
        struct vn_htable_list  *vn_htable_buf;
        int vn_htable_size;
        int vn_htable_buckets;
} vn_htable_t;


/* allocate memory, initialize the hash table and LRU queue */
static void
vn_htable_init(vn_htable_t *hp, size_t vn_size)
{
        int i;
        int htable_size = MAX(vn_size, VN_LARGE);

        if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
            * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
                htable_size = VN_SMALL;
                hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
                    * htable_size, UM_SLEEP|UM_GC);
        }

        hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
            * htable_size, UM_SLEEP|UM_GC);

        hp->vn_q_first  = &hp->vn_htable_buf[0];
        hp->vn_q_last   = &hp->vn_htable_buf[htable_size - 1];
        hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
        hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];

        for (i = 1; i < (htable_size-1); i++) {
                hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
                hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
        }

        hp->vn_htable_size = htable_size;
        hp->vn_htable_buckets = htable_size;
}


/*
 * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
 * The function tries to find the needed information in the following order:
 *
 * 1. check if ptr is the first entry in the queue
 * 2. check if ptr is in the hash table (if so, move it to the top of the
 *    queue)
 * 3. do an mdb_vread, remove the last item from the queue and the hash
 *    table, insert the newly read information into the freed object, and
 *    put that object at the top of the queue.
 */
static int
vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
{
        int hkey;
        struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
        struct vn_htable_list  *q_first = hp->vn_q_first;

        /* 1. vnode ptr is the first in queue, just get v_flag and return */
        if (q_first->vn_ptr == ptr) {
                vp->v_flag = q_first->vn_flag;

                return (0);
        }

        /* 2. search the hash table for this ptr */
        hkey = VN_HTABLE_KEY(ptr, hp);
        hent = hp->vn_htable[hkey];
        while (hent && (hent->vn_ptr != ptr))
                hent = hent->vn_h_next;

        /* 3. if hent is NULL, it was not in the hash table; do mdb_vread */
        if (hent == NULL) {
                struct vnode vn;

                if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
                        mdb_warn("unable to read vnode_t at %#lx", ptr);
                        return (-1);
                }

                /* we will insert the read data into the last queue element */
                hent = hp->vn_q_last;

                /* remove the old hp->vn_q_last object from the hash table */
                if (hent->vn_ptr) {
                        htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
                        while (*htmp != hent)
                                htmp = &(*htmp)->vn_h_next;
                        *htmp = hent->vn_h_next;
                }

                /* insert data into new free object */
                hent->vn_ptr  = ptr;
                hent->vn_flag = vn.v_flag;

                /* insert new object into hash table */
                hent->vn_h_next = hp->vn_htable[hkey];
                hp->vn_htable[hkey] = hent;
        }

        /* Remove from queue. hent is not first, vn_q_prev is not NULL */
        q_next = hent->vn_q_next;
        q_prev = hent->vn_q_prev;
        if (q_next == NULL)
                hp->vn_q_last = q_prev;
        else
                q_next->vn_q_prev = q_prev;
        q_prev->vn_q_next = q_next;

        /* Add to the front of queue */
        hent->vn_q_prev = NULL;
        hent->vn_q_next = q_first;
        q_first->vn_q_prev = hent;
        hp->vn_q_first = hent;

        /* Set v_flag in vnode pointer from hent */
        vp->v_flag = hent->vn_flag;

        return (0);
}

/* Summary statistics of pages */
typedef struct memstat {
        struct vnode    *ms_kvp;        /* Cached address of kernel vnode */
        struct vnode    *ms_unused_vp;  /* Unused pages vnode pointer     */
        struct vnode    *ms_zvp;        /* Cached address of zio vnode    */
        uint64_t        ms_kmem;        /* Pages of kernel memory         */
        uint64_t        ms_zfs_data;    /* Pages of zfs data              */
        uint64_t        ms_anon;        /* Pages of anonymous memory      */
        uint64_t        ms_vnode;       /* Pages of named (vnode) memory  */
        uint64_t        ms_exec;        /* Pages of exec/library memory   */
        uint64_t        ms_cachelist;   /* Pages on the cachelist (free)  */
        uint64_t        ms_bootpages;   /* Pages on the bootpages list    */
        uint64_t        ms_total;       /* Pages on page hash             */
        vn_htable_t     *ms_vn_htable;  /* Pointer to hash table          */
        struct vnode    ms_vn;          /* vnode buffer                   */
} memstat_t;

#define MS_PP_ISKAS(pp, stats)                          \
        ((pp)->p_vnode == (stats)->ms_kvp)

#define MS_PP_ISZFS_DATA(pp, stats)                     \
        (((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))

/*
 * Summarize pages by type and update stat information
 */

/* ARGSUSED */
static int
memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
{
        struct vnode *vp = &stats->ms_vn;

        if (PP_ISBOOTPAGES(pp))
                stats->ms_bootpages++;
        else if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
                return (WALK_NEXT);
        else if (MS_PP_ISKAS(pp, stats))
                stats->ms_kmem++;
        else if (MS_PP_ISZFS_DATA(pp, stats))
                stats->ms_zfs_data++;
        else if (PP_ISFREE(pp))
                stats->ms_cachelist++;
        else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
                return (WALK_ERR);
        else if (IS_SWAPFSVP(vp))
                stats->ms_anon++;
        else if ((vp->v_flag & VVMEXEC) != 0)
                stats->ms_exec++;
        else
                stats->ms_vnode++;

        stats->ms_total++;

        return (WALK_NEXT);
}

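/*
 * ::memstat summarizes physical memory usage by category (kernel, ZFS data,
 * anon, exec/libs, page cache, cachelist, freelist).  Illustrative usage:
 *
 *     > ::memstat
 *
 * The -s option handled below is internal; it sets the vn_htable size, and
 * a prime value such as VN_LARGE (10007) works best.
 */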
/* ARGSUSED */
int
memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        pgcnt_t total_pages, physmem;
        ulong_t freemem;
        memstat_t stats;
        GElf_Sym sym;
        vn_htable_t ht;
        struct vnode *kvps;
        uintptr_t vn_size = 0;
#if defined(__i386) || defined(__amd64)
        bln_stats_t bln_stats;
        ssize_t bln_size;
#endif

        bzero(&stats, sizeof (memstat_t));

        /*
         * -s size is an internal option that specifies the size of vn_htable.
         * The hash table size is chosen as follows: if the user specified a
         * size larger than VN_LARGE, try that size, and fall back to VN_SMALL
         * if the allocation fails.  Otherwise try VN_LARGE, again falling
         * back to VN_SMALL if the allocation fails.  For better hash table
         * efficiency, it is highly recommended that the size be a prime
         * number.
         */
        if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
            's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
                return (DCMD_USAGE);

        /* Initialize vnode hash list and queue */
        vn_htable_init(&ht, vn_size);
        stats.ms_vn_htable = &ht;

        /* Total physical memory */
        if (mdb_readvar(&total_pages, "total_pages") == -1) {
                mdb_warn("unable to read total_pages");
                return (DCMD_ERR);
        }

        /* Artificially limited memory */
        if (mdb_readvar(&physmem, "physmem") == -1) {
                mdb_warn("unable to read physmem");
                return (DCMD_ERR);
        }

        /* read kernel vnode array pointer */
        if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps",
            (GElf_Sym *)&sym) == -1) {
                mdb_warn("unable to read kvps");
                return (DCMD_ERR);
        }
        kvps = (struct vnode *)(uintptr_t)sym.st_value;
        stats.ms_kvp =  &kvps[KV_KVP];

        /*
         * Read the zio vnode pointer.
         */
        stats.ms_zvp = &kvps[KV_ZVP];

        /*
         * If physmem != total_pages, then the administrator has limited the
         * number of pages available in the system.  Excluded pages are
         * associated with the unused pages vnode.  Read this vnode so the
         * pages can be excluded in the page accounting.
         */
        if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
            (GElf_Sym *)&sym) == -1) {
                mdb_warn("unable to read unused_pages_vp");
                return (DCMD_ERR);
        }
        stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;

        /* walk all pages, collect statistics */
        if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback,
            &stats) == -1) {
                mdb_warn("can't walk allpages");
                return (DCMD_ERR);
        }

#define MS_PCT_TOTAL(x) ((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
                ((physmem) * 10)))

        mdb_printf("Page Summary                Pages                MB"
            "  %%Tot\n");
        mdb_printf("------------     ----------------  ----------------"
            "  ----\n");
        mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
            stats.ms_kmem,
            (uint64_t)stats.ms_kmem * PAGESIZE / (1024 * 1024),
            MS_PCT_TOTAL(stats.ms_kmem));

        if (stats.ms_bootpages != 0) {
                mdb_printf("Boot pages       %16llu  %16llu  %3lu%%\n",
                    stats.ms_bootpages,
                    (uint64_t)stats.ms_bootpages * PAGESIZE / (1024 * 1024),
                    MS_PCT_TOTAL(stats.ms_bootpages));
        }

        if (stats.ms_zfs_data != 0) {
                mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
                    stats.ms_zfs_data,
                    (uint64_t)stats.ms_zfs_data * PAGESIZE / (1024 * 1024),
                    MS_PCT_TOTAL(stats.ms_zfs_data));
        }

        mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
            stats.ms_anon,
            (uint64_t)stats.ms_anon * PAGESIZE / (1024 * 1024),
            MS_PCT_TOTAL(stats.ms_anon));
        mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
            stats.ms_exec,
            (uint64_t)stats.ms_exec * PAGESIZE / (1024 * 1024),
            MS_PCT_TOTAL(stats.ms_exec));
        mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
            stats.ms_vnode,
            (uint64_t)stats.ms_vnode * PAGESIZE / (1024 * 1024),
            MS_PCT_TOTAL(stats.ms_vnode));
        mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
            stats.ms_cachelist,
            (uint64_t)stats.ms_cachelist * PAGESIZE / (1024 * 1024),
            MS_PCT_TOTAL(stats.ms_cachelist));

        /*
         * Occasionally, we double count pages above.  To avoid printing
         * absurdly large values for freemem, we clamp it at zero.
         */
        if (physmem > stats.ms_total)
                freemem = physmem - stats.ms_total;
        else
                freemem = 0;

#if defined(__i386) || defined(__amd64)
        /* Are we running under Xen?  If so, get balloon memory usage. */
        if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
                if (freemem > bln_stats.bln_hv_pages)
                        freemem -= bln_stats.bln_hv_pages;
                else
                        freemem = 0;
        }
#endif

        mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
            (uint64_t)freemem * PAGESIZE / (1024 * 1024),
            MS_PCT_TOTAL(freemem));

#if defined(__i386) || defined(__amd64)
        if (bln_size != -1) {
                mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
                    bln_stats.bln_hv_pages,
                    (uint64_t)bln_stats.bln_hv_pages * PAGESIZE / (1024 * 1024),
                    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
        }
#endif

        mdb_printf("\nTotal            %16lu  %16lu\n",
            physmem,
            (uint64_t)physmem * PAGESIZE / (1024 * 1024));

        if (physmem != total_pages) {
                mdb_printf("Physical         %16lu  %16lu\n",
                    total_pages,
                    (uint64_t)total_pages * PAGESIZE / (1024 * 1024));
        }

#undef MS_PCT_TOTAL

        return (DCMD_OK);
}

void
pagelookup_help(void)
{
        mdb_printf(
            "Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n"
            "\n"
            "Can be invoked three different ways:\n\n"
            "    ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n"
            "    %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n"
            "    %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n"
            "\n"
            "The latter two forms are useful in pipelines.\n");
}

int
pagelookup(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        uintptr_t vp = -(uintptr_t)1;
        uint64_t offset = -(uint64_t)1;

        uintptr_t pageaddr;
        int hasaddr = (flags & DCMD_ADDRSPEC);
        int usedaddr = 0;

        if (mdb_getopts(argc, argv,
            'v', MDB_OPT_UINTPTR, &vp,
            'o', MDB_OPT_UINT64, &offset,
            0) != argc) {
                return (DCMD_USAGE);
        }

        if (vp == -(uintptr_t)1) {
                if (offset == -(uint64_t)1) {
                        mdb_warn(
                            "pagelookup: at least one of -v vp or -o offset "
                            "required.\n");
                        return (DCMD_USAGE);
                }
                vp = addr;
                usedaddr = 1;
        } else if (offset == -(uint64_t)1) {
                offset = mdb_get_dot();
                usedaddr = 1;
        }
        if (usedaddr && !hasaddr) {
                mdb_warn("pagelookup: address required\n");
                return (DCMD_USAGE);
        }
        if (!usedaddr && hasaddr) {
                mdb_warn(
                    "pagelookup: address specified when both -v and -o were "
                    "passed");
                return (DCMD_USAGE);
        }

        pageaddr = mdb_page_lookup(vp, offset);
        if (pageaddr == 0) {
                mdb_warn("pagelookup: no page for {vp = %p, offset = %llp}\n",
                    vp, offset);
                return (DCMD_OK);
        }
        mdb_printf("%#lr\n", pageaddr);         /* this is PIPE_OUT friendly */
        return (DCMD_OK);
}

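/*
 * ::page_num2pp translates a page frame number (PFN) into the address of its
 * page_t via mdb_pfn2page().  Illustrative usage (the PFN is hypothetical):
 *
 *     > 1000::page_num2pp
 */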
/*ARGSUSED*/
int
page_num2pp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        uintptr_t pp;

        if (argc != 0 || !(flags & DCMD_ADDRSPEC)) {
                return (DCMD_USAGE);
        }

        pp = mdb_pfn2page((pfn_t)addr);
        if (pp == 0) {
                return (DCMD_ERR);
        }

        if (flags & DCMD_PIPE_OUT) {
                mdb_printf("%#lr\n", pp);
        } else {
                mdb_printf("%lx has page_t at %#lx\n", (pfn_t)addr, pp);
        }

        return (DCMD_OK);
}

int
page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        page_t  p;

        if (!(flags & DCMD_ADDRSPEC)) {
                if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
                        mdb_warn("can't walk pages");
                        return (DCMD_ERR);
                }
                return (DCMD_OK);
        }

        if (DCMD_HDRSPEC(flags)) {
                mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
                    "PAGE", "VNODE", "OFFSET", "SELOCK",
                    "LCT", "COW", "IO", "FS", "ST");
        }

        if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
                mdb_warn("can't read page_t at %#lx", addr);
                return (DCMD_ERR);
        }

        mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
            addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
            p.p_iolock_state, p.p_fsdata, p.p_state);

        return (DCMD_OK);
}

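/*
 * The swapinfo walker starts at the kernel's "swapinfo" list head and
 * follows si_next; the ::swapinfo dcmd formats each entry.  Illustrative
 * usage:
 *
 *     > ::swapinfo
 */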
int
swap_walk_init(mdb_walk_state_t *wsp)
{
        void    *ptr;

        if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
                mdb_warn("swapinfo not found or invalid");
                return (WALK_ERR);
        }

        wsp->walk_addr = (uintptr_t)ptr;

        return (WALK_NEXT);
}

int
swap_walk_step(mdb_walk_state_t *wsp)
{
        uintptr_t       sip;
        struct swapinfo si;

        sip = wsp->walk_addr;

        if (sip == NULL)
                return (WALK_DONE);

        if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
                mdb_warn("unable to read swapinfo at %#lx", sip);
                return (WALK_ERR);
        }

        wsp->walk_addr = (uintptr_t)si.si_next;

        return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
}

int
swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        struct swapinfo si;
        char            *name;

        if (!(flags & DCMD_ADDRSPEC)) {
                if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
                        mdb_warn("can't walk swapinfo");
                        return (DCMD_ERR);
                }
                return (DCMD_OK);
        }

        if (DCMD_HDRSPEC(flags)) {
                mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
                    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
        }

        if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
                mdb_warn("can't read swapinfo at %#lx", addr);
                return (DCMD_ERR);
        }

        name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
        if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
                name = "*error*";

        mdb_printf("%0?lx %?p %9d %9d %s\n",
            addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);

        return (DCMD_OK);
}

int
memlist_walk_step(mdb_walk_state_t *wsp)
{
        uintptr_t       mlp;
        struct memlist  ml;

        mlp = wsp->walk_addr;

        if (mlp == NULL)
                return (WALK_DONE);

        if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
                mdb_warn("unable to read memlist at %#lx", mlp);
                return (WALK_ERR);
        }

        wsp->walk_addr = (uintptr_t)ml.ml_next;

        return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
}

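/*
 * ::memlist prints memlist entries.  Without an address it walks one or more
 * of the well-known lists selected by -i (phys_install), -a (phys_avail),
 * and -v (virt_avail), defaulting to phys_install.  Illustrative usage:
 *
 *     > ::memlist -a
 */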
int
memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        struct memlist  ml;

        if (!(flags & DCMD_ADDRSPEC)) {
                uintptr_t ptr;
                uint_t list = 0;
                int i;
                static const char *lists[] = {
                        "phys_install",
                        "phys_avail",
                        "virt_avail"
                };

                if (mdb_getopts(argc, argv,
                    'i', MDB_OPT_SETBITS, (1 << 0), &list,
                    'a', MDB_OPT_SETBITS, (1 << 1), &list,
                    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
                        return (DCMD_USAGE);

                if (!list)
                        list = 1;

                for (i = 0; list; i++, list >>= 1) {
                        if (!(list & 1))
                                continue;
                        if ((mdb_readvar(&ptr, lists[i]) == -1) ||
                            (ptr == NULL)) {
                                mdb_warn("%s not found or invalid", lists[i]);
                                return (DCMD_ERR);
                        }

                        mdb_printf("%s:\n", lists[i]);
                        if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
                            ptr) == -1) {
                                mdb_warn("can't walk memlist");
                                return (DCMD_ERR);
                        }
                }
                return (DCMD_OK);
        }

        if (DCMD_HDRSPEC(flags))
                mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");

        if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
                mdb_warn("can't read memlist at %#lx", addr);
                return (DCMD_ERR);
        }

        mdb_printf("%0?lx %16llx %16llx\n", addr, ml.ml_address, ml.ml_size);

        return (DCMD_OK);
}

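/*
 * The seg walker iterates over the segments of an address space by walking
 * the AVL tree rooted at as->a_segtree.  Illustrative usage (the as address
 * below is hypothetical):
 *
 *     > ffffff01d2e0c580::walk seg | ::seg
 */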
int
seg_walk_init(mdb_walk_state_t *wsp)
{
        if (wsp->walk_addr == NULL) {
                mdb_warn("seg walk must begin at struct as *\n");
                return (WALK_ERR);
        }

        /*
         * this is really just a wrapper around the AVL tree walker
         */
        wsp->walk_addr = (uintptr_t)&((struct as *)wsp->walk_addr)->a_segtree;
        return (avl_walk_init(wsp));
}

/*ARGSUSED*/
int
seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        struct seg s;

        if (argc != 0)
                return (DCMD_USAGE);

        if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
                mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
                    "SEG", "BASE", "SIZE", "DATA", "OPS");
        }

        if (mdb_vread(&s, sizeof (s), addr) == -1) {
                mdb_warn("failed to read seg at %p", addr);
                return (DCMD_ERR);
        }

        mdb_printf("%?p %?p %?lx %?p %a\n",
            addr, s.s_base, s.s_size, s.s_data, s.s_ops);

        return (DCMD_OK);
}

typedef struct pmap_walk_types {
        uintptr_t pwt_segvn;
        uintptr_t pwt_seghole;
} pmap_walk_types_t;

/*ARGSUSED*/
static int
pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
{
        pgcnt_t *nres = out;

        (*nres)++;

        return (WALK_NEXT);
}

static int
pmap_walk_seg(uintptr_t addr, const struct seg *seg,
    const pmap_walk_types_t *types)
{
        const uintptr_t ops = (uintptr_t)seg->s_ops;

        mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);

        if (ops == types->pwt_segvn && seg->s_data != NULL) {
                struct segvn_data svn;
                pgcnt_t nres = 0;

                svn.vp = NULL;
                (void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);

                /*
                 * Use the segvn_pages walker to find all of the in-core pages
                 * for this mapping.
                 */
                if (mdb_pwalk("segvn_pages", pmap_walk_count_pages, &nres,
                    (uintptr_t)seg->s_data) == -1) {
                        mdb_warn("failed to walk segvn_pages (s_data=%p)",
                            seg->s_data);
                }
                mdb_printf(" %7ldk", (nres * PAGESIZE) / 1024);

                if (svn.vp != NULL) {
                        char buf[29];

                        mdb_vnode2path((uintptr_t)svn.vp, buf, sizeof (buf));
                        mdb_printf(" %s", buf);
                } else {
                        mdb_printf(" [ anon ]");
                }
        } else if (ops == types->pwt_seghole && seg->s_data != NULL) {
                seghole_data_t shd;
                char name[16];

                (void) mdb_vread(&shd, sizeof (shd), (uintptr_t)seg->s_data);
                if (shd.shd_name == NULL || mdb_readstr(name, sizeof (name),
                    (uintptr_t)shd.shd_name) == 0) {
                        name[0] = '\0';
                }

                mdb_printf(" %8s [ hole%s%s ]", "-",
                    name[0] == '\0' ? "" : ":", name);
        } else {
                mdb_printf(" %8s [ &%a ]", "?", seg->s_ops);
        }

        mdb_printf("\n");
        return (WALK_NEXT);
}

static int
pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg,
    const pmap_walk_types_t *types)
{
        const uintptr_t ops = (uintptr_t)seg->s_ops;

        mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);

        if (ops == types->pwt_segvn && seg->s_data != NULL) {
                struct segvn_data svn;

                svn.vp = NULL;
                (void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);

                if (svn.vp != NULL) {
                        mdb_printf(" %0?p", svn.vp);
                } else {
                        mdb_printf(" [ anon ]");
                }
        } else {
                mdb_printf(" [ &%a ]", seg->s_ops);
        }

        mdb_printf("\n");
        return (WALK_NEXT);
}

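/*
 * ::pmap displays the memory map of the process whose proc_t address is
 * given.  By default each segvn segment's resident size and backing vnode
 * path are shown; -q skips the (slower) residency walk.  Illustrative usage
 * (the proc address below is hypothetical):
 *
 *     > ffffff01d7c3e010::pmap -q
 */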
/*ARGSUSED*/
int
pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        proc_t proc;
        uint_t quick = FALSE;
        mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg;
        pmap_walk_types_t wtypes = { 0 };

        GElf_Sym sym;

        if (!(flags & DCMD_ADDRSPEC))
                return (DCMD_USAGE);

        if (mdb_getopts(argc, argv,
            'q', MDB_OPT_SETBITS, TRUE, &quick, NULL) != argc)
                return (DCMD_USAGE);

        if (mdb_vread(&proc, sizeof (proc), addr) == -1) {
                mdb_warn("failed to read proc at %p", addr);
                return (DCMD_ERR);
        }

        if (mdb_lookup_by_name("segvn_ops", &sym) == 0)
                wtypes.pwt_segvn = (uintptr_t)sym.st_value;
        if (mdb_lookup_by_name("seghole_ops", &sym) == 0)
                wtypes.pwt_seghole = (uintptr_t)sym.st_value;

        mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE");

        if (quick) {
                mdb_printf("VNODE\n");
                cb = (mdb_walk_cb_t)pmap_walk_seg_quick;
        } else {
                mdb_printf("%8s %s\n", "RES", "PATH");
        }

        if (mdb_pwalk("seg", cb, (void *)&wtypes, (uintptr_t)proc.p_as) == -1) {
                mdb_warn("failed to walk segments of as %p", proc.p_as);
                return (DCMD_ERR);
        }

        return (DCMD_OK);
}

typedef struct anon_walk_data {
        uintptr_t *aw_levone;
        uintptr_t *aw_levtwo;
        size_t aw_minslot;
        size_t aw_maxslot;
        pgcnt_t aw_nlevone;
        pgcnt_t aw_levone_ndx;
        size_t aw_levtwo_ndx;
        struct anon_map *aw_ampp;
        struct anon_map aw_amp;
        struct anon_hdr aw_ahp;
        int             aw_all; /* report all anon pointers, even NULLs */
} anon_walk_data_t;

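/*
 * The anon walkers iterate over the anon slots of an anon_map.  Small maps
 * (or those with ANON_ALLOC_FORCE set) use a single-level array; larger maps
 * use a two-level array of ANON_CHUNK_SIZE chunks, which the walker reads
 * one chunk at a time.  Illustrative usage (the anon_map address below is
 * hypothetical, and assumes the walker is registered as "anon"):
 *
 *     > ffffff01d88a9c40::walk anon
 */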
int
anon_walk_init_common(mdb_walk_state_t *wsp, ulong_t minslot, ulong_t maxslot)
{
        anon_walk_data_t *aw;

        if (wsp->walk_addr == NULL) {
                mdb_warn("anon walk doesn't support global walks\n");
                return (WALK_ERR);
        }

        aw = mdb_alloc(sizeof (anon_walk_data_t), UM_SLEEP);
        aw->aw_ampp = (struct anon_map *)wsp->walk_addr;

        if (mdb_vread(&aw->aw_amp, sizeof (aw->aw_amp), wsp->walk_addr) == -1) {
                mdb_warn("failed to read anon map at %p", wsp->walk_addr);
                mdb_free(aw, sizeof (anon_walk_data_t));
                return (WALK_ERR);
        }

        if (mdb_vread(&aw->aw_ahp, sizeof (aw->aw_ahp),
            (uintptr_t)(aw->aw_amp.ahp)) == -1) {
                mdb_warn("failed to read anon hdr ptr at %p", aw->aw_amp.ahp);
                mdb_free(aw, sizeof (anon_walk_data_t));
                return (WALK_ERR);
        }

        /* update min and maxslot with the given constraints */
        maxslot = MIN(maxslot, aw->aw_ahp.size);
        minslot = MIN(minslot, maxslot);

        if (aw->aw_ahp.size <= ANON_CHUNK_SIZE ||
            (aw->aw_ahp.flags & ANON_ALLOC_FORCE)) {
                aw->aw_nlevone = maxslot;
                aw->aw_levone_ndx = minslot;
                aw->aw_levtwo = NULL;
        } else {
                aw->aw_nlevone =
                    (maxslot + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
                aw->aw_levone_ndx = 0;
                aw->aw_levtwo =
                    mdb_zalloc(ANON_CHUNK_SIZE * sizeof (uintptr_t), UM_SLEEP);
        }

        aw->aw_levone =
            mdb_alloc(aw->aw_nlevone * sizeof (uintptr_t), UM_SLEEP);
        aw->aw_all = (wsp->walk_arg == ANON_WALK_ALL);

        mdb_vread(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t),
            (uintptr_t)aw->aw_ahp.array_chunk);

        aw->aw_levtwo_ndx = 0;
        aw->aw_minslot = minslot;
        aw->aw_maxslot = maxslot;

        wsp->walk_data = aw;
        return (0);
}

int
anon_walk_step(mdb_walk_state_t *wsp)
{
        anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
        struct anon anon;
        uintptr_t anonptr;
        ulong_t slot;

        /*
         * Once we've walked through level one, we're done.
         */
        if (aw->aw_levone_ndx >= aw->aw_nlevone) {
                return (WALK_DONE);
        }

        if (aw->aw_levtwo == NULL) {
                anonptr = aw->aw_levone[aw->aw_levone_ndx];
                aw->aw_levone_ndx++;
        } else {
                if (aw->aw_levtwo_ndx == 0) {
                        uintptr_t levtwoptr;

                        /* The first time through, skip to our first index. */
                        if (aw->aw_levone_ndx == 0) {
                                aw->aw_levone_ndx =
                                    aw->aw_minslot / ANON_CHUNK_SIZE;
                                aw->aw_levtwo_ndx =
                                    aw->aw_minslot % ANON_CHUNK_SIZE;
                        }

                        levtwoptr = (uintptr_t)aw->aw_levone[aw->aw_levone_ndx];

                        if (levtwoptr == NULL) {
                                if (!aw->aw_all) {
                                        aw->aw_levtwo_ndx = 0;
                                        aw->aw_levone_ndx++;
                                        return (WALK_NEXT);
                                }
                                bzero(aw->aw_levtwo,
                                    ANON_CHUNK_SIZE * sizeof (uintptr_t));

                        } else if (mdb_vread(aw->aw_levtwo,
                            ANON_CHUNK_SIZE * sizeof (uintptr_t), levtwoptr) ==
                            -1) {
                                mdb_warn("unable to read anon_map %p's "
                                    "second-level map %d at %p",
                                    aw->aw_ampp, aw->aw_levone_ndx,
                                    levtwoptr);
                                return (WALK_ERR);
                        }
                }
                slot = aw->aw_levone_ndx * ANON_CHUNK_SIZE + aw->aw_levtwo_ndx;
                anonptr = aw->aw_levtwo[aw->aw_levtwo_ndx];

                /* update the indices for next time */
                aw->aw_levtwo_ndx++;
                if (aw->aw_levtwo_ndx == ANON_CHUNK_SIZE) {
                        aw->aw_levtwo_ndx = 0;
                        aw->aw_levone_ndx++;
                }

                /* make sure the slot # is in the requested range */
                if (slot >= aw->aw_maxslot) {
                        return (WALK_DONE);
                }
        }

        if (anonptr != NULL) {
                mdb_vread(&anon, sizeof (anon), anonptr);
                return (wsp->walk_callback(anonptr, &anon, wsp->walk_cbdata));
        }
        if (aw->aw_all) {
                return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
        }
        return (WALK_NEXT);
}

void
anon_walk_fini(mdb_walk_state_t *wsp)
{
        anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;

        if (aw->aw_levtwo != NULL)
                mdb_free(aw->aw_levtwo, ANON_CHUNK_SIZE * sizeof (uintptr_t));

        mdb_free(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t));
        mdb_free(aw, sizeof (anon_walk_data_t));
}

int
anon_walk_init(mdb_walk_state_t *wsp)
{
        return (anon_walk_init_common(wsp, 0, ULONG_MAX));
}

int
segvn_anon_walk_init(mdb_walk_state_t *wsp)
{
        const uintptr_t         svd_addr = wsp->walk_addr;
        uintptr_t               amp_addr;
        uintptr_t               seg_addr;
        struct segvn_data       svd;
        struct anon_map         amp;
        struct seg              seg;

        if (svd_addr == NULL) {
                mdb_warn("segvn_anon walk doesn't support global walks\n");
                return (WALK_ERR);
        }
        if (mdb_vread(&svd, sizeof (svd), svd_addr) == -1) {
                mdb_warn("segvn_anon walk: unable to read segvn_data at %p",
                    svd_addr);
                return (WALK_ERR);
        }
        if (svd.amp == NULL) {
                mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n",
                    svd_addr);
                return (WALK_ERR);
        }
        amp_addr = (uintptr_t)svd.amp;
        if (mdb_vread(&amp, sizeof (amp), amp_addr) == -1) {
                mdb_warn("segvn_anon walk: unable to read amp %p for "
                    "segvn_data %p", amp_addr, svd_addr);
                return (WALK_ERR);
        }
        seg_addr = (uintptr_t)svd.seg;
        if (mdb_vread(&seg, sizeof (seg), seg_addr) == -1) {
                mdb_warn("segvn_anon walk: unable to read seg %p for "
                    "segvn_data %p", seg_addr, svd_addr);
                return (WALK_ERR);
        }
        if ((seg.s_size + (svd.anon_index << PAGESHIFT)) > amp.size) {
                mdb_warn("anon map %p is too small for segment %p\n",
                    amp_addr, seg_addr);
                return (WALK_ERR);
        }

        wsp->walk_addr = amp_addr;
        return (anon_walk_init_common(wsp,
            svd.anon_index, svd.anon_index + (seg.s_size >> PAGESHIFT)));
}


typedef struct {
        u_offset_t              svs_offset;
        uintptr_t               svs_page;
} segvn_sparse_t;
#define SEGVN_MAX_SPARSE        ((128 * 1024) / sizeof (segvn_sparse_t))

typedef struct {
        uintptr_t               svw_svdp;
        struct segvn_data       svw_svd;
        struct seg              svw_seg;
        size_t                  svw_walkoff;
        ulong_t                 svw_anonskip;
        segvn_sparse_t          *svw_sparse;
        size_t                  svw_sparse_idx;
        size_t                  svw_sparse_count;
        size_t                  svw_sparse_size;
        uint8_t                 svw_sparse_overflow;
        uint8_t                 svw_all;
} segvn_walk_data_t;

static int
segvn_sparse_fill(uintptr_t addr, const void *pp_arg, void *arg)
{
        segvn_walk_data_t       *const  svw = arg;
        const page_t            *const  pp = pp_arg;
        const u_offset_t                offset = pp->p_offset;
        segvn_sparse_t          *const  cur =
            &svw->svw_sparse[svw->svw_sparse_count];

        /* See if the page is of interest */
        if ((u_offset_t)(offset - svw->svw_svd.offset) >= svw->svw_seg.s_size) {
                return (WALK_NEXT);
        }
        /* See if we have space for the new entry, then add it. */
        if (svw->svw_sparse_count >= svw->svw_sparse_size) {
                svw->svw_sparse_overflow = 1;
                return (WALK_DONE);
        }
        svw->svw_sparse_count++;
        cur->svs_offset = offset;
        cur->svs_page = addr;
        return (WALK_NEXT);
}

static int
segvn_sparse_cmp(const void *lp, const void *rp)
{
        const segvn_sparse_t *const     l = lp;
        const segvn_sparse_t *const     r = rp;

        if (l->svs_offset < r->svs_offset) {
                return (-1);
        }
        if (l->svs_offset > r->svs_offset) {
                return (1);
        }
        return (0);
}

/*
 * Builds on the "anon_all" walker to walk all resident pages in a segvn_data
 * structure.  For segvn_datas without an anon structure, it just looks up
 * pages in the vnode.  For segvn_datas with an anon structure, NULL slots
 * pass through to the vnode, and non-null slots are checked for residency.
 */
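/*
 * Illustrative usage (the segvn_data address below is hypothetical):
 *
 *     > ffffff01d90b2d28::walk segvn_pages | ::page
 */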
int
segvn_pages_walk_init(mdb_walk_state_t *wsp)
{
        segvn_walk_data_t       *svw;
        struct segvn_data       *svd;

        if (wsp->walk_addr == NULL) {
                mdb_warn("segvn walk doesn't support global walks\n");
                return (WALK_ERR);
        }

        svw = mdb_zalloc(sizeof (*svw), UM_SLEEP);
        svw->svw_svdp = wsp->walk_addr;
        svw->svw_anonskip = 0;
        svw->svw_sparse_idx = 0;
        svw->svw_walkoff = 0;
        svw->svw_all = (wsp->walk_arg == SEGVN_PAGES_ALL);

        if (mdb_vread(&svw->svw_svd, sizeof (svw->svw_svd), wsp->walk_addr) ==
            -1) {
                mdb_warn("failed to read segvn_data at %p", wsp->walk_addr);
                mdb_free(svw, sizeof (*svw));
                return (WALK_ERR);
        }

        svd = &svw->svw_svd;
        if (mdb_vread(&svw->svw_seg, sizeof (svw->svw_seg),
            (uintptr_t)svd->seg) == -1) {
                mdb_warn("failed to read seg at %p (from %p)",
                    svd->seg, &((struct segvn_data *)(wsp->walk_addr))->seg);
                mdb_free(svw, sizeof (*svw));
                return (WALK_ERR);
        }

        if (svd->amp == NULL && svd->vp == NULL) {
                /* make the walk terminate immediately;  no pages */
                svw->svw_walkoff = svw->svw_seg.s_size;

        } else if (svd->amp == NULL &&
            (svw->svw_seg.s_size >> PAGESHIFT) >= SEGVN_MAX_SPARSE) {
                /*
                 * If we don't have an anon pointer, and the segment is large,
                 * we try to load the in-memory pages into a fixed-size array,
                 * which is then sorted and reported directly.  This is much
                 * faster than doing an mdb_page_lookup() for each possible
                 * offset.
                 *
                 * If the allocation fails, or there are too many pages
                 * in-core, we fall back to looking up the pages individually.
                 */
1452                 svw->svw_sparse = mdb_alloc(
1453                     SEGVN_MAX_SPARSE * sizeof (*svw->svw_sparse), UM_NOSLEEP);
1454                 if (svw->svw_sparse != NULL) {
1455                         svw->svw_sparse_size = SEGVN_MAX_SPARSE;
1456 
1457                         if (mdb_pwalk("page", segvn_sparse_fill, svw,
1458                             (uintptr_t)svd->vp) == -1 ||
1459                             svw->svw_sparse_overflow) {
1460                                 mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1461                                     sizeof (*svw->svw_sparse));
1462                                 svw->svw_sparse = NULL;
1463                         } else {
1464                                 qsort(svw->svw_sparse, svw->svw_sparse_count,
1465                                     sizeof (*svw->svw_sparse),
1466                                     segvn_sparse_cmp);
1467                         }
1468                 }
1469 
1470         } else if (svd->amp != NULL) {
1471                 const char *const layer = (!svw->svw_all && svd->vp == NULL) ?
1472                     "segvn_anon" : "segvn_anon_all";
1473                 /*
1474                  * If we're not printing all offsets, and the segvn_data has
1475                  * no backing VP, we can use the "segvn_anon" walker, which
1476                  * efficiently skips NULL slots.
1477                  *
1478                  * Otherwise, we layer over the "segvn_anon_all" walker
1479                  * (which reports all anon slots, even NULL ones), so that
1480                  * segvn_pages_walk_step() knows the precise offset for each
1481                  * element.  It uses that offset information to look up the
1482                  * backing pages for NULL anon slots.
1483                  */
1484                 if (mdb_layered_walk(layer, wsp) == -1) {
1485                         mdb_warn("segvn_pages: failed to layer \"%s\" "
1486                             "for segvn_data %p", layer, svw->svw_svdp);
1487                         mdb_free(svw, sizeof (*svw));
1488                         return (WALK_ERR);
1489                 }
1490         }
1491 
1492         wsp->walk_data = svw;
1493         return (WALK_NEXT);
1494 }
1495 
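/*
 * One call accounts for one page-sized offset within the segment.  The page
 * for that offset comes from the sparse array built at init time (if any),
 * from a <vp, offset> lookup, or from the layered anon walk.  Resident pages
 * are read in and handed to the callback; in "all" mode, non-resident
 * offsets produce a <NULL, NULL> callback instead.
 */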
1496 int
1497 segvn_pages_walk_step(mdb_walk_state_t *wsp)
1498 {
1499         segvn_walk_data_t       *const  svw = wsp->walk_data;
1500         struct seg              *const  seg = &svw->svw_seg;
1501         struct segvn_data       *const  svd = &svw->svw_svd;
1502         uintptr_t               pp;
1503         page_t                  page;
1504 
1505         /* If we've walked off the end of the segment, we're done. */
1506         if (svw->svw_walkoff >= seg->s_size) {
1507                 return (WALK_DONE);
1508         }
1509 
1510         /*
1511          * If we built a sparse page array, take the page from it directly.
1512          */
1513         if (svw->svw_sparse != NULL) {
1514                 u_offset_t off;
1515 
1516                 if (svw->svw_sparse_idx >= svw->svw_sparse_count) {
1517                         pp = NULL;
1518                         if (!svw->svw_all) {
1519                                 return (WALK_DONE);
1520                         }
1521                 } else {
1522                         segvn_sparse_t  *const svs =
1523                             &svw->svw_sparse[svw->svw_sparse_idx];
1524                         off = svs->svs_offset - svd->offset;
1525                         if (svw->svw_all && svw->svw_walkoff != off) {
1526                                 pp = NULL;
1527                         } else {
1528                                 pp = svs->svs_page;
1529                                 svw->svw_sparse_idx++;
1530                         }
1531                 }
1532 
1533         } else if (svd->amp == NULL || wsp->walk_addr == NULL) {
1534                 /*
1535                  * If there's no anon, or the anon slot is NULL, look up
1536                  * <vp, offset>.
1537                  */
1538                 if (svd->vp != NULL) {
1539                         pp = mdb_page_lookup((uintptr_t)svd->vp,
1540                             svd->offset + svw->svw_walkoff);
1541                 } else {
1542                         pp = NULL;
1543                 }
1544 
1545         } else {
1546                 const struct anon       *const  anon = wsp->walk_layer;
1547 
1548                 /*
1549                  * We have a "struct anon"; if it's not swapped out,
1550                  * look up the page.
1551                  */
1552                 if (anon->an_vp != NULL || anon->an_off != 0) {
1553                         pp = mdb_page_lookup((uintptr_t)anon->an_vp,
1554                             anon->an_off);
1555                         if (pp == 0 && mdb_get_state() != MDB_STATE_RUNNING) {
1556                                 mdb_warn("walk segvn_pages: segvn_data %p "
1557                                     "offset %ld, anon page <%p, %llx> not "
1558                                     "found.\n", svw->svw_svdp, svw->svw_walkoff,
1559                                     anon->an_vp, anon->an_off);
1560                         }
1561                 } else {
1562                         if (anon->an_pvp == NULL) {
1563                                 mdb_warn("walk segvn_pages: useless struct "
1564                                     "anon at %p\n", wsp->walk_addr);
1565                         }
1566                         pp = NULL;      /* nothing at this offset */
1567                 }
1568         }
1569 
1570         svw->svw_walkoff += PAGESIZE;        /* Update for the next call */
1571         if (pp != NULL) {
1572                 if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
1573                         mdb_warn("unable to read page_t at %#lx", pp);
1574                         return (WALK_ERR);
1575                 }
1576                 return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
1577         }
1578         if (svw->svw_all) {
1579                 return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
1580         }
1581         return (WALK_NEXT);
1582 }
1583 
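/*
 * Release the sparse page array, if one was built, along with the walk state.
 */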
1584 void
1585 segvn_pages_walk_fini(mdb_walk_state_t *wsp)
1586 {
1587         segvn_walk_data_t       *const  svw = wsp->walk_data;
1588 
1589         if (svw->svw_sparse != NULL) {
1590                 mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1591                     sizeof (*svw->svw_sparse));
1592         }
1593         mdb_free(svw, sizeof (*svw));
1594 }
1595 
1596 /*
1597  * Grumble, grumble: segmap's hash function is not exported, so replicate it.
1598  */
1599 #define SMAP_HASHFUNC(vp, off)  \
1600         ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
1601         ((off) >> MAXBSHIFT)) & smd_hashmsk)
1602 
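/*
 * Given the address of a vnode and an optional byte offset, hash into
 * segkmap's smap table and report the matching smap together with the
 * kernel virtual address it maps.  Illustrative usage (the dcmd name is
 * assumed from its registration elsewhere in the module):
 *
 *	> <vnode address>::vnode2smap [offset]
 */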
1603 int
1604 vnode2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1605 {
1606         long smd_hashmsk;
1607         int hash;
1608         uintptr_t offset = 0;
1609         struct smap smp;
1610         uintptr_t saddr, kaddr;
1611         uintptr_t smd_hash, smd_smap;
1612         struct seg seg;
1613 
1614         if (!(flags & DCMD_ADDRSPEC))
1615                 return (DCMD_USAGE);
1616 
1617         if (mdb_readvar(&smd_hashmsk, "smd_hashmsk") == -1) {
1618                 mdb_warn("failed to read smd_hashmsk");
1619                 return (DCMD_ERR);
1620         }
1621 
1622         if (mdb_readvar(&smd_hash, "smd_hash") == -1) {
1623                 mdb_warn("failed to read smd_hash");
1624                 return (DCMD_ERR);
1625         }
1626 
1627         if (mdb_readvar(&smd_smap, "smd_smap") == -1) {
1628                 mdb_warn("failed to read smd_smap");
1629                 return (DCMD_ERR);
1630         }
1631 
1632         if (mdb_readvar(&kaddr, "segkmap") == -1) {
1633                 mdb_warn("failed to read segkmap");
1634                 return (DCMD_ERR);
1635         }
1636 
1637         if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1638                 mdb_warn("failed to read segkmap at %p", kaddr);
1639                 return (DCMD_ERR);
1640         }
1641 
1642         if (argc != 0) {
1643                 const mdb_arg_t *arg = &argv[0];
1644 
1645                 if (arg->a_type == MDB_TYPE_IMMEDIATE)
1646                         offset = arg->a_un.a_val;
1647                 else
1648                         offset = (uintptr_t)mdb_strtoull(arg->a_un.a_str);
1649         }
1650 
1651         hash = SMAP_HASHFUNC(addr, offset);
1652 
1653         if (mdb_vread(&saddr, sizeof (saddr),
1654             smd_hash + hash * sizeof (uintptr_t)) == -1) {
1655                 mdb_warn("couldn't read smap hash bucket at %p",
1656                     smd_hash + hash * sizeof (uintptr_t));
1657                 return (DCMD_ERR);
1658         }
1659 
1660         do {
1661                 if (mdb_vread(&smp, sizeof (smp), saddr) == -1) {
1662                         mdb_warn("couldn't read smap at %p", saddr);
1663                         return (DCMD_ERR);
1664                 }
1665 
1666                 if ((uintptr_t)smp.sm_vp == addr && smp.sm_off == offset) {
1667                         mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n",
1668                             addr, offset, saddr, ((saddr - smd_smap) /
1669                             sizeof (smp)) * MAXBSIZE + seg.s_base);
1670                         return (DCMD_OK);
1671                 }
1672 
1673                 saddr = (uintptr_t)smp.sm_hash;
1674         } while (saddr != NULL);
1675 
1676         mdb_printf("no smap for vnode %p, offs %p\n", addr, offset);
1677         return (DCMD_OK);
1678 }
1679 
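/*
 * Given a kernel virtual address within the segkmap segment, compute the
 * address of the smap structure describing that MAXBSIZE window.
 * Illustrative usage (the dcmd name is assumed from its registration
 * elsewhere in the module):
 *
 *	> <segkmap-mapped address>::addr2smap
 */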
1680 /*ARGSUSED*/
1681 int
1682 addr2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1683 {
1684         uintptr_t kaddr;
1685         struct seg seg;
1686         struct segmap_data sd;
1687 
1688         if (!(flags & DCMD_ADDRSPEC))
1689                 return (DCMD_USAGE);
1690 
1691         if (mdb_readvar(&kaddr, "segkmap") == -1) {
1692                 mdb_warn("failed to read segkmap");
1693                 return (DCMD_ERR);
1694         }
1695 
1696         if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1697                 mdb_warn("failed to read segkmap at %p", kaddr);
1698                 return (DCMD_ERR);
1699         }
1700 
1701         if (mdb_vread(&sd, sizeof (sd), (uintptr_t)seg.s_data) == -1) {
1702                 mdb_warn("failed to read segmap_data at %p", seg.s_data);
1703                 return (DCMD_ERR);
1704         }
1705 
1706         mdb_printf("%p is smap %p\n", addr,
1707             ((addr - (uintptr_t)seg.s_base) >> MAXBSHIFT) *
1708             sizeof (struct smap) + (uintptr_t)sd.smd_sm);
1709 
1710         return (DCMD_OK);
1711 }