1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2017 Joyent, Inc. 24 */ 25 26 #include <mdb/mdb_param.h> 27 #include <mdb/mdb_modapi.h> 28 #include <mdb/mdb_ks.h> 29 #include <sys/types.h> 30 #include <sys/memlist.h> 31 #include <sys/swap.h> 32 #include <sys/systm.h> 33 #include <sys/thread.h> 34 #include <vm/anon.h> 35 #include <vm/as.h> 36 #include <vm/page.h> 37 #include <sys/thread.h> 38 #include <sys/swap.h> 39 #include <sys/memlist.h> 40 #include <sys/vnode.h> 41 #include <vm/seg_map.h> 42 #include <vm/seg_vn.h> 43 #include <vm/seg_hole.h> 44 45 #include "avl.h" 46 #include "memory.h" 47 48 /* 49 * Page walker. 50 * By default, this will walk all pages in the system. If given an 51 * address, it will walk all pages belonging to the vnode at that 52 * address. 53 */ 54 55 /* 56 * page_walk_data 57 * 58 * pw_hashleft is set to -1 when walking a vnode's pages, and holds the 59 * number of hash locations remaining in the page hash table when 60 * walking all pages. 61 * 62 * The astute reader will notice that pw_hashloc is only used when 63 * reading all pages (to hold a pointer to our location in the page 64 * hash table), and that pw_first is only used when reading the pages 65 * belonging to a particular vnode (to hold a pointer to the first 66 * page). While these could be combined to be a single pointer, they 67 * are left separate for clarity. 68 */ 69 typedef struct page_walk_data { 70 long pw_hashleft; 71 void **pw_hashloc; 72 uintptr_t pw_first; 73 } page_walk_data_t; 74 75 int 76 page_walk_init(mdb_walk_state_t *wsp) 77 { 78 page_walk_data_t *pwd; 79 void **ptr; 80 size_t hashsz; 81 vnode_t vn; 82 83 if (wsp->walk_addr == NULL) { 84 85 /* 86 * Walk all pages 87 */ 88 89 if ((mdb_readvar(&ptr, "page_hash") == -1) || 90 (mdb_readvar(&hashsz, "page_hashsz") == -1) || 91 (ptr == NULL) || (hashsz == 0)) { 92 mdb_warn("page_hash, page_hashsz not found or invalid"); 93 return (WALK_ERR); 94 } 95 96 /* 97 * Since we are walking all pages, initialize hashleft 98 * to be the remaining number of entries in the page 99 * hash. hashloc is set the start of the page hash 100 * table. Setting the walk address to 0 indicates that 101 * we aren't currently following a hash chain, and that 102 * we need to scan the page hash table for a page. 103 */ 104 pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP); 105 pwd->pw_hashleft = hashsz; 106 pwd->pw_hashloc = ptr; 107 wsp->walk_addr = 0; 108 } else { 109 110 /* 111 * Walk just this vnode 112 */ 113 114 if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) { 115 mdb_warn("unable to read vnode_t at %#lx", 116 wsp->walk_addr); 117 return (WALK_ERR); 118 } 119 120 /* 121 * We set hashleft to -1 to indicate that we are 122 * walking a vnode, and initialize first to 0 (it is 123 * used to terminate the walk, so it must not be set 124 * until after we have walked the first page). The 125 * walk address is set to the first page. 126 */ 127 pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP); 128 pwd->pw_hashleft = -1; 129 pwd->pw_first = 0; 130 131 wsp->walk_addr = (uintptr_t)vn.v_pages; 132 } 133 134 wsp->walk_data = pwd; 135 136 return (WALK_NEXT); 137 } 138 139 int 140 page_walk_step(mdb_walk_state_t *wsp) 141 { 142 page_walk_data_t *pwd = wsp->walk_data; 143 page_t page; 144 uintptr_t pp; 145 146 pp = wsp->walk_addr; 147 148 if (pwd->pw_hashleft < 0) { 149 150 /* We're walking a vnode's pages */ 151 152 /* 153 * If we don't have any pages to walk, we have come 154 * back around to the first one (we finished), or we 155 * can't read the page we're looking at, we are done. 156 */ 157 if (pp == NULL || pp == pwd->pw_first) 158 return (WALK_DONE); 159 if (mdb_vread(&page, sizeof (page_t), pp) == -1) { 160 mdb_warn("unable to read page_t at %#lx", pp); 161 return (WALK_ERR); 162 } 163 164 /* 165 * Set the walk address to the next page, and if the 166 * first page hasn't been set yet (i.e. we are on the 167 * first page), set it. 168 */ 169 wsp->walk_addr = (uintptr_t)page.p_vpnext; 170 if (pwd->pw_first == NULL) 171 pwd->pw_first = pp; 172 173 } else if (pwd->pw_hashleft > 0) { 174 175 /* We're walking all pages */ 176 177 /* 178 * If pp (the walk address) is NULL, we scan through 179 * the page hash table until we find a page. 180 */ 181 if (pp == NULL) { 182 183 /* 184 * Iterate through the page hash table until we 185 * find a page or reach the end. 186 */ 187 do { 188 if (mdb_vread(&pp, sizeof (uintptr_t), 189 (uintptr_t)pwd->pw_hashloc) == -1) { 190 mdb_warn("unable to read from %#p", 191 pwd->pw_hashloc); 192 return (WALK_ERR); 193 } 194 pwd->pw_hashleft--; 195 pwd->pw_hashloc++; 196 } while (pwd->pw_hashleft && (pp == NULL)); 197 198 /* 199 * We've reached the end; exit. 200 */ 201 if (pp == NULL) 202 return (WALK_DONE); 203 } 204 205 if (mdb_vread(&page, sizeof (page_t), pp) == -1) { 206 mdb_warn("unable to read page_t at %#lx", pp); 207 return (WALK_ERR); 208 } 209 210 /* 211 * Set the walk address to the next page. 212 */ 213 wsp->walk_addr = (uintptr_t)page.p_hash; 214 215 } else { 216 /* We've finished walking all pages. */ 217 return (WALK_DONE); 218 } 219 220 return (wsp->walk_callback(pp, &page, wsp->walk_cbdata)); 221 } 222 223 void 224 page_walk_fini(mdb_walk_state_t *wsp) 225 { 226 mdb_free(wsp->walk_data, sizeof (page_walk_data_t)); 227 } 228 229 /* 230 * allpages walks all pages in the system in order they appear in 231 * the memseg structure 232 */ 233 234 #define PAGE_BUFFER 128 235 236 int 237 allpages_walk_init(mdb_walk_state_t *wsp) 238 { 239 if (wsp->walk_addr != 0) { 240 mdb_warn("allpages only supports global walks.\n"); 241 return (WALK_ERR); 242 } 243 244 if (mdb_layered_walk("memseg", wsp) == -1) { 245 mdb_warn("couldn't walk 'memseg'"); 246 return (WALK_ERR); 247 } 248 249 wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP); 250 return (WALK_NEXT); 251 } 252 253 int 254 allpages_walk_step(mdb_walk_state_t *wsp) 255 { 256 const struct memseg *msp = wsp->walk_layer; 257 page_t *buf = wsp->walk_data; 258 size_t pg_read, i; 259 size_t pg_num = msp->pages_end - msp->pages_base; 260 const page_t *pg_addr = msp->pages; 261 262 while (pg_num > 0) { 263 pg_read = MIN(pg_num, PAGE_BUFFER); 264 265 if (mdb_vread(buf, pg_read * sizeof (page_t), 266 (uintptr_t)pg_addr) == -1) { 267 mdb_warn("can't read page_t's at %#lx", pg_addr); 268 return (WALK_ERR); 269 } 270 for (i = 0; i < pg_read; i++) { 271 int ret = wsp->walk_callback((uintptr_t)&pg_addr[i], 272 &buf[i], wsp->walk_cbdata); 273 274 if (ret != WALK_NEXT) 275 return (ret); 276 } 277 pg_num -= pg_read; 278 pg_addr += pg_read; 279 } 280 281 return (WALK_NEXT); 282 } 283 284 void 285 allpages_walk_fini(mdb_walk_state_t *wsp) 286 { 287 mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER); 288 } 289 290 /* 291 * Hash table + LRU queue. 292 * This table is used to cache recently read vnodes for the memstat 293 * command, to reduce the number of mdb_vread calls. This greatly 294 * speeds the memstat command on on live, large CPU count systems. 295 */ 296 297 #define VN_SMALL 401 298 #define VN_LARGE 10007 299 #define VN_HTABLE_KEY(p, hp) ((p) % ((hp)->vn_htable_buckets)) 300 301 struct vn_htable_list { 302 uint_t vn_flag; /* v_flag from vnode */ 303 uintptr_t vn_ptr; /* pointer to vnode */ 304 struct vn_htable_list *vn_q_next; /* queue next pointer */ 305 struct vn_htable_list *vn_q_prev; /* queue prev pointer */ 306 struct vn_htable_list *vn_h_next; /* hash table pointer */ 307 }; 308 309 /* 310 * vn_q_first -> points to to head of queue: the vnode that was most 311 * recently used 312 * vn_q_last -> points to the oldest used vnode, and is freed once a new 313 * vnode is read. 314 * vn_htable -> hash table 315 * vn_htable_buf -> contains htable objects 316 * vn_htable_size -> total number of items in the hash table 317 * vn_htable_buckets -> number of buckets in the hash table 318 */ 319 typedef struct vn_htable { 320 struct vn_htable_list *vn_q_first; 321 struct vn_htable_list *vn_q_last; 322 struct vn_htable_list **vn_htable; 323 struct vn_htable_list *vn_htable_buf; 324 int vn_htable_size; 325 int vn_htable_buckets; 326 } vn_htable_t; 327 328 329 /* allocate memory, initilize hash table and LRU queue */ 330 static void 331 vn_htable_init(vn_htable_t *hp, size_t vn_size) 332 { 333 int i; 334 int htable_size = MAX(vn_size, VN_LARGE); 335 336 if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list) 337 * htable_size, UM_NOSLEEP|UM_GC)) == NULL) { 338 htable_size = VN_SMALL; 339 hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list) 340 * htable_size, UM_SLEEP|UM_GC); 341 } 342 343 hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *) 344 * htable_size, UM_SLEEP|UM_GC); 345 346 hp->vn_q_first = &hp->vn_htable_buf[0]; 347 hp->vn_q_last = &hp->vn_htable_buf[htable_size - 1]; 348 hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1]; 349 hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2]; 350 351 for (i = 1; i < (htable_size-1); i++) { 352 hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1]; 353 hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1]; 354 } 355 356 hp->vn_htable_size = htable_size; 357 hp->vn_htable_buckets = htable_size; 358 } 359 360 361 /* 362 * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag. 363 * The function tries to find needed information in the following order: 364 * 365 * 1. check if ptr is the first in queue 366 * 2. check if ptr is in hash table (if so move it to the top of queue) 367 * 3. do mdb_vread, remove last queue item from queue and hash table. 368 * Insert new information to freed object, and put this object in to the 369 * top of the queue. 370 */ 371 static int 372 vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr) 373 { 374 int hkey; 375 struct vn_htable_list *hent, **htmp, *q_next, *q_prev; 376 struct vn_htable_list *q_first = hp->vn_q_first; 377 378 /* 1. vnode ptr is the first in queue, just get v_flag and return */ 379 if (q_first->vn_ptr == ptr) { 380 vp->v_flag = q_first->vn_flag; 381 382 return (0); 383 } 384 385 /* 2. search the hash table for this ptr */ 386 hkey = VN_HTABLE_KEY(ptr, hp); 387 hent = hp->vn_htable[hkey]; 388 while (hent && (hent->vn_ptr != ptr)) 389 hent = hent->vn_h_next; 390 391 /* 3. if hent is NULL, we did not find in hash table, do mdb_vread */ 392 if (hent == NULL) { 393 struct vnode vn; 394 395 if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) { 396 mdb_warn("unable to read vnode_t at %#lx", ptr); 397 return (-1); 398 } 399 400 /* we will insert read data into the last element in queue */ 401 hent = hp->vn_q_last; 402 403 /* remove last hp->vn_q_last object from hash table */ 404 if (hent->vn_ptr) { 405 htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)]; 406 while (*htmp != hent) 407 htmp = &(*htmp)->vn_h_next; 408 *htmp = hent->vn_h_next; 409 } 410 411 /* insert data into new free object */ 412 hent->vn_ptr = ptr; 413 hent->vn_flag = vn.v_flag; 414 415 /* insert new object into hash table */ 416 hent->vn_h_next = hp->vn_htable[hkey]; 417 hp->vn_htable[hkey] = hent; 418 } 419 420 /* Remove from queue. hent is not first, vn_q_prev is not NULL */ 421 q_next = hent->vn_q_next; 422 q_prev = hent->vn_q_prev; 423 if (q_next == NULL) 424 hp->vn_q_last = q_prev; 425 else 426 q_next->vn_q_prev = q_prev; 427 q_prev->vn_q_next = q_next; 428 429 /* Add to the front of queue */ 430 hent->vn_q_prev = NULL; 431 hent->vn_q_next = q_first; 432 q_first->vn_q_prev = hent; 433 hp->vn_q_first = hent; 434 435 /* Set v_flag in vnode pointer from hent */ 436 vp->v_flag = hent->vn_flag; 437 438 return (0); 439 } 440 441 /* Summary statistics of pages */ 442 typedef struct memstat { 443 struct vnode *ms_kvp; /* Cached address of kernel vnode */ 444 struct vnode *ms_unused_vp; /* Unused pages vnode pointer */ 445 struct vnode *ms_zvp; /* Cached address of zio vnode */ 446 uint64_t ms_kmem; /* Pages of kernel memory */ 447 uint64_t ms_zfs_data; /* Pages of zfs data */ 448 uint64_t ms_anon; /* Pages of anonymous memory */ 449 uint64_t ms_vnode; /* Pages of named (vnode) memory */ 450 uint64_t ms_exec; /* Pages of exec/library memory */ 451 uint64_t ms_cachelist; /* Pages on the cachelist (free) */ 452 uint64_t ms_bootpages; /* Pages on the bootpages list */ 453 uint64_t ms_total; /* Pages on page hash */ 454 vn_htable_t *ms_vn_htable; /* Pointer to hash table */ 455 struct vnode ms_vn; /* vnode buffer */ 456 } memstat_t; 457 458 #define MS_PP_ISKAS(pp, stats) \ 459 ((pp)->p_vnode == (stats)->ms_kvp) 460 461 #define MS_PP_ISZFS_DATA(pp, stats) \ 462 (((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp)) 463 464 /* 465 * Summarize pages by type and update stat information 466 */ 467 468 /* ARGSUSED */ 469 static int 470 memstat_callback(page_t *page, page_t *pp, memstat_t *stats) 471 { 472 struct vnode *vp = &stats->ms_vn; 473 474 if (PP_ISBOOTPAGES(pp)) 475 stats->ms_bootpages++; 476 else if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp) 477 return (WALK_NEXT); 478 else if (MS_PP_ISKAS(pp, stats)) 479 stats->ms_kmem++; 480 else if (MS_PP_ISZFS_DATA(pp, stats)) 481 stats->ms_zfs_data++; 482 else if (PP_ISFREE(pp)) 483 stats->ms_cachelist++; 484 else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode)) 485 return (WALK_ERR); 486 else if (IS_SWAPFSVP(vp)) 487 stats->ms_anon++; 488 else if ((vp->v_flag & VVMEXEC) != 0) 489 stats->ms_exec++; 490 else 491 stats->ms_vnode++; 492 493 stats->ms_total++; 494 495 return (WALK_NEXT); 496 } 497 498 /* ARGSUSED */ 499 int 500 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 501 { 502 pgcnt_t total_pages, physmem; 503 ulong_t freemem; 504 memstat_t stats; 505 GElf_Sym sym; 506 vn_htable_t ht; 507 struct vnode *kvps; 508 uintptr_t vn_size = 0; 509 510 bzero(&stats, sizeof (memstat_t)); 511 512 /* 513 * -s size, is an internal option. It specifies the size of vn_htable. 514 * Hash table size is set in the following order: 515 * If user has specified the size that is larger than VN_LARGE: try it, 516 * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if 517 * failed to allocate default to VN_SMALL. 518 * For a better efficiency of hash table it is highly recommended to 519 * set size to a prime number. 520 */ 521 if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv, 522 's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc) 523 return (DCMD_USAGE); 524 525 /* Initialize vnode hash list and queue */ 526 vn_htable_init(&ht, vn_size); 527 stats.ms_vn_htable = &ht; 528 529 /* Total physical memory */ 530 if (mdb_readvar(&total_pages, "total_pages") == -1) { 531 mdb_warn("unable to read total_pages"); 532 return (DCMD_ERR); 533 } 534 535 /* Artificially limited memory */ 536 if (mdb_readvar(&physmem, "physmem") == -1) { 537 mdb_warn("unable to read physmem"); 538 return (DCMD_ERR); 539 } 540 541 /* read kernel vnode array pointer */ 542 if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps", 543 (GElf_Sym *)&sym) == -1) { 544 mdb_warn("unable to read kvps"); 545 return (DCMD_ERR); 546 } 547 kvps = (struct vnode *)(uintptr_t)sym.st_value; 548 stats.ms_kvp = &kvps[KV_KVP]; 549 550 /* 551 * Read the zio vnode pointer. 552 */ 553 stats.ms_zvp = &kvps[KV_ZVP]; 554 555 /* 556 * If physmem != total_pages, then the administrator has limited the 557 * number of pages available in the system. Excluded pages are 558 * associated with the unused pages vnode. Read this vnode so the 559 * pages can be excluded in the page accounting. 560 */ 561 if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp", 562 (GElf_Sym *)&sym) == -1) { 563 mdb_warn("unable to read unused_pages_vp"); 564 return (DCMD_ERR); 565 } 566 stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value; 567 568 /* walk all pages, collect statistics */ 569 if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback, 570 &stats) == -1) { 571 mdb_warn("can't walk memseg"); 572 return (DCMD_ERR); 573 } 574 575 #define MS_PCT_TOTAL(x) ((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \ 576 ((physmem) * 10))) 577 578 mdb_printf("Page Summary Pages MB" 579 " %%Tot\n"); 580 mdb_printf("------------ ---------------- ----------------" 581 " ----\n"); 582 mdb_printf("Kernel %16llu %16llu %3lu%%\n", 583 stats.ms_kmem, 584 (uint64_t)stats.ms_kmem * PAGESIZE / (1024 * 1024), 585 MS_PCT_TOTAL(stats.ms_kmem)); 586 587 if (stats.ms_bootpages != 0) { 588 mdb_printf("Boot pages %16llu %16llu %3lu%%\n", 589 stats.ms_bootpages, 590 (uint64_t)stats.ms_bootpages * PAGESIZE / (1024 * 1024), 591 MS_PCT_TOTAL(stats.ms_bootpages)); 592 } 593 594 if (stats.ms_zfs_data != 0) { 595 mdb_printf("ZFS File Data %16llu %16llu %3lu%%\n", 596 stats.ms_zfs_data, 597 (uint64_t)stats.ms_zfs_data * PAGESIZE / (1024 * 1024), 598 MS_PCT_TOTAL(stats.ms_zfs_data)); 599 } 600 601 mdb_printf("Anon %16llu %16llu %3lu%%\n", 602 stats.ms_anon, 603 (uint64_t)stats.ms_anon * PAGESIZE / (1024 * 1024), 604 MS_PCT_TOTAL(stats.ms_anon)); 605 mdb_printf("Exec and libs %16llu %16llu %3lu%%\n", 606 stats.ms_exec, 607 (uint64_t)stats.ms_exec * PAGESIZE / (1024 * 1024), 608 MS_PCT_TOTAL(stats.ms_exec)); 609 mdb_printf("Page cache %16llu %16llu %3lu%%\n", 610 stats.ms_vnode, 611 (uint64_t)stats.ms_vnode * PAGESIZE / (1024 * 1024), 612 MS_PCT_TOTAL(stats.ms_vnode)); 613 mdb_printf("Free (cachelist) %16llu %16llu %3lu%%\n", 614 stats.ms_cachelist, 615 (uint64_t)stats.ms_cachelist * PAGESIZE / (1024 * 1024), 616 MS_PCT_TOTAL(stats.ms_cachelist)); 617 618 /* 619 * occasionally, we double count pages above. To avoid printing 620 * absurdly large values for freemem, we clamp it at zero. 621 */ 622 if (physmem > stats.ms_total) 623 freemem = physmem - stats.ms_total; 624 else 625 freemem = 0; 626 627 mdb_printf("Free (freelist) %16lu %16llu %3lu%%\n", freemem, 628 (uint64_t)freemem * PAGESIZE / (1024 * 1024), 629 MS_PCT_TOTAL(freemem)); 630 631 mdb_printf("\nTotal %16lu %16lu\n", 632 physmem, 633 (uint64_t)physmem * PAGESIZE / (1024 * 1024)); 634 635 if (physmem != total_pages) { 636 mdb_printf("Physical %16lu %16lu\n", 637 total_pages, 638 (uint64_t)total_pages * PAGESIZE / (1024 * 1024)); 639 } 640 641 #undef MS_PCT_TOTAL 642 643 return (DCMD_OK); 644 } 645 646 void 647 pagelookup_help(void) 648 { 649 mdb_printf( 650 "Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n" 651 "\n" 652 "Can be invoked three different ways:\n\n" 653 " ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n" 654 " %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n" 655 " %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n" 656 "\n" 657 "The latter two forms are useful in pipelines.\n"); 658 } 659 660 int 661 pagelookup(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 662 { 663 uintptr_t vp = -(uintptr_t)1; 664 uint64_t offset = -(uint64_t)1; 665 666 uintptr_t pageaddr; 667 int hasaddr = (flags & DCMD_ADDRSPEC); 668 int usedaddr = 0; 669 670 if (mdb_getopts(argc, argv, 671 'v', MDB_OPT_UINTPTR, &vp, 672 'o', MDB_OPT_UINT64, &offset, 673 0) != argc) { 674 return (DCMD_USAGE); 675 } 676 677 if (vp == -(uintptr_t)1) { 678 if (offset == -(uint64_t)1) { 679 mdb_warn( 680 "pagelookup: at least one of -v vp or -o offset " 681 "required.\n"); 682 return (DCMD_USAGE); 683 } 684 vp = addr; 685 usedaddr = 1; 686 } else if (offset == -(uint64_t)1) { 687 offset = mdb_get_dot(); 688 usedaddr = 1; 689 } 690 if (usedaddr && !hasaddr) { 691 mdb_warn("pagelookup: address required\n"); 692 return (DCMD_USAGE); 693 } 694 if (!usedaddr && hasaddr) { 695 mdb_warn( 696 "pagelookup: address specified when both -v and -o were " 697 "passed"); 698 return (DCMD_USAGE); 699 } 700 701 pageaddr = mdb_page_lookup(vp, offset); 702 if (pageaddr == 0) { 703 mdb_warn("pagelookup: no page for {vp = %p, offset = %llp)\n", 704 vp, offset); 705 return (DCMD_OK); 706 } 707 mdb_printf("%#lr\n", pageaddr); /* this is PIPE_OUT friendly */ 708 return (DCMD_OK); 709 } 710 711 /*ARGSUSED*/ 712 int 713 page_num2pp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 714 { 715 uintptr_t pp; 716 717 if (argc != 0 || !(flags & DCMD_ADDRSPEC)) { 718 return (DCMD_USAGE); 719 } 720 721 pp = mdb_pfn2page((pfn_t)addr); 722 if (pp == 0) { 723 return (DCMD_ERR); 724 } 725 726 if (flags & DCMD_PIPE_OUT) { 727 mdb_printf("%#lr\n", pp); 728 } else { 729 mdb_printf("%lx has page_t at %#lx\n", (pfn_t)addr, pp); 730 } 731 732 return (DCMD_OK); 733 } 734 735 int 736 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 737 { 738 page_t p; 739 740 if (!(flags & DCMD_ADDRSPEC)) { 741 if (mdb_walk_dcmd("page", "page", argc, argv) == -1) { 742 mdb_warn("can't walk pages"); 743 return (DCMD_ERR); 744 } 745 return (DCMD_OK); 746 } 747 748 if (DCMD_HDRSPEC(flags)) { 749 mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n", 750 "PAGE", "VNODE", "OFFSET", "SELOCK", 751 "LCT", "COW", "IO", "FS", "ST"); 752 } 753 754 if (mdb_vread(&p, sizeof (page_t), addr) == -1) { 755 mdb_warn("can't read page_t at %#lx", addr); 756 return (DCMD_ERR); 757 } 758 759 mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n", 760 addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt, 761 p.p_iolock_state, p.p_fsdata, p.p_state); 762 763 return (DCMD_OK); 764 } 765 766 int 767 swap_walk_init(mdb_walk_state_t *wsp) 768 { 769 void *ptr; 770 771 if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) { 772 mdb_warn("swapinfo not found or invalid"); 773 return (WALK_ERR); 774 } 775 776 wsp->walk_addr = (uintptr_t)ptr; 777 778 return (WALK_NEXT); 779 } 780 781 int 782 swap_walk_step(mdb_walk_state_t *wsp) 783 { 784 uintptr_t sip; 785 struct swapinfo si; 786 787 sip = wsp->walk_addr; 788 789 if (sip == NULL) 790 return (WALK_DONE); 791 792 if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) { 793 mdb_warn("unable to read swapinfo at %#lx", sip); 794 return (WALK_ERR); 795 } 796 797 wsp->walk_addr = (uintptr_t)si.si_next; 798 799 return (wsp->walk_callback(sip, &si, wsp->walk_cbdata)); 800 } 801 802 int 803 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 804 { 805 struct swapinfo si; 806 char *name; 807 808 if (!(flags & DCMD_ADDRSPEC)) { 809 if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) { 810 mdb_warn("can't walk swapinfo"); 811 return (DCMD_ERR); 812 } 813 return (DCMD_OK); 814 } 815 816 if (DCMD_HDRSPEC(flags)) { 817 mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n", 818 "ADDR", "VNODE", "PAGES", "FREE", "NAME"); 819 } 820 821 if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) { 822 mdb_warn("can't read swapinfo at %#lx", addr); 823 return (DCMD_ERR); 824 } 825 826 name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC); 827 if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1) 828 name = "*error*"; 829 830 mdb_printf("%0?lx %?p %9d %9d %s\n", 831 addr, si.si_vp, si.si_npgs, si.si_nfpgs, name); 832 833 return (DCMD_OK); 834 } 835 836 int 837 memlist_walk_step(mdb_walk_state_t *wsp) 838 { 839 uintptr_t mlp; 840 struct memlist ml; 841 842 mlp = wsp->walk_addr; 843 844 if (mlp == NULL) 845 return (WALK_DONE); 846 847 if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) { 848 mdb_warn("unable to read memlist at %#lx", mlp); 849 return (WALK_ERR); 850 } 851 852 wsp->walk_addr = (uintptr_t)ml.ml_next; 853 854 return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata)); 855 } 856 857 int 858 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 859 { 860 struct memlist ml; 861 862 if (!(flags & DCMD_ADDRSPEC)) { 863 uintptr_t ptr; 864 uint_t list = 0; 865 int i; 866 static const char *lists[] = { 867 "phys_install", 868 "phys_avail", 869 "virt_avail" 870 }; 871 872 if (mdb_getopts(argc, argv, 873 'i', MDB_OPT_SETBITS, (1 << 0), &list, 874 'a', MDB_OPT_SETBITS, (1 << 1), &list, 875 'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc) 876 return (DCMD_USAGE); 877 878 if (!list) 879 list = 1; 880 881 for (i = 0; list; i++, list >>= 1) { 882 if (!(list & 1)) 883 continue; 884 if ((mdb_readvar(&ptr, lists[i]) == -1) || 885 (ptr == NULL)) { 886 mdb_warn("%s not found or invalid", lists[i]); 887 return (DCMD_ERR); 888 } 889 890 mdb_printf("%s:\n", lists[i]); 891 if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL, 892 ptr) == -1) { 893 mdb_warn("can't walk memlist"); 894 return (DCMD_ERR); 895 } 896 } 897 return (DCMD_OK); 898 } 899 900 if (DCMD_HDRSPEC(flags)) 901 mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE"); 902 903 if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) { 904 mdb_warn("can't read memlist at %#lx", addr); 905 return (DCMD_ERR); 906 } 907 908 mdb_printf("%0?lx %16llx %16llx\n", addr, ml.ml_address, ml.ml_size); 909 910 return (DCMD_OK); 911 } 912 913 int 914 seg_walk_init(mdb_walk_state_t *wsp) 915 { 916 if (wsp->walk_addr == NULL) { 917 mdb_warn("seg walk must begin at struct as *\n"); 918 return (WALK_ERR); 919 } 920 921 /* 922 * this is really just a wrapper to AVL tree walk 923 */ 924 wsp->walk_addr = (uintptr_t)&((struct as *)wsp->walk_addr)->a_segtree; 925 return (avl_walk_init(wsp)); 926 } 927 928 /*ARGSUSED*/ 929 int 930 seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 931 { 932 struct seg s; 933 934 if (argc != 0) 935 return (DCMD_USAGE); 936 937 if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) { 938 mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n", 939 "SEG", "BASE", "SIZE", "DATA", "OPS"); 940 } 941 942 if (mdb_vread(&s, sizeof (s), addr) == -1) { 943 mdb_warn("failed to read seg at %p", addr); 944 return (DCMD_ERR); 945 } 946 947 mdb_printf("%?p %?p %?lx %?p %a\n", 948 addr, s.s_base, s.s_size, s.s_data, s.s_ops); 949 950 return (DCMD_OK); 951 } 952 953 typedef struct pmap_walk_types { 954 uintptr_t pwt_segvn; 955 uintptr_t pwt_seghole; 956 } pmap_walk_types_t; 957 958 /*ARGSUSED*/ 959 static int 960 pmap_walk_count_pages(uintptr_t addr, const void *data, void *out) 961 { 962 pgcnt_t *nres = out; 963 964 (*nres)++; 965 966 return (WALK_NEXT); 967 } 968 969 static int 970 pmap_walk_seg(uintptr_t addr, const struct seg *seg, 971 const pmap_walk_types_t *types) 972 { 973 const uintptr_t ops = (uintptr_t)seg->s_ops; 974 975 mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024); 976 977 if (ops == types->pwt_segvn && seg->s_data != NULL) { 978 struct segvn_data svn; 979 pgcnt_t nres = 0; 980 981 svn.vp = NULL; 982 (void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data); 983 984 /* 985 * Use the segvn_pages walker to find all of the in-core pages 986 * for this mapping. 987 */ 988 if (mdb_pwalk("segvn_pages", pmap_walk_count_pages, &nres, 989 (uintptr_t)seg->s_data) == -1) { 990 mdb_warn("failed to walk segvn_pages (s_data=%p)", 991 seg->s_data); 992 } 993 mdb_printf(" %7ldk", (nres * PAGESIZE) / 1024); 994 995 if (svn.vp != NULL) { 996 char buf[29]; 997 998 mdb_vnode2path((uintptr_t)svn.vp, buf, sizeof (buf)); 999 mdb_printf(" %s", buf); 1000 } else { 1001 mdb_printf(" [ anon ]"); 1002 } 1003 } else if (ops == types->pwt_seghole && seg->s_data != NULL) { 1004 seghole_data_t shd; 1005 char name[16]; 1006 1007 (void) mdb_vread(&shd, sizeof (shd), (uintptr_t)seg->s_data); 1008 if (shd.shd_name == NULL || mdb_readstr(name, sizeof (name), 1009 (uintptr_t)shd.shd_name) == 0) { 1010 name[0] = '\0'; 1011 } 1012 1013 mdb_printf(" %8s [ hole%s%s ]", "-", 1014 name[0] == '0' ? "" : ":", name); 1015 } else { 1016 mdb_printf(" %8s [ &%a ]", "?", seg->s_ops); 1017 } 1018 1019 mdb_printf("\n"); 1020 return (WALK_NEXT); 1021 } 1022 1023 static int 1024 pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg, 1025 const pmap_walk_types_t *types) 1026 { 1027 const uintptr_t ops = (uintptr_t)seg->s_ops; 1028 1029 mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024); 1030 1031 if (ops == types->pwt_segvn && seg->s_data != NULL) { 1032 struct segvn_data svn; 1033 1034 svn.vp = NULL; 1035 (void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data); 1036 1037 if (svn.vp != NULL) { 1038 mdb_printf(" %0?p", svn.vp); 1039 } else { 1040 mdb_printf(" [ anon ]"); 1041 } 1042 } else { 1043 mdb_printf(" [ &%a ]", seg->s_ops); 1044 } 1045 1046 mdb_printf("\n"); 1047 return (WALK_NEXT); 1048 } 1049 1050 /*ARGSUSED*/ 1051 int 1052 pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1053 { 1054 proc_t proc; 1055 uint_t quick = FALSE; 1056 mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg; 1057 pmap_walk_types_t wtypes = { 0 }; 1058 1059 GElf_Sym sym; 1060 1061 if (!(flags & DCMD_ADDRSPEC)) 1062 return (DCMD_USAGE); 1063 1064 if (mdb_getopts(argc, argv, 1065 'q', MDB_OPT_SETBITS, TRUE, &quick, NULL) != argc) 1066 return (DCMD_USAGE); 1067 1068 if (mdb_vread(&proc, sizeof (proc), addr) == -1) { 1069 mdb_warn("failed to read proc at %p", addr); 1070 return (DCMD_ERR); 1071 } 1072 1073 if (mdb_lookup_by_name("segvn_ops", &sym) == 0) 1074 wtypes.pwt_segvn = (uintptr_t)sym.st_value; 1075 if (mdb_lookup_by_name("seghole_ops", &sym) == 0) 1076 wtypes.pwt_seghole = (uintptr_t)sym.st_value; 1077 1078 mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE"); 1079 1080 if (quick) { 1081 mdb_printf("VNODE\n"); 1082 cb = (mdb_walk_cb_t)pmap_walk_seg_quick; 1083 } else { 1084 mdb_printf("%8s %s\n", "RES", "PATH"); 1085 } 1086 1087 if (mdb_pwalk("seg", cb, (void *)&wtypes, (uintptr_t)proc.p_as) == -1) { 1088 mdb_warn("failed to walk segments of as %p", proc.p_as); 1089 return (DCMD_ERR); 1090 } 1091 1092 return (DCMD_OK); 1093 } 1094 1095 typedef struct anon_walk_data { 1096 uintptr_t *aw_levone; 1097 uintptr_t *aw_levtwo; 1098 size_t aw_minslot; 1099 size_t aw_maxslot; 1100 pgcnt_t aw_nlevone; 1101 pgcnt_t aw_levone_ndx; 1102 size_t aw_levtwo_ndx; 1103 struct anon_map *aw_ampp; 1104 struct anon_map aw_amp; 1105 struct anon_hdr aw_ahp; 1106 int aw_all; /* report all anon pointers, even NULLs */ 1107 } anon_walk_data_t; 1108 1109 int 1110 anon_walk_init_common(mdb_walk_state_t *wsp, ulong_t minslot, ulong_t maxslot) 1111 { 1112 anon_walk_data_t *aw; 1113 1114 if (wsp->walk_addr == NULL) { 1115 mdb_warn("anon walk doesn't support global walks\n"); 1116 return (WALK_ERR); 1117 } 1118 1119 aw = mdb_alloc(sizeof (anon_walk_data_t), UM_SLEEP); 1120 aw->aw_ampp = (struct anon_map *)wsp->walk_addr; 1121 1122 if (mdb_vread(&aw->aw_amp, sizeof (aw->aw_amp), wsp->walk_addr) == -1) { 1123 mdb_warn("failed to read anon map at %p", wsp->walk_addr); 1124 mdb_free(aw, sizeof (anon_walk_data_t)); 1125 return (WALK_ERR); 1126 } 1127 1128 if (mdb_vread(&aw->aw_ahp, sizeof (aw->aw_ahp), 1129 (uintptr_t)(aw->aw_amp.ahp)) == -1) { 1130 mdb_warn("failed to read anon hdr ptr at %p", aw->aw_amp.ahp); 1131 mdb_free(aw, sizeof (anon_walk_data_t)); 1132 return (WALK_ERR); 1133 } 1134 1135 /* update min and maxslot with the given constraints */ 1136 maxslot = MIN(maxslot, aw->aw_ahp.size); 1137 minslot = MIN(minslot, maxslot); 1138 1139 if (aw->aw_ahp.size <= ANON_CHUNK_SIZE || 1140 (aw->aw_ahp.flags & ANON_ALLOC_FORCE)) { 1141 aw->aw_nlevone = maxslot; 1142 aw->aw_levone_ndx = minslot; 1143 aw->aw_levtwo = NULL; 1144 } else { 1145 aw->aw_nlevone = 1146 (maxslot + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT; 1147 aw->aw_levone_ndx = 0; 1148 aw->aw_levtwo = 1149 mdb_zalloc(ANON_CHUNK_SIZE * sizeof (uintptr_t), UM_SLEEP); 1150 } 1151 1152 aw->aw_levone = 1153 mdb_alloc(aw->aw_nlevone * sizeof (uintptr_t), UM_SLEEP); 1154 aw->aw_all = (wsp->walk_arg == ANON_WALK_ALL); 1155 1156 mdb_vread(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t), 1157 (uintptr_t)aw->aw_ahp.array_chunk); 1158 1159 aw->aw_levtwo_ndx = 0; 1160 aw->aw_minslot = minslot; 1161 aw->aw_maxslot = maxslot; 1162 1163 out: 1164 wsp->walk_data = aw; 1165 return (0); 1166 } 1167 1168 int 1169 anon_walk_step(mdb_walk_state_t *wsp) 1170 { 1171 anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data; 1172 struct anon anon; 1173 uintptr_t anonptr; 1174 ulong_t slot; 1175 1176 /* 1177 * Once we've walked through level one, we're done. 1178 */ 1179 if (aw->aw_levone_ndx >= aw->aw_nlevone) { 1180 return (WALK_DONE); 1181 } 1182 1183 if (aw->aw_levtwo == NULL) { 1184 anonptr = aw->aw_levone[aw->aw_levone_ndx]; 1185 aw->aw_levone_ndx++; 1186 } else { 1187 if (aw->aw_levtwo_ndx == 0) { 1188 uintptr_t levtwoptr; 1189 1190 /* The first time through, skip to our first index. */ 1191 if (aw->aw_levone_ndx == 0) { 1192 aw->aw_levone_ndx = 1193 aw->aw_minslot / ANON_CHUNK_SIZE; 1194 aw->aw_levtwo_ndx = 1195 aw->aw_minslot % ANON_CHUNK_SIZE; 1196 } 1197 1198 levtwoptr = (uintptr_t)aw->aw_levone[aw->aw_levone_ndx]; 1199 1200 if (levtwoptr == NULL) { 1201 if (!aw->aw_all) { 1202 aw->aw_levtwo_ndx = 0; 1203 aw->aw_levone_ndx++; 1204 return (WALK_NEXT); 1205 } 1206 bzero(aw->aw_levtwo, 1207 ANON_CHUNK_SIZE * sizeof (uintptr_t)); 1208 1209 } else if (mdb_vread(aw->aw_levtwo, 1210 ANON_CHUNK_SIZE * sizeof (uintptr_t), levtwoptr) == 1211 -1) { 1212 mdb_warn("unable to read anon_map %p's " 1213 "second-level map %d at %p", 1214 aw->aw_ampp, aw->aw_levone_ndx, 1215 levtwoptr); 1216 return (WALK_ERR); 1217 } 1218 } 1219 slot = aw->aw_levone_ndx * ANON_CHUNK_SIZE + aw->aw_levtwo_ndx; 1220 anonptr = aw->aw_levtwo[aw->aw_levtwo_ndx]; 1221 1222 /* update the indices for next time */ 1223 aw->aw_levtwo_ndx++; 1224 if (aw->aw_levtwo_ndx == ANON_CHUNK_SIZE) { 1225 aw->aw_levtwo_ndx = 0; 1226 aw->aw_levone_ndx++; 1227 } 1228 1229 /* make sure the slot # is in the requested range */ 1230 if (slot >= aw->aw_maxslot) { 1231 return (WALK_DONE); 1232 } 1233 } 1234 1235 if (anonptr != NULL) { 1236 mdb_vread(&anon, sizeof (anon), anonptr); 1237 return (wsp->walk_callback(anonptr, &anon, wsp->walk_cbdata)); 1238 } 1239 if (aw->aw_all) { 1240 return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata)); 1241 } 1242 return (WALK_NEXT); 1243 } 1244 1245 void 1246 anon_walk_fini(mdb_walk_state_t *wsp) 1247 { 1248 anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data; 1249 1250 if (aw->aw_levtwo != NULL) 1251 mdb_free(aw->aw_levtwo, ANON_CHUNK_SIZE * sizeof (uintptr_t)); 1252 1253 mdb_free(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t)); 1254 mdb_free(aw, sizeof (anon_walk_data_t)); 1255 } 1256 1257 int 1258 anon_walk_init(mdb_walk_state_t *wsp) 1259 { 1260 return (anon_walk_init_common(wsp, 0, ULONG_MAX)); 1261 } 1262 1263 int 1264 segvn_anon_walk_init(mdb_walk_state_t *wsp) 1265 { 1266 const uintptr_t svd_addr = wsp->walk_addr; 1267 uintptr_t amp_addr; 1268 uintptr_t seg_addr; 1269 struct segvn_data svd; 1270 struct anon_map amp; 1271 struct seg seg; 1272 1273 if (svd_addr == NULL) { 1274 mdb_warn("segvn_anon walk doesn't support global walks\n"); 1275 return (WALK_ERR); 1276 } 1277 if (mdb_vread(&svd, sizeof (svd), svd_addr) == -1) { 1278 mdb_warn("segvn_anon walk: unable to read segvn_data at %p", 1279 svd_addr); 1280 return (WALK_ERR); 1281 } 1282 if (svd.amp == NULL) { 1283 mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n", 1284 svd_addr); 1285 return (WALK_ERR); 1286 } 1287 amp_addr = (uintptr_t)svd.amp; 1288 if (mdb_vread(&, sizeof (amp), amp_addr) == -1) { 1289 mdb_warn("segvn_anon walk: unable to read amp %p for " 1290 "segvn_data %p", amp_addr, svd_addr); 1291 return (WALK_ERR); 1292 } 1293 seg_addr = (uintptr_t)svd.seg; 1294 if (mdb_vread(&seg, sizeof (seg), seg_addr) == -1) { 1295 mdb_warn("segvn_anon walk: unable to read seg %p for " 1296 "segvn_data %p", seg_addr, svd_addr); 1297 return (WALK_ERR); 1298 } 1299 if ((seg.s_size + (svd.anon_index << PAGESHIFT)) > amp.size) { 1300 mdb_warn("anon map %p is too small for segment %p\n", 1301 amp_addr, seg_addr); 1302 return (WALK_ERR); 1303 } 1304 1305 wsp->walk_addr = amp_addr; 1306 return (anon_walk_init_common(wsp, 1307 svd.anon_index, svd.anon_index + (seg.s_size >> PAGESHIFT))); 1308 } 1309 1310 1311 typedef struct { 1312 u_offset_t svs_offset; 1313 uintptr_t svs_page; 1314 } segvn_sparse_t; 1315 #define SEGVN_MAX_SPARSE ((128 * 1024) / sizeof (segvn_sparse_t)) 1316 1317 typedef struct { 1318 uintptr_t svw_svdp; 1319 struct segvn_data svw_svd; 1320 struct seg svw_seg; 1321 size_t svw_walkoff; 1322 ulong_t svw_anonskip; 1323 segvn_sparse_t *svw_sparse; 1324 size_t svw_sparse_idx; 1325 size_t svw_sparse_count; 1326 size_t svw_sparse_size; 1327 uint8_t svw_sparse_overflow; 1328 uint8_t svw_all; 1329 } segvn_walk_data_t; 1330 1331 static int 1332 segvn_sparse_fill(uintptr_t addr, const void *pp_arg, void *arg) 1333 { 1334 segvn_walk_data_t *const svw = arg; 1335 const page_t *const pp = pp_arg; 1336 const u_offset_t offset = pp->p_offset; 1337 segvn_sparse_t *const cur = 1338 &svw->svw_sparse[svw->svw_sparse_count]; 1339 1340 /* See if the page is of interest */ 1341 if ((u_offset_t)(offset - svw->svw_svd.offset) >= svw->svw_seg.s_size) { 1342 return (WALK_NEXT); 1343 } 1344 /* See if we have space for the new entry, then add it. */ 1345 if (svw->svw_sparse_count >= svw->svw_sparse_size) { 1346 svw->svw_sparse_overflow = 1; 1347 return (WALK_DONE); 1348 } 1349 svw->svw_sparse_count++; 1350 cur->svs_offset = offset; 1351 cur->svs_page = addr; 1352 return (WALK_NEXT); 1353 } 1354 1355 static int 1356 segvn_sparse_cmp(const void *lp, const void *rp) 1357 { 1358 const segvn_sparse_t *const l = lp; 1359 const segvn_sparse_t *const r = rp; 1360 1361 if (l->svs_offset < r->svs_offset) { 1362 return (-1); 1363 } 1364 if (l->svs_offset > r->svs_offset) { 1365 return (1); 1366 } 1367 return (0); 1368 } 1369 1370 /* 1371 * Builds on the "anon_all" walker to walk all resident pages in a segvn_data 1372 * structure. For segvn_datas without an anon structure, it just looks up 1373 * pages in the vnode. For segvn_datas with an anon structure, NULL slots 1374 * pass through to the vnode, and non-null slots are checked for residency. 1375 */ 1376 int 1377 segvn_pages_walk_init(mdb_walk_state_t *wsp) 1378 { 1379 segvn_walk_data_t *svw; 1380 struct segvn_data *svd; 1381 1382 if (wsp->walk_addr == NULL) { 1383 mdb_warn("segvn walk doesn't support global walks\n"); 1384 return (WALK_ERR); 1385 } 1386 1387 svw = mdb_zalloc(sizeof (*svw), UM_SLEEP); 1388 svw->svw_svdp = wsp->walk_addr; 1389 svw->svw_anonskip = 0; 1390 svw->svw_sparse_idx = 0; 1391 svw->svw_walkoff = 0; 1392 svw->svw_all = (wsp->walk_arg == SEGVN_PAGES_ALL); 1393 1394 if (mdb_vread(&svw->svw_svd, sizeof (svw->svw_svd), wsp->walk_addr) == 1395 -1) { 1396 mdb_warn("failed to read segvn_data at %p", wsp->walk_addr); 1397 mdb_free(svw, sizeof (*svw)); 1398 return (WALK_ERR); 1399 } 1400 1401 svd = &svw->svw_svd; 1402 if (mdb_vread(&svw->svw_seg, sizeof (svw->svw_seg), 1403 (uintptr_t)svd->seg) == -1) { 1404 mdb_warn("failed to read seg at %p (from %p)", 1405 svd->seg, &((struct segvn_data *)(wsp->walk_addr))->seg); 1406 mdb_free(svw, sizeof (*svw)); 1407 return (WALK_ERR); 1408 } 1409 1410 if (svd->amp == NULL && svd->vp == NULL) { 1411 /* make the walk terminate immediately; no pages */ 1412 svw->svw_walkoff = svw->svw_seg.s_size; 1413 1414 } else if (svd->amp == NULL && 1415 (svw->svw_seg.s_size >> PAGESHIFT) >= SEGVN_MAX_SPARSE) { 1416 /* 1417 * If we don't have an anon pointer, and the segment is large, 1418 * we try to load the in-memory pages into a fixed-size array, 1419 * which is then sorted and reported directly. This is much 1420 * faster than doing a mdb_page_lookup() for each possible 1421 * offset. 1422 * 1423 * If the allocation fails, or there are too many pages 1424 * in-core, we fall back to looking up the pages individually. 1425 */ 1426 svw->svw_sparse = mdb_alloc( 1427 SEGVN_MAX_SPARSE * sizeof (*svw->svw_sparse), UM_NOSLEEP); 1428 if (svw->svw_sparse != NULL) { 1429 svw->svw_sparse_size = SEGVN_MAX_SPARSE; 1430 1431 if (mdb_pwalk("page", segvn_sparse_fill, svw, 1432 (uintptr_t)svd->vp) == -1 || 1433 svw->svw_sparse_overflow) { 1434 mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE * 1435 sizeof (*svw->svw_sparse)); 1436 svw->svw_sparse = NULL; 1437 } else { 1438 qsort(svw->svw_sparse, svw->svw_sparse_count, 1439 sizeof (*svw->svw_sparse), 1440 segvn_sparse_cmp); 1441 } 1442 } 1443 1444 } else if (svd->amp != NULL) { 1445 const char *const layer = (!svw->svw_all && svd->vp == NULL) ? 1446 "segvn_anon" : "segvn_anon_all"; 1447 /* 1448 * If we're not printing all offsets, and the segvn_data has 1449 * no backing VP, we can use the "segvn_anon" walker, which 1450 * efficiently skips NULL slots. 1451 * 1452 * Otherwise, we layer over the "segvn_anon_all" walker 1453 * (which reports all anon slots, even NULL ones), so that 1454 * segvn_pages_walk_step() knows the precise offset for each 1455 * element. It uses that offset information to look up the 1456 * backing pages for NULL anon slots. 1457 */ 1458 if (mdb_layered_walk(layer, wsp) == -1) { 1459 mdb_warn("segvn_pages: failed to layer \"%s\" " 1460 "for segvn_data %p", layer, svw->svw_svdp); 1461 mdb_free(svw, sizeof (*svw)); 1462 return (WALK_ERR); 1463 } 1464 } 1465 1466 wsp->walk_data = svw; 1467 return (WALK_NEXT); 1468 } 1469 1470 int 1471 segvn_pages_walk_step(mdb_walk_state_t *wsp) 1472 { 1473 segvn_walk_data_t *const svw = wsp->walk_data; 1474 struct seg *const seg = &svw->svw_seg; 1475 struct segvn_data *const svd = &svw->svw_svd; 1476 uintptr_t pp; 1477 page_t page; 1478 1479 /* If we've walked off the end of the segment, we're done. */ 1480 if (svw->svw_walkoff >= seg->s_size) { 1481 return (WALK_DONE); 1482 } 1483 1484 /* 1485 * If we've got a sparse page array, just send it directly. 1486 */ 1487 if (svw->svw_sparse != NULL) { 1488 u_offset_t off; 1489 1490 if (svw->svw_sparse_idx >= svw->svw_sparse_count) { 1491 pp = NULL; 1492 if (!svw->svw_all) { 1493 return (WALK_DONE); 1494 } 1495 } else { 1496 segvn_sparse_t *const svs = 1497 &svw->svw_sparse[svw->svw_sparse_idx]; 1498 off = svs->svs_offset - svd->offset; 1499 if (svw->svw_all && svw->svw_walkoff != off) { 1500 pp = NULL; 1501 } else { 1502 pp = svs->svs_page; 1503 svw->svw_sparse_idx++; 1504 } 1505 } 1506 1507 } else if (svd->amp == NULL || wsp->walk_addr == NULL) { 1508 /* 1509 * If there's no anon, or the anon slot is NULL, look up 1510 * <vp, offset>. 1511 */ 1512 if (svd->vp != NULL) { 1513 pp = mdb_page_lookup((uintptr_t)svd->vp, 1514 svd->offset + svw->svw_walkoff); 1515 } else { 1516 pp = NULL; 1517 } 1518 1519 } else { 1520 const struct anon *const anon = wsp->walk_layer; 1521 1522 /* 1523 * We have a "struct anon"; if it's not swapped out, 1524 * look up the page. 1525 */ 1526 if (anon->an_vp != NULL || anon->an_off != 0) { 1527 pp = mdb_page_lookup((uintptr_t)anon->an_vp, 1528 anon->an_off); 1529 if (pp == 0 && mdb_get_state() != MDB_STATE_RUNNING) { 1530 mdb_warn("walk segvn_pages: segvn_data %p " 1531 "offset %ld, anon page <%p, %llx> not " 1532 "found.\n", svw->svw_svdp, svw->svw_walkoff, 1533 anon->an_vp, anon->an_off); 1534 } 1535 } else { 1536 if (anon->an_pvp == NULL) { 1537 mdb_warn("walk segvn_pages: useless struct " 1538 "anon at %p\n", wsp->walk_addr); 1539 } 1540 pp = NULL; /* nothing at this offset */ 1541 } 1542 } 1543 1544 svw->svw_walkoff += PAGESIZE; /* Update for the next call */ 1545 if (pp != NULL) { 1546 if (mdb_vread(&page, sizeof (page_t), pp) == -1) { 1547 mdb_warn("unable to read page_t at %#lx", pp); 1548 return (WALK_ERR); 1549 } 1550 return (wsp->walk_callback(pp, &page, wsp->walk_cbdata)); 1551 } 1552 if (svw->svw_all) { 1553 return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata)); 1554 } 1555 return (WALK_NEXT); 1556 } 1557 1558 void 1559 segvn_pages_walk_fini(mdb_walk_state_t *wsp) 1560 { 1561 segvn_walk_data_t *const svw = wsp->walk_data; 1562 1563 if (svw->svw_sparse != NULL) { 1564 mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE * 1565 sizeof (*svw->svw_sparse)); 1566 } 1567 mdb_free(svw, sizeof (*svw)); 1568 } 1569 1570 /* 1571 * Grumble, grumble. 1572 */ 1573 #define SMAP_HASHFUNC(vp, off) \ 1574 ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \ 1575 ((off) >> MAXBSHIFT)) & smd_hashmsk) 1576 1577 int 1578 vnode2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1579 { 1580 long smd_hashmsk; 1581 int hash; 1582 uintptr_t offset = 0; 1583 struct smap smp; 1584 uintptr_t saddr, kaddr; 1585 uintptr_t smd_hash, smd_smap; 1586 struct seg seg; 1587 1588 if (!(flags & DCMD_ADDRSPEC)) 1589 return (DCMD_USAGE); 1590 1591 if (mdb_readvar(&smd_hashmsk, "smd_hashmsk") == -1) { 1592 mdb_warn("failed to read smd_hashmsk"); 1593 return (DCMD_ERR); 1594 } 1595 1596 if (mdb_readvar(&smd_hash, "smd_hash") == -1) { 1597 mdb_warn("failed to read smd_hash"); 1598 return (DCMD_ERR); 1599 } 1600 1601 if (mdb_readvar(&smd_smap, "smd_smap") == -1) { 1602 mdb_warn("failed to read smd_hash"); 1603 return (DCMD_ERR); 1604 } 1605 1606 if (mdb_readvar(&kaddr, "segkmap") == -1) { 1607 mdb_warn("failed to read segkmap"); 1608 return (DCMD_ERR); 1609 } 1610 1611 if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) { 1612 mdb_warn("failed to read segkmap at %p", kaddr); 1613 return (DCMD_ERR); 1614 } 1615 1616 if (argc != 0) { 1617 const mdb_arg_t *arg = &argv[0]; 1618 1619 if (arg->a_type == MDB_TYPE_IMMEDIATE) 1620 offset = arg->a_un.a_val; 1621 else 1622 offset = (uintptr_t)mdb_strtoull(arg->a_un.a_str); 1623 } 1624 1625 hash = SMAP_HASHFUNC(addr, offset); 1626 1627 if (mdb_vread(&saddr, sizeof (saddr), 1628 smd_hash + hash * sizeof (uintptr_t)) == -1) { 1629 mdb_warn("couldn't read smap at %p", 1630 smd_hash + hash * sizeof (uintptr_t)); 1631 return (DCMD_ERR); 1632 } 1633 1634 do { 1635 if (mdb_vread(&smp, sizeof (smp), saddr) == -1) { 1636 mdb_warn("couldn't read smap at %p", saddr); 1637 return (DCMD_ERR); 1638 } 1639 1640 if ((uintptr_t)smp.sm_vp == addr && smp.sm_off == offset) { 1641 mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n", 1642 addr, offset, saddr, ((saddr - smd_smap) / 1643 sizeof (smp)) * MAXBSIZE + seg.s_base); 1644 return (DCMD_OK); 1645 } 1646 1647 saddr = (uintptr_t)smp.sm_hash; 1648 } while (saddr != NULL); 1649 1650 mdb_printf("no smap for vnode %p, offs %p\n", addr, offset); 1651 return (DCMD_OK); 1652 } 1653 1654 /*ARGSUSED*/ 1655 int 1656 addr2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1657 { 1658 uintptr_t kaddr; 1659 struct seg seg; 1660 struct segmap_data sd; 1661 1662 if (!(flags & DCMD_ADDRSPEC)) 1663 return (DCMD_USAGE); 1664 1665 if (mdb_readvar(&kaddr, "segkmap") == -1) { 1666 mdb_warn("failed to read segkmap"); 1667 return (DCMD_ERR); 1668 } 1669 1670 if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) { 1671 mdb_warn("failed to read segkmap at %p", kaddr); 1672 return (DCMD_ERR); 1673 } 1674 1675 if (mdb_vread(&sd, sizeof (sd), (uintptr_t)seg.s_data) == -1) { 1676 mdb_warn("failed to read segmap_data at %p", seg.s_data); 1677 return (DCMD_ERR); 1678 } 1679 1680 mdb_printf("%p is smap %p\n", addr, 1681 ((addr - (uintptr_t)seg.s_base) >> MAXBSHIFT) * 1682 sizeof (struct smap) + (uintptr_t)sd.smd_sm); 1683 1684 return (DCMD_OK); 1685 }