1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2017 Joyent, Inc.
24 */
25
26 #include <mdb/mdb_param.h>
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ks.h>
29 #include <sys/types.h>
30 #include <sys/memlist.h>
31 #include <sys/swap.h>
32 #include <sys/systm.h>
33 #include <sys/thread.h>
34 #include <vm/anon.h>
35 #include <vm/as.h>
36 #include <vm/page.h>
37 #include <sys/thread.h>
38 #include <sys/swap.h>
39 #include <sys/memlist.h>
40 #include <sys/vnode.h>
41 #include <vm/seg_map.h>
42 #include <vm/seg_vn.h>
43 #include <vm/seg_hole.h>
44
45 #include "avl.h"
46 #include "memory.h"
47
48 /*
49 * Page walker.
50 * By default, this will walk all pages in the system. If given an
51 * address, it will walk all pages belonging to the vnode at that
52 * address.
53 */
54
55 /*
56 * page_walk_data
57 *
58 * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
59 * number of hash locations remaining in the page hash table when
60 * walking all pages.
61 *
62 * The astute reader will notice that pw_hashloc is only used when
63 * reading all pages (to hold a pointer to our location in the page
64 * hash table), and that pw_first is only used when reading the pages
65 * belonging to a particular vnode (to hold a pointer to the first
66 * page). While these could be combined to be a single pointer, they
67 * are left separate for clarity.
68 */
/* Per-walk state for the "page" walker; see the block comment above. */
typedef struct page_walk_data {
	long pw_hashleft;	/* -1: vnode walk; else hash buckets remaining */
	void **pw_hashloc;	/* current slot in the page hash table */
	uintptr_t pw_first;	/* first page seen in a vnode walk (terminator) */
} page_walk_data_t;
74
/*
 * Initialize the "page" walker.
 *
 * With no starting address, sets up to scan every bucket of the global
 * page hash table.  With an address, treats it as a vnode_t and sets up
 * to walk that vnode's p_vpnext ring starting at v_pages.
 *
 * Returns WALK_NEXT on success, WALK_ERR if the needed kernel symbols
 * or the vnode cannot be read.
 */
int
page_walk_init(mdb_walk_state_t *wsp)
{
	page_walk_data_t *pwd;
	void **ptr;
	size_t hashsz;
	vnode_t vn;

	if (wsp->walk_addr == NULL) {

		/*
		 * Walk all pages
		 */

		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
		    (ptr == NULL) || (hashsz == 0)) {
			mdb_warn("page_hash, page_hashsz not found or invalid");
			return (WALK_ERR);
		}

		/*
		 * Since we are walking all pages, initialize hashleft
		 * to be the remaining number of entries in the page
		 * hash.  hashloc is set the start of the page hash
		 * table.  Setting the walk address to 0 indicates that
		 * we aren't currently following a hash chain, and that
		 * we need to scan the page hash table for a page.
		 */
		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
		pwd->pw_hashleft = hashsz;
		pwd->pw_hashloc = ptr;
		wsp->walk_addr = 0;
	} else {

		/*
		 * Walk just this vnode
		 */

		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
			mdb_warn("unable to read vnode_t at %#lx",
			    wsp->walk_addr);
			return (WALK_ERR);
		}

		/*
		 * We set hashleft to -1 to indicate that we are
		 * walking a vnode, and initialize first to 0 (it is
		 * used to terminate the walk, so it must not be set
		 * until after we have walked the first page).  The
		 * walk address is set to the first page.
		 */
		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
		pwd->pw_hashleft = -1;
		pwd->pw_first = 0;

		wsp->walk_addr = (uintptr_t)vn.v_pages;
	}

	wsp->walk_data = pwd;

	return (WALK_NEXT);
}
138
/*
 * Step the "page" walker: deliver the next page_t to the callback.
 *
 * Vnode mode (pw_hashleft < 0): follow the page's p_vpnext ring until we
 * come back around to the first page we visited.
 *
 * Global mode (pw_hashleft > 0): follow the current p_hash chain; when it
 * is exhausted (walk address 0), scan forward through the page hash table
 * for the next non-empty bucket.  pw_hashleft == 0 means the table has
 * been fully scanned.
 */
int
page_walk_step(mdb_walk_state_t *wsp)
{
	page_walk_data_t *pwd = wsp->walk_data;
	page_t page;
	uintptr_t pp;

	pp = wsp->walk_addr;

	if (pwd->pw_hashleft < 0) {

		/* We're walking a vnode's pages */

		/*
		 * If we don't have any pages to walk, we have come
		 * back around to the first one (we finished), or we
		 * can't read the page we're looking at, we are done.
		 */
		if (pp == NULL || pp == pwd->pw_first)
			return (WALK_DONE);
		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
			mdb_warn("unable to read page_t at %#lx", pp);
			return (WALK_ERR);
		}

		/*
		 * Set the walk address to the next page, and if the
		 * first page hasn't been set yet (i.e. we are on the
		 * first page), set it.
		 */
		wsp->walk_addr = (uintptr_t)page.p_vpnext;
		if (pwd->pw_first == NULL)
			pwd->pw_first = pp;

	} else if (pwd->pw_hashleft > 0) {

		/* We're walking all pages */

		/*
		 * If pp (the walk address) is NULL, we scan through
		 * the page hash table until we find a page.
		 */
		if (pp == NULL) {

			/*
			 * Iterate through the page hash table until we
			 * find a page or reach the end.
			 */
			do {
				/* read the bucket's head pointer */
				if (mdb_vread(&pp, sizeof (uintptr_t),
				    (uintptr_t)pwd->pw_hashloc) == -1) {
					mdb_warn("unable to read from %#p",
					    pwd->pw_hashloc);
					return (WALK_ERR);
				}
				pwd->pw_hashleft--;
				pwd->pw_hashloc++;
			} while (pwd->pw_hashleft && (pp == NULL));

			/*
			 * We've reached the end; exit.
			 */
			if (pp == NULL)
				return (WALK_DONE);
		}

		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
			mdb_warn("unable to read page_t at %#lx", pp);
			return (WALK_ERR);
		}

		/*
		 * Set the walk address to the next page.
		 */
		wsp->walk_addr = (uintptr_t)page.p_hash;

	} else {
		/* We've finished walking all pages. */
		return (WALK_DONE);
	}

	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
}
222
223 void
224 page_walk_fini(mdb_walk_state_t *wsp)
225 {
226 mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
227 }
228
229 /*
230 * allpages walks all pages in the system in order they appear in
231 * the memseg structure
232 */
233
234 #define PAGE_BUFFER 128
235
/*
 * Initialize the "allpages" walker (global walks only): layer on top of
 * the "memseg" walker and allocate a scratch buffer used to read page_t
 * structures in batches of PAGE_BUFFER.
 *
 * Note the buffer is allocated after the layered walk is established, so
 * an early failure does not leak it.
 */
int
allpages_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr != 0) {
		mdb_warn("allpages only supports global walks.\n");
		return (WALK_ERR);
	}

	if (mdb_layered_walk("memseg", wsp) == -1) {
		mdb_warn("couldn't walk 'memseg'");
		return (WALK_ERR);
	}

	wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
	return (WALK_NEXT);
}
252
253 int
254 allpages_walk_step(mdb_walk_state_t *wsp)
255 {
256 const struct memseg *msp = wsp->walk_layer;
257 page_t *buf = wsp->walk_data;
258 size_t pg_read, i;
259 size_t pg_num = msp->pages_end - msp->pages_base;
260 const page_t *pg_addr = msp->pages;
261
262 while (pg_num > 0) {
263 pg_read = MIN(pg_num, PAGE_BUFFER);
264
265 if (mdb_vread(buf, pg_read * sizeof (page_t),
266 (uintptr_t)pg_addr) == -1) {
267 mdb_warn("can't read page_t's at %#lx", pg_addr);
268 return (WALK_ERR);
269 }
270 for (i = 0; i < pg_read; i++) {
271 int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
272 &buf[i], wsp->walk_cbdata);
273
274 if (ret != WALK_NEXT)
275 return (ret);
276 }
277 pg_num -= pg_read;
278 pg_addr += pg_read;
279 }
280
281 return (WALK_NEXT);
282 }
283
284 void
285 allpages_walk_fini(mdb_walk_state_t *wsp)
286 {
287 mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
288 }
289
290 /*
291 * Hash table + LRU queue.
292 * This table is used to cache recently read vnodes for the memstat
293 * command, to reduce the number of mdb_vread calls. This greatly
294 * speeds the memstat command on on live, large CPU count systems.
295 */
296
297 #define VN_SMALL 401
298 #define VN_LARGE 10007
299 #define VN_HTABLE_KEY(p, hp) ((p) % ((hp)->vn_htable_buckets))
300
/*
 * One cache entry: a vnode's address plus its cached v_flag, linked into
 * both the LRU queue and one hash chain.
 */
struct vn_htable_list {
	uint_t vn_flag;				/* v_flag from vnode */
	uintptr_t vn_ptr;			/* pointer to vnode */
	struct vn_htable_list *vn_q_next;	/* queue next pointer */
	struct vn_htable_list *vn_q_prev;	/* queue prev pointer */
	struct vn_htable_list *vn_h_next;	/* hash table pointer */
};
308
309 /*
310 * vn_q_first -> points to to head of queue: the vnode that was most
311 * recently used
312 * vn_q_last -> points to the oldest used vnode, and is freed once a new
313 * vnode is read.
314 * vn_htable -> hash table
315 * vn_htable_buf -> contains htable objects
316 * vn_htable_size -> total number of items in the hash table
317 * vn_htable_buckets -> number of buckets in the hash table
318 */
/* Cache state; the fields are described in the comment block above. */
typedef struct vn_htable {
	struct vn_htable_list *vn_q_first;	/* MRU end of LRU queue */
	struct vn_htable_list *vn_q_last;	/* LRU end; recycled first */
	struct vn_htable_list **vn_htable;	/* bucket array */
	struct vn_htable_list *vn_htable_buf;	/* backing entry storage */
	int vn_htable_size;			/* number of entries */
	int vn_htable_buckets;			/* number of buckets */
} vn_htable_t;
327
328
329 /* allocate memory, initilize hash table and LRU queue */
330 static void
331 vn_htable_init(vn_htable_t *hp, size_t vn_size)
332 {
333 int i;
334 int htable_size = MAX(vn_size, VN_LARGE);
335
336 if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
337 * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
338 htable_size = VN_SMALL;
339 hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
340 * htable_size, UM_SLEEP|UM_GC);
341 }
342
343 hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
344 * htable_size, UM_SLEEP|UM_GC);
345
346 hp->vn_q_first = &hp->vn_htable_buf[0];
347 hp->vn_q_last = &hp->vn_htable_buf[htable_size - 1];
348 hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
349 hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];
350
351 for (i = 1; i < (htable_size-1); i++) {
352 hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
353 hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
354 }
355
356 hp->vn_htable_size = htable_size;
357 hp->vn_htable_buckets = htable_size;
358 }
359
360
361 /*
362 * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
363 * The function tries to find needed information in the following order:
364 *
365 * 1. check if ptr is the first in queue
366 * 2. check if ptr is in hash table (if so move it to the top of queue)
367 * 3. do mdb_vread, remove last queue item from queue and hash table.
368 * Insert new information to freed object, and put this object in to the
369 * top of the queue.
370 */
/*
 * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
 * The function tries to find needed information in the following order:
 *
 * 1. check if ptr is the first in queue
 * 2. check if ptr is in hash table (if so move it to the top of queue)
 * 3. do mdb_vread, remove last queue item from queue and hash table.
 *    Insert new information to freed object, and put this object in to the
 *    top of the queue.
 *
 * Returns 0 on success, -1 if the vnode could not be read.
 */
static int
vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
{
	int hkey;
	struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
	struct vn_htable_list *q_first = hp->vn_q_first;

	/* 1. vnode ptr is the first in queue, just get v_flag and return */
	if (q_first->vn_ptr == ptr) {
		vp->v_flag = q_first->vn_flag;

		return (0);
	}

	/* 2. search the hash table for this ptr */
	hkey = VN_HTABLE_KEY(ptr, hp);
	hent = hp->vn_htable[hkey];
	while (hent && (hent->vn_ptr != ptr))
		hent = hent->vn_h_next;

	/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
	if (hent == NULL) {
		struct vnode vn;

		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
			mdb_warn("unable to read vnode_t at %#lx", ptr);
			return (-1);
		}

		/* we will insert read data into the last element in queue */
		hent = hp->vn_q_last;

		/* remove last hp->vn_q_last object from hash table */
		if (hent->vn_ptr) {
			/* walk the chain to find the link pointing at hent */
			htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
			while (*htmp != hent)
				htmp = &(*htmp)->vn_h_next;
			*htmp = hent->vn_h_next;
		}

		/* insert data into new free object */
		hent->vn_ptr = ptr;
		hent->vn_flag = vn.v_flag;

		/* insert new object into hash table */
		hent->vn_h_next = hp->vn_htable[hkey];
		hp->vn_htable[hkey] = hent;
	}

	/* Remove from queue. hent is not first, vn_q_prev is not NULL */
	q_next = hent->vn_q_next;
	q_prev = hent->vn_q_prev;
	if (q_next == NULL)
		hp->vn_q_last = q_prev;
	else
		q_next->vn_q_prev = q_prev;
	q_prev->vn_q_next = q_next;

	/* Add to the front of queue */
	hent->vn_q_prev = NULL;
	hent->vn_q_next = q_first;
	q_first->vn_q_prev = hent;
	hp->vn_q_first = hent;

	/* Set v_flag in vnode pointer from hent */
	vp->v_flag = hent->vn_flag;

	return (0);
}
440
441 /* Summary statistics of pages */
/* Summary statistics of pages, accumulated by memstat_callback(). */
typedef struct memstat {
	struct vnode *ms_kvp;		/* Cached address of kernel vnode */
	struct vnode *ms_unused_vp;	/* Unused pages vnode pointer */
	struct vnode *ms_zvp;		/* Cached address of zio vnode */
	uint64_t ms_kmem;		/* Pages of kernel memory */
	uint64_t ms_zfs_data;		/* Pages of zfs data */
	uint64_t ms_anon;		/* Pages of anonymous memory */
	uint64_t ms_vnode;		/* Pages of named (vnode) memory */
	uint64_t ms_exec;		/* Pages of exec/library memory */
	uint64_t ms_cachelist;		/* Pages on the cachelist (free) */
	uint64_t ms_bootpages;		/* Pages on the bootpages list */
	uint64_t ms_total;		/* Pages on page hash */
	vn_htable_t *ms_vn_htable;	/* Pointer to hash table */
	struct vnode ms_vn;		/* vnode buffer */
} memstat_t;

/* True if the page belongs to the kernel (kvps[KV_KVP]) vnode. */
#define	MS_PP_ISKAS(pp, stats)				\
	((pp)->p_vnode == (stats)->ms_kvp)

/* True if the page belongs to the zio (kvps[KV_ZVP]) vnode, if known. */
#define	MS_PP_ISZFS_DATA(pp, stats)			\
	(((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))
463
464 /*
465 * Summarize pages by type and update stat information
466 */
467
468 /* ARGSUSED */
469 static int
470 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
471 {
472 struct vnode *vp = &stats->ms_vn;
473
474 if (PP_ISBOOTPAGES(pp))
475 stats->ms_bootpages++;
476 else if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
477 return (WALK_NEXT);
478 else if (MS_PP_ISKAS(pp, stats))
479 stats->ms_kmem++;
480 else if (MS_PP_ISZFS_DATA(pp, stats))
481 stats->ms_zfs_data++;
482 else if (PP_ISFREE(pp))
483 stats->ms_cachelist++;
484 else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
485 return (WALK_ERR);
486 else if (IS_SWAPFSVP(vp))
487 stats->ms_anon++;
488 else if ((vp->v_flag & VVMEXEC) != 0)
489 stats->ms_exec++;
490 else
491 stats->ms_vnode++;
492
493 stats->ms_total++;
494
495 return (WALK_NEXT);
496 }
497
498 /* ARGSUSED */
499 int
500 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
501 {
502 pgcnt_t total_pages, physmem;
503 ulong_t freemem;
504 memstat_t stats;
505 GElf_Sym sym;
506 vn_htable_t ht;
507 struct vnode *kvps;
508 uintptr_t vn_size = 0;
509
510 bzero(&stats, sizeof (memstat_t));
511
512 /*
513 * -s size, is an internal option. It specifies the size of vn_htable.
514 * Hash table size is set in the following order:
515 * If user has specified the size that is larger than VN_LARGE: try it,
516 * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if
517 * failed to allocate default to VN_SMALL.
518 * For a better efficiency of hash table it is highly recommended to
519 * set size to a prime number.
520 */
521 if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
522 's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
523 return (DCMD_USAGE);
524
525 /* Initialize vnode hash list and queue */
526 vn_htable_init(&ht, vn_size);
527 stats.ms_vn_htable = &ht;
528
529 /* Total physical memory */
530 if (mdb_readvar(&total_pages, "total_pages") == -1) {
531 mdb_warn("unable to read total_pages");
532 return (DCMD_ERR);
533 }
534
535 /* Artificially limited memory */
536 if (mdb_readvar(&physmem, "physmem") == -1) {
537 mdb_warn("unable to read physmem");
538 return (DCMD_ERR);
539 }
540
541 /* read kernel vnode array pointer */
542 if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps",
543 (GElf_Sym *)&sym) == -1) {
544 mdb_warn("unable to read kvps");
545 return (DCMD_ERR);
546 }
547 kvps = (struct vnode *)(uintptr_t)sym.st_value;
548 stats.ms_kvp = &kvps[KV_KVP];
549
550 /*
551 * Read the zio vnode pointer.
552 */
553 stats.ms_zvp = &kvps[KV_ZVP];
554
555 /*
556 * If physmem != total_pages, then the administrator has limited the
557 * number of pages available in the system. Excluded pages are
558 * associated with the unused pages vnode. Read this vnode so the
559 * pages can be excluded in the page accounting.
560 */
561 if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
562 (GElf_Sym *)&sym) == -1) {
563 mdb_warn("unable to read unused_pages_vp");
564 return (DCMD_ERR);
565 }
566 stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;
567
568 /* walk all pages, collect statistics */
569 if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback,
570 &stats) == -1) {
571 mdb_warn("can't walk memseg");
572 return (DCMD_ERR);
573 }
574
575 #define MS_PCT_TOTAL(x) ((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
576 ((physmem) * 10)))
577
578 mdb_printf("Page Summary Pages MB"
579 " %%Tot\n");
580 mdb_printf("------------ ---------------- ----------------"
581 " ----\n");
582 mdb_printf("Kernel %16llu %16llu %3lu%%\n",
583 stats.ms_kmem,
584 (uint64_t)stats.ms_kmem * PAGESIZE / (1024 * 1024),
585 MS_PCT_TOTAL(stats.ms_kmem));
586
587 if (stats.ms_bootpages != 0) {
588 mdb_printf("Boot pages %16llu %16llu %3lu%%\n",
589 stats.ms_bootpages,
590 (uint64_t)stats.ms_bootpages * PAGESIZE / (1024 * 1024),
591 MS_PCT_TOTAL(stats.ms_bootpages));
592 }
593
594 if (stats.ms_zfs_data != 0) {
595 mdb_printf("ZFS File Data %16llu %16llu %3lu%%\n",
596 stats.ms_zfs_data,
597 (uint64_t)stats.ms_zfs_data * PAGESIZE / (1024 * 1024),
598 MS_PCT_TOTAL(stats.ms_zfs_data));
599 }
600
601 mdb_printf("Anon %16llu %16llu %3lu%%\n",
602 stats.ms_anon,
603 (uint64_t)stats.ms_anon * PAGESIZE / (1024 * 1024),
604 MS_PCT_TOTAL(stats.ms_anon));
605 mdb_printf("Exec and libs %16llu %16llu %3lu%%\n",
606 stats.ms_exec,
607 (uint64_t)stats.ms_exec * PAGESIZE / (1024 * 1024),
608 MS_PCT_TOTAL(stats.ms_exec));
609 mdb_printf("Page cache %16llu %16llu %3lu%%\n",
610 stats.ms_vnode,
611 (uint64_t)stats.ms_vnode * PAGESIZE / (1024 * 1024),
612 MS_PCT_TOTAL(stats.ms_vnode));
613 mdb_printf("Free (cachelist) %16llu %16llu %3lu%%\n",
614 stats.ms_cachelist,
615 (uint64_t)stats.ms_cachelist * PAGESIZE / (1024 * 1024),
616 MS_PCT_TOTAL(stats.ms_cachelist));
617
618 /*
619 * occasionally, we double count pages above. To avoid printing
620 * absurdly large values for freemem, we clamp it at zero.
621 */
622 if (physmem > stats.ms_total)
623 freemem = physmem - stats.ms_total;
624 else
625 freemem = 0;
626
627 mdb_printf("Free (freelist) %16lu %16llu %3lu%%\n", freemem,
628 (uint64_t)freemem * PAGESIZE / (1024 * 1024),
629 MS_PCT_TOTAL(freemem));
630
631 mdb_printf("\nTotal %16lu %16lu\n",
632 physmem,
633 (uint64_t)physmem * PAGESIZE / (1024 * 1024));
634
635 if (physmem != total_pages) {
636 mdb_printf("Physical %16lu %16lu\n",
637 total_pages,
638 (uint64_t)total_pages * PAGESIZE / (1024 * 1024));
639 }
640
641 #undef MS_PCT_TOTAL
642
643 return (DCMD_OK);
644 }
645
/* Help text for the ::pagelookup dcmd. */
void
pagelookup_help(void)
{
	mdb_printf(
	    "Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n"
	    "\n"
	    "Can be invoked three different ways:\n\n"
	    "    ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n"
	    "    %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n"
	    "    %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n"
	    "\n"
	    "The latter two forms are useful in pipelines.\n");
}
659
660 int
661 pagelookup(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
662 {
663 uintptr_t vp = -(uintptr_t)1;
664 uint64_t offset = -(uint64_t)1;
665
666 uintptr_t pageaddr;
667 int hasaddr = (flags & DCMD_ADDRSPEC);
668 int usedaddr = 0;
669
670 if (mdb_getopts(argc, argv,
671 'v', MDB_OPT_UINTPTR, &vp,
672 'o', MDB_OPT_UINT64, &offset,
673 0) != argc) {
674 return (DCMD_USAGE);
675 }
676
677 if (vp == -(uintptr_t)1) {
678 if (offset == -(uint64_t)1) {
679 mdb_warn(
680 "pagelookup: at least one of -v vp or -o offset "
681 "required.\n");
682 return (DCMD_USAGE);
683 }
684 vp = addr;
685 usedaddr = 1;
686 } else if (offset == -(uint64_t)1) {
687 offset = mdb_get_dot();
688 usedaddr = 1;
689 }
690 if (usedaddr && !hasaddr) {
691 mdb_warn("pagelookup: address required\n");
692 return (DCMD_USAGE);
693 }
694 if (!usedaddr && hasaddr) {
695 mdb_warn(
696 "pagelookup: address specified when both -v and -o were "
697 "passed");
698 return (DCMD_USAGE);
699 }
700
701 pageaddr = mdb_page_lookup(vp, offset);
702 if (pageaddr == 0) {
703 mdb_warn("pagelookup: no page for {vp = %p, offset = %llp)\n",
704 vp, offset);
705 return (DCMD_OK);
706 }
707 mdb_printf("%#lr\n", pageaddr); /* this is PIPE_OUT friendly */
708 return (DCMD_OK);
709 }
710
711 /*ARGSUSED*/
712 int
713 page_num2pp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
714 {
715 uintptr_t pp;
716
717 if (argc != 0 || !(flags & DCMD_ADDRSPEC)) {
718 return (DCMD_USAGE);
719 }
720
721 pp = mdb_pfn2page((pfn_t)addr);
722 if (pp == 0) {
723 return (DCMD_ERR);
724 }
725
726 if (flags & DCMD_PIPE_OUT) {
727 mdb_printf("%#lr\n", pp);
728 } else {
729 mdb_printf("%lx has page_t at %#lx\n", (pfn_t)addr, pp);
730 }
731
732 return (DCMD_OK);
733 }
734
/*
 * ::page dcmd - print a one-line summary of a page_t.  Without an
 * address, walks all pages via the "page" walker and applies itself to
 * each.
 */
int
page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	page_t p;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
			mdb_warn("can't walk pages");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags)) {
		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
		    "PAGE", "VNODE", "OFFSET", "SELOCK",
		    "LCT", "COW", "IO", "FS", "ST");
	}

	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
		mdb_warn("can't read page_t at %#lx", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
	    p.p_iolock_state, p.p_fsdata, p.p_state);

	return (DCMD_OK);
}
765
766 int
767 swap_walk_init(mdb_walk_state_t *wsp)
768 {
769 void *ptr;
770
771 if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
772 mdb_warn("swapinfo not found or invalid");
773 return (WALK_ERR);
774 }
775
776 wsp->walk_addr = (uintptr_t)ptr;
777
778 return (WALK_NEXT);
779 }
780
781 int
782 swap_walk_step(mdb_walk_state_t *wsp)
783 {
784 uintptr_t sip;
785 struct swapinfo si;
786
787 sip = wsp->walk_addr;
788
789 if (sip == NULL)
790 return (WALK_DONE);
791
792 if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
793 mdb_warn("unable to read swapinfo at %#lx", sip);
794 return (WALK_ERR);
795 }
796
797 wsp->walk_addr = (uintptr_t)si.si_next;
798
799 return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
800 }
801
/*
 * ::swapinfo dcmd - print one swap device entry, or walk and print all of
 * them when no address is given.
 */
int
swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	struct swapinfo si;
	char *name;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
			mdb_warn("can't walk swapinfo");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags)) {
		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
	}

	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
		mdb_warn("can't read swapinfo at %#lx", addr);
		return (DCMD_ERR);
	}

	/*
	 * Read the device name; si_pnamelen presumably covers the
	 * terminating NUL -- TODO confirm against struct swapinfo.
	 * UM_GC memory is reclaimed when the dcmd completes.
	 */
	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
		name = "*error*";

	mdb_printf("%0?lx %?p %9d %9d %s\n",
	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);

	return (DCMD_OK);
}
835
836 int
837 memlist_walk_step(mdb_walk_state_t *wsp)
838 {
839 uintptr_t mlp;
840 struct memlist ml;
841
842 mlp = wsp->walk_addr;
843
844 if (mlp == NULL)
845 return (WALK_DONE);
846
847 if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
848 mdb_warn("unable to read memlist at %#lx", mlp);
849 return (WALK_ERR);
850 }
851
852 wsp->walk_addr = (uintptr_t)ml.ml_next;
853
854 return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
855 }
856
/*
 * ::memlist dcmd - print a memlist entry, or (without an address) dump
 * one or more of the well-known kernel memlists selected by -i/-a/-v.
 */
int
memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	struct memlist ml;

	if (!(flags & DCMD_ADDRSPEC)) {
		uintptr_t ptr;
		uint_t list = 0;
		int i;
		/* indexed by the bit set in 'list' below: -i, -a, -v */
		static const char *lists[] = {
			"phys_install",
			"phys_avail",
			"virt_avail"
		};

		if (mdb_getopts(argc, argv,
		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
			return (DCMD_USAGE);

		/* default to phys_install when no flag was given */
		if (!list)
			list = 1;

		/* walk each requested list, consuming 'list' bit by bit */
		for (i = 0; list; i++, list >>= 1) {
			if (!(list & 1))
				continue;
			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
			    (ptr == NULL)) {
				mdb_warn("%s not found or invalid", lists[i]);
				return (DCMD_ERR);
			}

			mdb_printf("%s:\n", lists[i]);
			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
			    ptr) == -1) {
				mdb_warn("can't walk memlist");
				return (DCMD_ERR);
			}
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");

	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
		mdb_warn("can't read memlist at %#lx", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.ml_address, ml.ml_size);

	return (DCMD_OK);
}
912
913 int
914 seg_walk_init(mdb_walk_state_t *wsp)
915 {
916 if (wsp->walk_addr == NULL) {
917 mdb_warn("seg walk must begin at struct as *\n");
918 return (WALK_ERR);
919 }
920
921 /*
922 * this is really just a wrapper to AVL tree walk
923 */
924 wsp->walk_addr = (uintptr_t)&((struct as *)wsp->walk_addr)->a_segtree;
925 return (avl_walk_init(wsp));
926 }
927
928 /*ARGSUSED*/
929 int
930 seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
931 {
932 struct seg s;
933
934 if (argc != 0)
935 return (DCMD_USAGE);
936
937 if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
938 mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
939 "SEG", "BASE", "SIZE", "DATA", "OPS");
940 }
941
942 if (mdb_vread(&s, sizeof (s), addr) == -1) {
943 mdb_warn("failed to read seg at %p", addr);
944 return (DCMD_ERR);
945 }
946
947 mdb_printf("%?p %?p %?lx %?p %a\n",
948 addr, s.s_base, s.s_size, s.s_data, s.s_ops);
949
950 return (DCMD_OK);
951 }
952
/* Resolved s_ops symbol addresses used to classify segments in ::pmap. */
typedef struct pmap_walk_types {
	uintptr_t pwt_segvn;	/* address of segvn_ops, or 0 if unresolved */
	uintptr_t pwt_seghole;	/* address of seghole_ops, or 0 if unresolved */
} pmap_walk_types_t;
957
958 /*ARGSUSED*/
959 static int
960 pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
961 {
962 pgcnt_t *nres = out;
963
964 (*nres)++;
965
966 return (WALK_NEXT);
967 }
968
969 static int
970 pmap_walk_seg(uintptr_t addr, const struct seg *seg,
971 const pmap_walk_types_t *types)
972 {
973 const uintptr_t ops = (uintptr_t)seg->s_ops;
974
975 mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
976
977 if (ops == types->pwt_segvn && seg->s_data != NULL) {
978 struct segvn_data svn;
979 pgcnt_t nres = 0;
980
981 svn.vp = NULL;
982 (void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);
983
984 /*
985 * Use the segvn_pages walker to find all of the in-core pages
986 * for this mapping.
987 */
988 if (mdb_pwalk("segvn_pages", pmap_walk_count_pages, &nres,
989 (uintptr_t)seg->s_data) == -1) {
990 mdb_warn("failed to walk segvn_pages (s_data=%p)",
991 seg->s_data);
992 }
993 mdb_printf(" %7ldk", (nres * PAGESIZE) / 1024);
994
995 if (svn.vp != NULL) {
996 char buf[29];
997
998 mdb_vnode2path((uintptr_t)svn.vp, buf, sizeof (buf));
999 mdb_printf(" %s", buf);
1000 } else {
1001 mdb_printf(" [ anon ]");
1002 }
1003 } else if (ops == types->pwt_seghole && seg->s_data != NULL) {
1004 seghole_data_t shd;
1005 char name[16];
1006
1007 (void) mdb_vread(&shd, sizeof (shd), (uintptr_t)seg->s_data);
1008 if (shd.shd_name == NULL || mdb_readstr(name, sizeof (name),
1009 (uintptr_t)shd.shd_name) == 0) {
1010 name[0] = '\0';
1011 }
1012
1013 mdb_printf(" %8s [ hole%s%s ]", "-",
1014 name[0] == '0' ? "" : ":", name);
1015 } else {
1016 mdb_printf(" %8s [ &%a ]", "?", seg->s_ops);
1017 }
1018
1019 mdb_printf("\n");
1020 return (WALK_NEXT);
1021 }
1022
/*
 * ::pmap -q seg callback: like pmap_walk_seg() but skips the expensive
 * resident-page walk and vnode-to-path translation, printing the raw
 * vnode pointer instead.
 */
static int
pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg,
    const pmap_walk_types_t *types)
{
	const uintptr_t ops = (uintptr_t)seg->s_ops;

	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);

	if (ops == types->pwt_segvn && seg->s_data != NULL) {
		struct segvn_data svn;

		svn.vp = NULL;
		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);

		if (svn.vp != NULL) {
			mdb_printf(" %0?p", svn.vp);
		} else {
			mdb_printf(" [ anon ]");
		}
	} else {
		mdb_printf(" [ &%a ]", seg->s_ops);
	}

	mdb_printf("\n");
	return (WALK_NEXT);
}
1049
/*
 * ::pmap dcmd - print the address-space mappings of the proc_t at addr,
 * one line per segment.  With -q, skip the resident-size and path
 * resolution for a faster summary.
 */
/*ARGSUSED*/
int
pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	proc_t proc;
	uint_t quick = FALSE;
	mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg;
	pmap_walk_types_t wtypes = { 0 };

	GElf_Sym sym;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	if (mdb_getopts(argc, argv,
	    'q', MDB_OPT_SETBITS, TRUE, &quick, NULL) != argc)
		return (DCMD_USAGE);

	if (mdb_vread(&proc, sizeof (proc), addr) == -1) {
		mdb_warn("failed to read proc at %p", addr);
		return (DCMD_ERR);
	}

	/*
	 * Resolve the segment-ops symbols used to classify segments; a
	 * failed lookup leaves the corresponding type at 0, which simply
	 * means no segment will match it.
	 */
	if (mdb_lookup_by_name("segvn_ops", &sym) == 0)
		wtypes.pwt_segvn = (uintptr_t)sym.st_value;
	if (mdb_lookup_by_name("seghole_ops", &sym) == 0)
		wtypes.pwt_seghole = (uintptr_t)sym.st_value;

	mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE");

	if (quick) {
		mdb_printf("VNODE\n");
		cb = (mdb_walk_cb_t)pmap_walk_seg_quick;
	} else {
		mdb_printf("%8s %s\n", "RES", "PATH");
	}

	if (mdb_pwalk("seg", cb, (void *)&wtypes, (uintptr_t)proc.p_as) == -1) {
		mdb_warn("failed to walk segments of as %p", proc.p_as);
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}
1094
/*
 * Per-walk state for the anon walkers.  The anon_hdr is either a single
 * flat array of anon pointers, or a two-level structure of chunk
 * pointers; aw_levtwo is NULL in the flat case.
 */
typedef struct anon_walk_data {
	uintptr_t *aw_levone;	/* level-one array (slots or chunk ptrs) */
	uintptr_t *aw_levtwo;	/* current level-two chunk; NULL if flat */
	size_t aw_minslot;	/* first slot to report */
	size_t aw_maxslot;	/* one past the last slot to report */
	pgcnt_t aw_nlevone;	/* number of level-one entries */
	pgcnt_t aw_levone_ndx;	/* current level-one index */
	size_t aw_levtwo_ndx;	/* current index within the level-two chunk */
	struct anon_map *aw_ampp;	/* target anon_map's address */
	struct anon_map aw_amp;		/* local copy of the anon_map */
	struct anon_hdr aw_ahp;		/* local copy of its anon_hdr */
	int aw_all;	/* report all anon pointers, even NULLs */
} anon_walk_data_t;
1108
1109 int
1110 anon_walk_init_common(mdb_walk_state_t *wsp, ulong_t minslot, ulong_t maxslot)
1111 {
1112 anon_walk_data_t *aw;
1113
1114 if (wsp->walk_addr == NULL) {
1115 mdb_warn("anon walk doesn't support global walks\n");
1116 return (WALK_ERR);
1117 }
1118
1119 aw = mdb_alloc(sizeof (anon_walk_data_t), UM_SLEEP);
1120 aw->aw_ampp = (struct anon_map *)wsp->walk_addr;
1121
1122 if (mdb_vread(&aw->aw_amp, sizeof (aw->aw_amp), wsp->walk_addr) == -1) {
1123 mdb_warn("failed to read anon map at %p", wsp->walk_addr);
1124 mdb_free(aw, sizeof (anon_walk_data_t));
1125 return (WALK_ERR);
1126 }
1127
1128 if (mdb_vread(&aw->aw_ahp, sizeof (aw->aw_ahp),
1129 (uintptr_t)(aw->aw_amp.ahp)) == -1) {
1130 mdb_warn("failed to read anon hdr ptr at %p", aw->aw_amp.ahp);
1131 mdb_free(aw, sizeof (anon_walk_data_t));
1132 return (WALK_ERR);
1133 }
1134
1135 /* update min and maxslot with the given constraints */
1136 maxslot = MIN(maxslot, aw->aw_ahp.size);
1137 minslot = MIN(minslot, maxslot);
1138
1139 if (aw->aw_ahp.size <= ANON_CHUNK_SIZE ||
1140 (aw->aw_ahp.flags & ANON_ALLOC_FORCE)) {
1141 aw->aw_nlevone = maxslot;
1142 aw->aw_levone_ndx = minslot;
1143 aw->aw_levtwo = NULL;
1144 } else {
1145 aw->aw_nlevone =
1146 (maxslot + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
1147 aw->aw_levone_ndx = 0;
1148 aw->aw_levtwo =
1149 mdb_zalloc(ANON_CHUNK_SIZE * sizeof (uintptr_t), UM_SLEEP);
1150 }
1151
1152 aw->aw_levone =
1153 mdb_alloc(aw->aw_nlevone * sizeof (uintptr_t), UM_SLEEP);
1154 aw->aw_all = (wsp->walk_arg == ANON_WALK_ALL);
1155
1156 mdb_vread(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t),
1157 (uintptr_t)aw->aw_ahp.array_chunk);
1158
1159 aw->aw_levtwo_ndx = 0;
1160 aw->aw_minslot = minslot;
1161 aw->aw_maxslot = maxslot;
1162
1163 out:
1164 wsp->walk_data = aw;
1165 return (0);
1166 }
1167
/*
 * Report the next anon slot in [aw_minslot, aw_maxslot).  For two-level
 * anon_hdrs, second-level chunks are read in on demand; in ANON_WALK_ALL
 * mode (aw_all), NULL slots are reported too, as NULL/NULL callback
 * arguments.
 */
int
anon_walk_step(mdb_walk_state_t *wsp)
{
	anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
	struct anon anon;
	uintptr_t anonptr;
	ulong_t slot;

	/*
	 * Once we've walked through level one, we're done.
	 */
	if (aw->aw_levone_ndx >= aw->aw_nlevone) {
		return (WALK_DONE);
	}

	if (aw->aw_levtwo == NULL) {
		/* Flat map: level one holds the anon pointers directly. */
		anonptr = aw->aw_levone[aw->aw_levone_ndx];
		aw->aw_levone_ndx++;
	} else {
		/* At a chunk boundary: load the next second-level chunk. */
		if (aw->aw_levtwo_ndx == 0) {
			uintptr_t levtwoptr;

			/* The first time through, skip to our first index. */
			if (aw->aw_levone_ndx == 0) {
				aw->aw_levone_ndx =
				    aw->aw_minslot / ANON_CHUNK_SIZE;
				aw->aw_levtwo_ndx =
				    aw->aw_minslot % ANON_CHUNK_SIZE;
			}

			levtwoptr = (uintptr_t)aw->aw_levone[aw->aw_levone_ndx];

			if (levtwoptr == NULL) {
				if (!aw->aw_all) {
					/* Empty chunk; skip it entirely. */
					aw->aw_levtwo_ndx = 0;
					aw->aw_levone_ndx++;
					return (WALK_NEXT);
				}
				/*
				 * Reporting NULLs too: synthesize an
				 * all-NULL chunk in place of the read.
				 */
				bzero(aw->aw_levtwo,
				    ANON_CHUNK_SIZE * sizeof (uintptr_t));

			} else if (mdb_vread(aw->aw_levtwo,
			    ANON_CHUNK_SIZE * sizeof (uintptr_t), levtwoptr) ==
			    -1) {
				mdb_warn("unable to read anon_map %p's "
				    "second-level map %d at %p",
				    aw->aw_ampp, aw->aw_levone_ndx,
				    levtwoptr);
				return (WALK_ERR);
			}
		}
		slot = aw->aw_levone_ndx * ANON_CHUNK_SIZE + aw->aw_levtwo_ndx;
		anonptr = aw->aw_levtwo[aw->aw_levtwo_ndx];

		/* update the indices for next time */
		aw->aw_levtwo_ndx++;
		if (aw->aw_levtwo_ndx == ANON_CHUNK_SIZE) {
			aw->aw_levtwo_ndx = 0;
			aw->aw_levone_ndx++;
		}

		/* make sure the slot # is in the requested range */
		if (slot >= aw->aw_maxslot) {
			return (WALK_DONE);
		}
	}

	if (anonptr != NULL) {
		/*
		 * NOTE(review): this mdb_vread's return value is ignored;
		 * on a failed read an uninitialized "anon" is handed to
		 * the callback.  Confirm whether that is intentional.
		 */
		mdb_vread(&anon, sizeof (anon), anonptr);
		return (wsp->walk_callback(anonptr, &anon, wsp->walk_cbdata));
	}
	if (aw->aw_all) {
		return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
	}
	return (WALK_NEXT);
}
1244
1245 void
1246 anon_walk_fini(mdb_walk_state_t *wsp)
1247 {
1248 anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1249
1250 if (aw->aw_levtwo != NULL)
1251 mdb_free(aw->aw_levtwo, ANON_CHUNK_SIZE * sizeof (uintptr_t));
1252
1253 mdb_free(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t));
1254 mdb_free(aw, sizeof (anon_walk_data_t));
1255 }
1256
1257 int
1258 anon_walk_init(mdb_walk_state_t *wsp)
1259 {
1260 return (anon_walk_init_common(wsp, 0, ULONG_MAX));
1261 }
1262
1263 int
1264 segvn_anon_walk_init(mdb_walk_state_t *wsp)
1265 {
1266 const uintptr_t svd_addr = wsp->walk_addr;
1267 uintptr_t amp_addr;
1268 uintptr_t seg_addr;
1269 struct segvn_data svd;
1270 struct anon_map amp;
1271 struct seg seg;
1272
1273 if (svd_addr == NULL) {
1274 mdb_warn("segvn_anon walk doesn't support global walks\n");
1275 return (WALK_ERR);
1276 }
1277 if (mdb_vread(&svd, sizeof (svd), svd_addr) == -1) {
1278 mdb_warn("segvn_anon walk: unable to read segvn_data at %p",
1279 svd_addr);
1280 return (WALK_ERR);
1281 }
1282 if (svd.amp == NULL) {
1283 mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n",
1284 svd_addr);
1285 return (WALK_ERR);
1286 }
1287 amp_addr = (uintptr_t)svd.amp;
1288 if (mdb_vread(&, sizeof (amp), amp_addr) == -1) {
1289 mdb_warn("segvn_anon walk: unable to read amp %p for "
1290 "segvn_data %p", amp_addr, svd_addr);
1291 return (WALK_ERR);
1292 }
1293 seg_addr = (uintptr_t)svd.seg;
1294 if (mdb_vread(&seg, sizeof (seg), seg_addr) == -1) {
1295 mdb_warn("segvn_anon walk: unable to read seg %p for "
1296 "segvn_data %p", seg_addr, svd_addr);
1297 return (WALK_ERR);
1298 }
1299 if ((seg.s_size + (svd.anon_index << PAGESHIFT)) > amp.size) {
1300 mdb_warn("anon map %p is too small for segment %p\n",
1301 amp_addr, seg_addr);
1302 return (WALK_ERR);
1303 }
1304
1305 wsp->walk_addr = amp_addr;
1306 return (anon_walk_init_common(wsp,
1307 svd.anon_index, svd.anon_index + (seg.s_size >> PAGESHIFT)));
1308 }
1309
1310
/*
 * One resident page of a vnode-backed segment: the page's offset
 * within the vnode and the address of its page_t.
 */
typedef struct {
	u_offset_t svs_offset;		/* page's offset within the vnode */
	uintptr_t svs_page;		/* address of the page_t */
} segvn_sparse_t;
/* Cap the sparse array at 128K worth of entries. */
#define SEGVN_MAX_SPARSE ((128 * 1024) / sizeof (segvn_sparse_t))
1316
/*
 * State for the "segvn_pages" walker; segvn_pages_walk_init() chooses
 * among three strategies (sparse page array, per-offset vnode lookup,
 * or a layered anon walk) based on the segvn_data's shape.
 */
typedef struct {
	uintptr_t svw_svdp;		/* address of the target segvn_data */
	struct segvn_data svw_svd;	/* local copy of the segvn_data */
	struct seg svw_seg;		/* local copy of its seg */
	size_t svw_walkoff;		/* current byte offset in the segment */
	ulong_t svw_anonskip;		/* zeroed at init; not otherwise used
					 * in this part of the file */
	segvn_sparse_t *svw_sparse;	/* sorted resident pages, or NULL */
	size_t svw_sparse_idx;		/* next sparse entry to report */
	size_t svw_sparse_count;	/* number of valid sparse entries */
	size_t svw_sparse_size;		/* allocated sparse capacity */
	uint8_t svw_sparse_overflow;	/* too many pages; array abandoned */
	uint8_t svw_all;		/* report NULL for absent pages too */
} segvn_walk_data_t;
1330
1331 static int
1332 segvn_sparse_fill(uintptr_t addr, const void *pp_arg, void *arg)
1333 {
1334 segvn_walk_data_t *const svw = arg;
1335 const page_t *const pp = pp_arg;
1336 const u_offset_t offset = pp->p_offset;
1337 segvn_sparse_t *const cur =
1338 &svw->svw_sparse[svw->svw_sparse_count];
1339
1340 /* See if the page is of interest */
1341 if ((u_offset_t)(offset - svw->svw_svd.offset) >= svw->svw_seg.s_size) {
1342 return (WALK_NEXT);
1343 }
1344 /* See if we have space for the new entry, then add it. */
1345 if (svw->svw_sparse_count >= svw->svw_sparse_size) {
1346 svw->svw_sparse_overflow = 1;
1347 return (WALK_DONE);
1348 }
1349 svw->svw_sparse_count++;
1350 cur->svs_offset = offset;
1351 cur->svs_page = addr;
1352 return (WALK_NEXT);
1353 }
1354
1355 static int
1356 segvn_sparse_cmp(const void *lp, const void *rp)
1357 {
1358 const segvn_sparse_t *const l = lp;
1359 const segvn_sparse_t *const r = rp;
1360
1361 if (l->svs_offset < r->svs_offset) {
1362 return (-1);
1363 }
1364 if (l->svs_offset > r->svs_offset) {
1365 return (1);
1366 }
1367 return (0);
1368 }
1369
1370 /*
1371 * Builds on the "anon_all" walker to walk all resident pages in a segvn_data
1372 * structure. For segvn_datas without an anon structure, it just looks up
1373 * pages in the vnode. For segvn_datas with an anon structure, NULL slots
1374 * pass through to the vnode, and non-null slots are checked for residency.
1375 */
1376 int
1377 segvn_pages_walk_init(mdb_walk_state_t *wsp)
1378 {
1379 segvn_walk_data_t *svw;
1380 struct segvn_data *svd;
1381
1382 if (wsp->walk_addr == NULL) {
1383 mdb_warn("segvn walk doesn't support global walks\n");
1384 return (WALK_ERR);
1385 }
1386
1387 svw = mdb_zalloc(sizeof (*svw), UM_SLEEP);
1388 svw->svw_svdp = wsp->walk_addr;
1389 svw->svw_anonskip = 0;
1390 svw->svw_sparse_idx = 0;
1391 svw->svw_walkoff = 0;
1392 svw->svw_all = (wsp->walk_arg == SEGVN_PAGES_ALL);
1393
1394 if (mdb_vread(&svw->svw_svd, sizeof (svw->svw_svd), wsp->walk_addr) ==
1395 -1) {
1396 mdb_warn("failed to read segvn_data at %p", wsp->walk_addr);
1397 mdb_free(svw, sizeof (*svw));
1398 return (WALK_ERR);
1399 }
1400
1401 svd = &svw->svw_svd;
1402 if (mdb_vread(&svw->svw_seg, sizeof (svw->svw_seg),
1403 (uintptr_t)svd->seg) == -1) {
1404 mdb_warn("failed to read seg at %p (from %p)",
1405 svd->seg, &((struct segvn_data *)(wsp->walk_addr))->seg);
1406 mdb_free(svw, sizeof (*svw));
1407 return (WALK_ERR);
1408 }
1409
1410 if (svd->amp == NULL && svd->vp == NULL) {
1411 /* make the walk terminate immediately; no pages */
1412 svw->svw_walkoff = svw->svw_seg.s_size;
1413
1414 } else if (svd->amp == NULL &&
1415 (svw->svw_seg.s_size >> PAGESHIFT) >= SEGVN_MAX_SPARSE) {
1416 /*
1417 * If we don't have an anon pointer, and the segment is large,
1418 * we try to load the in-memory pages into a fixed-size array,
1419 * which is then sorted and reported directly. This is much
1420 * faster than doing a mdb_page_lookup() for each possible
1421 * offset.
1422 *
1423 * If the allocation fails, or there are too many pages
1424 * in-core, we fall back to looking up the pages individually.
1425 */
1426 svw->svw_sparse = mdb_alloc(
1427 SEGVN_MAX_SPARSE * sizeof (*svw->svw_sparse), UM_NOSLEEP);
1428 if (svw->svw_sparse != NULL) {
1429 svw->svw_sparse_size = SEGVN_MAX_SPARSE;
1430
1431 if (mdb_pwalk("page", segvn_sparse_fill, svw,
1432 (uintptr_t)svd->vp) == -1 ||
1433 svw->svw_sparse_overflow) {
1434 mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1435 sizeof (*svw->svw_sparse));
1436 svw->svw_sparse = NULL;
1437 } else {
1438 qsort(svw->svw_sparse, svw->svw_sparse_count,
1439 sizeof (*svw->svw_sparse),
1440 segvn_sparse_cmp);
1441 }
1442 }
1443
1444 } else if (svd->amp != NULL) {
1445 const char *const layer = (!svw->svw_all && svd->vp == NULL) ?
1446 "segvn_anon" : "segvn_anon_all";
1447 /*
1448 * If we're not printing all offsets, and the segvn_data has
1449 * no backing VP, we can use the "segvn_anon" walker, which
1450 * efficiently skips NULL slots.
1451 *
1452 * Otherwise, we layer over the "segvn_anon_all" walker
1453 * (which reports all anon slots, even NULL ones), so that
1454 * segvn_pages_walk_step() knows the precise offset for each
1455 * element. It uses that offset information to look up the
1456 * backing pages for NULL anon slots.
1457 */
1458 if (mdb_layered_walk(layer, wsp) == -1) {
1459 mdb_warn("segvn_pages: failed to layer \"%s\" "
1460 "for segvn_data %p", layer, svw->svw_svdp);
1461 mdb_free(svw, sizeof (*svw));
1462 return (WALK_ERR);
1463 }
1464 }
1465
1466 wsp->walk_data = svw;
1467 return (WALK_NEXT);
1468 }
1469
/*
 * Report the page (if any) backing the segment's current offset, then
 * advance svw_walkoff by one page.  The lookup strategy depends on how
 * segvn_pages_walk_init() set things up: a pre-sorted sparse array, a
 * per-offset <vp, offset> lookup, or the anon slot supplied by the
 * layered anon walker.  In svw_all mode, absent pages are reported as
 * NULL/NULL callback arguments.
 */
int
segvn_pages_walk_step(mdb_walk_state_t *wsp)
{
	segvn_walk_data_t *const svw = wsp->walk_data;
	struct seg *const seg = &svw->svw_seg;
	struct segvn_data *const svd = &svw->svw_svd;
	uintptr_t pp;
	page_t page;

	/* If we've walked off the end of the segment, we're done. */
	if (svw->svw_walkoff >= seg->s_size) {
		return (WALK_DONE);
	}

	/*
	 * If we've got a sparse page array, just send it directly.
	 */
	if (svw->svw_sparse != NULL) {
		u_offset_t off;

		if (svw->svw_sparse_idx >= svw->svw_sparse_count) {
			/* Array exhausted; only NULLs remain to report. */
			pp = NULL;
			if (!svw->svw_all) {
				return (WALK_DONE);
			}
		} else {
			segvn_sparse_t *const svs =
			    &svw->svw_sparse[svw->svw_sparse_idx];
			off = svs->svs_offset - svd->offset;
			if (svw->svw_all && svw->svw_walkoff != off) {
				/* Not yet at this entry's offset: emit NULL. */
				pp = NULL;
			} else {
				pp = svs->svs_page;
				svw->svw_sparse_idx++;
			}
		}

	} else if (svd->amp == NULL || wsp->walk_addr == NULL) {
		/*
		 * If there's no anon, or the anon slot is NULL, look up
		 * <vp, offset>.
		 */
		if (svd->vp != NULL) {
			pp = mdb_page_lookup((uintptr_t)svd->vp,
			    svd->offset + svw->svw_walkoff);
		} else {
			pp = NULL;
		}

	} else {
		const struct anon *const anon = wsp->walk_layer;

		/*
		 * We have a "struct anon"; if it's not swapped out,
		 * look up the page.
		 */
		if (anon->an_vp != NULL || anon->an_off != 0) {
			pp = mdb_page_lookup((uintptr_t)anon->an_vp,
			    anon->an_off);
			/* Only complain on a dump; live kernels race. */
			if (pp == 0 && mdb_get_state() != MDB_STATE_RUNNING) {
				mdb_warn("walk segvn_pages: segvn_data %p "
				    "offset %ld, anon page <%p, %llx> not "
				    "found.\n", svw->svw_svdp, svw->svw_walkoff,
				    anon->an_vp, anon->an_off);
			}
		} else {
			if (anon->an_pvp == NULL) {
				mdb_warn("walk segvn_pages: useless struct "
				    "anon at %p\n", wsp->walk_addr);
			}
			pp = NULL;	/* nothing at this offset */
		}
	}

	svw->svw_walkoff += PAGESIZE;	/* Update for the next call */
	if (pp != NULL) {
		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
			mdb_warn("unable to read page_t at %#lx", pp);
			return (WALK_ERR);
		}
		return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
	}
	if (svw->svw_all) {
		return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
	}
	return (WALK_NEXT);
}
1557
1558 void
1559 segvn_pages_walk_fini(mdb_walk_state_t *wsp)
1560 {
1561 segvn_walk_data_t *const svw = wsp->walk_data;
1562
1563 if (svw->svw_sparse != NULL) {
1564 mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1565 sizeof (*svw->svw_sparse));
1566 }
1567 mdb_free(svw, sizeof (*svw));
1568 }
1569
1570 /*
1571 * Grumble, grumble.
1572 */
1573 #define SMAP_HASHFUNC(vp, off) \
1574 ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
1575 ((off) >> MAXBSHIFT)) & smd_hashmsk)
1576
1577 int
1578 vnode2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1579 {
1580 long smd_hashmsk;
1581 int hash;
1582 uintptr_t offset = 0;
1583 struct smap smp;
1584 uintptr_t saddr, kaddr;
1585 uintptr_t smd_hash, smd_smap;
1586 struct seg seg;
1587
1588 if (!(flags & DCMD_ADDRSPEC))
1589 return (DCMD_USAGE);
1590
1591 if (mdb_readvar(&smd_hashmsk, "smd_hashmsk") == -1) {
1592 mdb_warn("failed to read smd_hashmsk");
1593 return (DCMD_ERR);
1594 }
1595
1596 if (mdb_readvar(&smd_hash, "smd_hash") == -1) {
1597 mdb_warn("failed to read smd_hash");
1598 return (DCMD_ERR);
1599 }
1600
1601 if (mdb_readvar(&smd_smap, "smd_smap") == -1) {
1602 mdb_warn("failed to read smd_hash");
1603 return (DCMD_ERR);
1604 }
1605
1606 if (mdb_readvar(&kaddr, "segkmap") == -1) {
1607 mdb_warn("failed to read segkmap");
1608 return (DCMD_ERR);
1609 }
1610
1611 if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1612 mdb_warn("failed to read segkmap at %p", kaddr);
1613 return (DCMD_ERR);
1614 }
1615
1616 if (argc != 0) {
1617 const mdb_arg_t *arg = &argv[0];
1618
1619 if (arg->a_type == MDB_TYPE_IMMEDIATE)
1620 offset = arg->a_un.a_val;
1621 else
1622 offset = (uintptr_t)mdb_strtoull(arg->a_un.a_str);
1623 }
1624
1625 hash = SMAP_HASHFUNC(addr, offset);
1626
1627 if (mdb_vread(&saddr, sizeof (saddr),
1628 smd_hash + hash * sizeof (uintptr_t)) == -1) {
1629 mdb_warn("couldn't read smap at %p",
1630 smd_hash + hash * sizeof (uintptr_t));
1631 return (DCMD_ERR);
1632 }
1633
1634 do {
1635 if (mdb_vread(&smp, sizeof (smp), saddr) == -1) {
1636 mdb_warn("couldn't read smap at %p", saddr);
1637 return (DCMD_ERR);
1638 }
1639
1640 if ((uintptr_t)smp.sm_vp == addr && smp.sm_off == offset) {
1641 mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n",
1642 addr, offset, saddr, ((saddr - smd_smap) /
1643 sizeof (smp)) * MAXBSIZE + seg.s_base);
1644 return (DCMD_OK);
1645 }
1646
1647 saddr = (uintptr_t)smp.sm_hash;
1648 } while (saddr != NULL);
1649
1650 mdb_printf("no smap for vnode %p, offs %p\n", addr, offset);
1651 return (DCMD_OK);
1652 }
1653
1654 /*ARGSUSED*/
1655 int
1656 addr2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1657 {
1658 uintptr_t kaddr;
1659 struct seg seg;
1660 struct segmap_data sd;
1661
1662 if (!(flags & DCMD_ADDRSPEC))
1663 return (DCMD_USAGE);
1664
1665 if (mdb_readvar(&kaddr, "segkmap") == -1) {
1666 mdb_warn("failed to read segkmap");
1667 return (DCMD_ERR);
1668 }
1669
1670 if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1671 mdb_warn("failed to read segkmap at %p", kaddr);
1672 return (DCMD_ERR);
1673 }
1674
1675 if (mdb_vread(&sd, sizeof (sd), (uintptr_t)seg.s_data) == -1) {
1676 mdb_warn("failed to read segmap_data at %p", seg.s_data);
1677 return (DCMD_ERR);
1678 }
1679
1680 mdb_printf("%p is smap %p\n", addr,
1681 ((addr - (uintptr_t)seg.s_base) >> MAXBSHIFT) *
1682 sizeof (struct smap) + (uintptr_t)sd.smd_sm);
1683
1684 return (DCMD_OK);
1685 }