/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2018 Joyent, Inc.  All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <mdb/mdb_whatis.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/kobj.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <vm/page.h>

#include "avl.h"
#include "combined.h"
#include "dist.h"
#include "kmem.h"
#include "list.h"

#define dprintf(x) if (mdb_debug_level) { \
        mdb_printf("kmem debug: ");  \
        /*CSTYLED*/\
        mdb_printf x ;\
}

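/* type bits for the generic kmem walkers (see kmem_walk_init_common()) */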
#define KM_ALLOCATED            0x01
#define KM_FREE                 0x02
#define KM_BUFCTL               0x04
#define KM_CONSTRUCTED          0x08    /* only constructed free buffers */
#define KM_HASH                 0x10

static int mdb_debug_level = 0;

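/*
 * kmem_cache walk callback: register a named walker for each kmem cache,
 * so that an individual cache can be walked directly by its name.
 */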
/*ARGSUSED*/
static int
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
{
        mdb_walker_t w;
        char descr[64];

        (void) mdb_snprintf(descr, sizeof (descr),
            "walk the %s cache", c->cache_name);

        w.walk_name = c->cache_name;
        w.walk_descr = descr;
        w.walk_init = kmem_walk_init;
        w.walk_step = kmem_walk_step;
        w.walk_fini = kmem_walk_fini;
        w.walk_init_arg = (void *)addr;

        if (mdb_add_walker(&w) == -1)
                mdb_warn("failed to add %s walker", c->cache_name);

        return (WALK_NEXT);
}

/*ARGSUSED*/
int
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        mdb_debug_level ^= 1;

        mdb_printf("kmem: debugging is now %s\n",
            mdb_debug_level ? "on" : "off");

        return (DCMD_OK);
}

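/*
 * Walk the global list of kmem caches, starting from the 'kmem_caches'
 * list head.
 */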
int
kmem_cache_walk_init(mdb_walk_state_t *wsp)
{
        GElf_Sym sym;

        if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
                mdb_warn("couldn't find kmem_caches");
                return (WALK_ERR);
        }

        wsp->walk_addr = (uintptr_t)sym.st_value;

        return (list_walk_init_named(wsp, "cache list", "cache"));
}

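/*
 * Visit each per-CPU cache of a given kmem cache by layering on the
 * 'cpu' walker.
 */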
int
kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
{
        if (wsp->walk_addr == 0) {
                mdb_warn("kmem_cpu_cache doesn't support global walks");
                return (WALK_ERR);
        }

        if (mdb_layered_walk("cpu", wsp) == -1) {
                mdb_warn("couldn't walk 'cpu'");
                return (WALK_ERR);
        }

        wsp->walk_data = (void *)wsp->walk_addr;

        return (WALK_NEXT);
}

int
kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
{
        uintptr_t caddr = (uintptr_t)wsp->walk_data;
        const cpu_t *cpu = wsp->walk_layer;
        kmem_cpu_cache_t cc;

        caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);

        if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
                mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
                return (WALK_ERR);
        }

        return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
}

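/*
 * Sanity-check callbacks for the checked slab walks below: each verifies
 * that a slab actually belongs to the cache being walked.
 */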
static int
kmem_slab_check(void *p, uintptr_t saddr, void *arg)
{
        kmem_slab_t *sp = p;
        uintptr_t caddr = (uintptr_t)arg;
        if ((uintptr_t)sp->slab_cache != caddr) {
                mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
                    saddr, caddr, sp->slab_cache);
                return (-1);
        }

        return (0);
}

static int
kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
{
        kmem_slab_t *sp = p;

        int rc = kmem_slab_check(p, saddr, arg);
        if (rc != 0) {
                return (rc);
        }

        if (!KMEM_SLAB_IS_PARTIAL(sp)) {
                mdb_warn("slab %p is not a partial slab\n", saddr);
                return (-1);
        }

        return (0);
}

static int
kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
{
        kmem_slab_t *sp = p;

        int rc = kmem_slab_check(p, saddr, arg);
        if (rc != 0) {
                return (rc);
        }

        if (!KMEM_SLAB_IS_ALL_USED(sp)) {
                mdb_warn("slab %p is not completely allocated\n", saddr);
                return (-1);
        }

        return (0);
}

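/*
 * Callback state for kmem_nth_slab_check(), which terminates the walk
 * after kns_nslabs slabs have been visited.
 */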
typedef struct {
        uintptr_t kns_cache_addr;
        int kns_nslabs;
} kmem_nth_slab_t;

static int
kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
{
        kmem_nth_slab_t *chkp = arg;

        int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
        if (rc != 0) {
                return (rc);
        }

        return (chkp->kns_nslabs-- == 0 ? 1 : 0);
}

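/*
 * The kmem_slab walk is a combined walk: first the list of complete slabs,
 * then the AVL tree of partial slabs.
 */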
static int
kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
{
        uintptr_t caddr = wsp->walk_addr;

        wsp->walk_addr = (uintptr_t)(caddr +
            offsetof(kmem_cache_t, cache_complete_slabs));

        return (list_walk_init_checked(wsp, "slab list", "slab",
            kmem_complete_slab_check, (void *)caddr));
}

static int
kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
{
        uintptr_t caddr = wsp->walk_addr;

        wsp->walk_addr = (uintptr_t)(caddr +
            offsetof(kmem_cache_t, cache_partial_slabs));

        return (avl_walk_init_checked(wsp, "slab list", "slab",
            kmem_partial_slab_check, (void *)caddr));
}

int
kmem_slab_walk_init(mdb_walk_state_t *wsp)
{
        uintptr_t caddr = wsp->walk_addr;

        if (caddr == 0) {
                mdb_warn("kmem_slab doesn't support global walks\n");
                return (WALK_ERR);
        }

        combined_walk_init(wsp);
        combined_walk_add(wsp,
            kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
        combined_walk_add(wsp,
            kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);

        return (WALK_NEXT);
}

static int
kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
{
        uintptr_t caddr = wsp->walk_addr;
        kmem_nth_slab_t *chk;

        chk = mdb_alloc(sizeof (kmem_nth_slab_t),
            UM_SLEEP | UM_GC);
        chk->kns_cache_addr = caddr;
        chk->kns_nslabs = 1;
        wsp->walk_addr = (uintptr_t)(caddr +
            offsetof(kmem_cache_t, cache_complete_slabs));

        return (list_walk_init_checked(wsp, "slab list", "slab",
            kmem_nth_slab_check, chk));
}

int
kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
{
        uintptr_t caddr = wsp->walk_addr;
        kmem_cache_t c;

        if (caddr == 0) {
                mdb_warn("kmem_slab_partial doesn't support global walks\n");
                return (WALK_ERR);
        }

        if (mdb_vread(&c, sizeof (c), caddr) == -1) {
                mdb_warn("couldn't read kmem_cache at %p", caddr);
                return (WALK_ERR);
        }

        combined_walk_init(wsp);

        /*
         * Some consumers (kmem_walk_step(), in particular) require at
         * least one callback if there are any buffers in the cache.  So
         * if there are *no* partial slabs, report the first full slab, if
         * any.
         *
         * Yes, this is ugly, but it's cleaner than the other possibilities.
         */
        if (c.cache_partial_slabs.avl_numnodes == 0) {
                combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
                    list_walk_step, list_walk_fini);
        } else {
                combined_walk_add(wsp, kmem_partial_slab_walk_init,
                    avl_walk_step, avl_walk_fini);
        }

        return (WALK_NEXT);
}

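/*
 * ::kmem_cache [-n name]
 *
 * Without an address, walk all caches; with an address, print a one-line
 * summary of the given cache.  The -n option restricts output to caches
 * whose name contains the given substring.
 */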
int
kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
        kmem_cache_t c;
        const char *filter = NULL;

        if (mdb_getopts(ac, argv,
            'n', MDB_OPT_STR, &filter,
            NULL) != ac) {
                return (DCMD_USAGE);
        }

        if (!(flags & DCMD_ADDRSPEC)) {
                if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
                        mdb_warn("can't walk kmem_cache");
                        return (DCMD_ERR);
                }
                return (DCMD_OK);
        }

        if (DCMD_HDRSPEC(flags))
                mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
                    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");

        if (mdb_vread(&c, sizeof (c), addr) == -1) {
                mdb_warn("couldn't read kmem_cache at %p", addr);
                return (DCMD_ERR);
        }

        if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
                return (DCMD_OK);

        mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
            c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);

        return (DCMD_OK);
}

void
kmem_cache_help(void)
{
        mdb_printf("%s", "Print kernel memory caches.\n\n");
        mdb_dec_indent(2);
        mdb_printf("%<b>OPTIONS%</b>\n");
        mdb_inc_indent(2);
        mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"\n"
"Column\tDescription\n"
"\n"
"ADDR\t\taddress of kmem cache\n"
"NAME\t\tname of kmem cache\n"
"FLAG\t\tvarious cache state flags\n"
"CFLAG\t\tcache creation flags\n"
"BUFSIZE\tobject size in bytes\n"
"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
}

#define LABEL_WIDTH     11
static void
kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
    size_t maxbuckets, size_t minbucketsize)
{
        uint64_t total;
        int buckets;
        int i;
        const int *distarray;
        int complete[2];

        buckets = buffers_per_slab;

        total = 0;
        for (i = 0; i <= buffers_per_slab; i++)
                total += ks_bucket[i];

        if (maxbuckets > 1)
                buckets = MIN(buckets, maxbuckets);

        if (minbucketsize > 1) {
                /*
                 * minbucketsize does not apply to the first bucket reserved
                 * for completely allocated slabs
                 */
                buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
                    minbucketsize));
                if ((buckets < 2) && (buffers_per_slab > 1)) {
                        buckets = 2;
                        minbucketsize = (buffers_per_slab - 1);
                }
        }

        /*
         * The first printed bucket is reserved for completely allocated slabs.
         * Passing (buckets - 1) excludes that bucket from the generated
         * distribution, since we're handling it as a special case.
         */
        complete[0] = buffers_per_slab;
        complete[1] = buffers_per_slab + 1;
        distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);

        mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
        dist_print_header("Buffers", LABEL_WIDTH, "Slabs");

        dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
        /*
         * Print bucket ranges in descending order after the first bucket for
         * completely allocated slabs, so a person can see immediately whether
         * or not there is fragmentation without having to scan possibly
         * multiple screens of output. Starting at (buckets - 2) excludes the
         * extra terminating bucket.
         */
        for (i = buckets - 2; i >= 0; i--) {
                dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
        }
        mdb_printf("\n");
}
#undef LABEL_WIDTH

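/*
 * Walk callbacks used by ::kmem_slabs to check whether a cache has any
 * slabs (or any partial slabs) before a header is printed for it.
 */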
/*ARGSUSED*/
static int
kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
{
        *is_slab = B_TRUE;
        return (WALK_DONE);
}

/*ARGSUSED*/
static int
kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
    boolean_t *is_slab)
{
        /*
         * The "kmem_slab_partial" walker reports the first full slab if there
         * are no partial slabs (for the sake of consumers that require at
         * least one callback if there are any buffers in the cache).
         */
        *is_slab = KMEM_SLAB_IS_PARTIAL(sp);
        return (WALK_DONE);
}

typedef struct kmem_slab_usage {
        int ksu_refcnt;                 /* count of allocated buffers on slab */
        boolean_t ksu_nomove;           /* slab marked non-reclaimable */
} kmem_slab_usage_t;

typedef struct kmem_slab_stats {
        const kmem_cache_t *ks_cp;
        int ks_slabs;                   /* slabs in cache */
        int ks_partial_slabs;           /* partially allocated slabs in cache */
        uint64_t ks_unused_buffers;     /* total unused buffers in cache */
        int ks_max_buffers_per_slab;    /* max buffers per slab */
        int ks_usage_len;               /* ks_usage array length */
        kmem_slab_usage_t *ks_usage;    /* partial slab usage */
        uint_t *ks_bucket;              /* slab usage distribution */
} kmem_slab_stats_t;

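/*
 * kmem_slab walk callback: accumulate per-slab usage statistics for
 * ::kmem_slabs, growing the ks_usage array as partial slabs are found.
 */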
/*ARGSUSED*/
static int
kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
    kmem_slab_stats_t *ks)
{
        kmem_slab_usage_t *ksu;
        long unused;

        ks->ks_slabs++;
        ks->ks_bucket[sp->slab_refcnt]++;

        unused = (sp->slab_chunks - sp->slab_refcnt);
        if (unused == 0) {
                return (WALK_NEXT);
        }

        ks->ks_partial_slabs++;
        ks->ks_unused_buffers += unused;

        if (ks->ks_partial_slabs > ks->ks_usage_len) {
                kmem_slab_usage_t *usage;
                int len = ks->ks_usage_len;

                len = (len == 0 ? 16 : len * 2);
                usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
                if (ks->ks_usage != NULL) {
                        bcopy(ks->ks_usage, usage,
                            ks->ks_usage_len * sizeof (kmem_slab_usage_t));
                        mdb_free(ks->ks_usage,
                            ks->ks_usage_len * sizeof (kmem_slab_usage_t));
                }
                ks->ks_usage = usage;
                ks->ks_usage_len = len;
        }

        ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
        ksu->ksu_refcnt = sp->slab_refcnt;
        ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
        return (WALK_NEXT);
}

static void
kmem_slabs_header(void)
{
        mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
            "", "", "Partial", "", "Unused", "");
        mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
            "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
        mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
            "-------------------------", "--------", "--------", "---------",
            "---------", "------");
}

int
kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        kmem_cache_t c;
        kmem_slab_stats_t stats;
        mdb_walk_cb_t cb;
        int pct;
        int tenths_pct;
        size_t maxbuckets = 1;
        size_t minbucketsize = 0;
        const char *filter = NULL;
        const char *name = NULL;
        uint_t opt_v = FALSE;
        boolean_t buckets = B_FALSE;
        boolean_t skip = B_FALSE;

        if (mdb_getopts(argc, argv,
            'B', MDB_OPT_UINTPTR, &minbucketsize,
            'b', MDB_OPT_UINTPTR, &maxbuckets,
            'n', MDB_OPT_STR, &filter,
            'N', MDB_OPT_STR, &name,
            'v', MDB_OPT_SETBITS, TRUE, &opt_v,
            NULL) != argc) {
                return (DCMD_USAGE);
        }

        if ((maxbuckets != 1) || (minbucketsize != 0)) {
                buckets = B_TRUE;
        }

        if (!(flags & DCMD_ADDRSPEC)) {
                if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
                    argv) == -1) {
                        mdb_warn("can't walk kmem_cache");
                        return (DCMD_ERR);
                }
                return (DCMD_OK);
        }

        if (mdb_vread(&c, sizeof (c), addr) == -1) {
                mdb_warn("couldn't read kmem_cache at %p", addr);
                return (DCMD_ERR);
        }

        if (name == NULL) {
                skip = ((filter != NULL) &&
                    (strstr(c.cache_name, filter) == NULL));
        } else if (filter == NULL) {
                skip = (strcmp(c.cache_name, name) != 0);
        } else {
                /* match either -n or -N */
                skip = ((strcmp(c.cache_name, name) != 0) &&
                    (strstr(c.cache_name, filter) == NULL));
        }

        if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
                kmem_slabs_header();
        } else if ((opt_v || buckets) && !skip) {
                if (DCMD_HDRSPEC(flags)) {
                        kmem_slabs_header();
                } else {
                        boolean_t is_slab = B_FALSE;
                        const char *walker_name;
                        if (opt_v) {
                                cb = (mdb_walk_cb_t)kmem_first_partial_slab;
                                walker_name = "kmem_slab_partial";
                        } else {
                                cb = (mdb_walk_cb_t)kmem_first_slab;
                                walker_name = "kmem_slab";
                        }
                        (void) mdb_pwalk(walker_name, cb, &is_slab, addr);
                        if (is_slab) {
                                kmem_slabs_header();
                        }
                }
        }

        if (skip) {
                return (DCMD_OK);
        }

        bzero(&stats, sizeof (kmem_slab_stats_t));
        stats.ks_cp = &c;
        stats.ks_max_buffers_per_slab = c.cache_maxchunks;
        /* +1 to include a zero bucket */
        stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
            sizeof (*stats.ks_bucket), UM_SLEEP);
        cb = (mdb_walk_cb_t)kmem_slablist_stat;
        (void) mdb_pwalk("kmem_slab", cb, &stats, addr);

        if (c.cache_buftotal == 0) {
                pct = 0;
                tenths_pct = 0;
        } else {
                uint64_t n = stats.ks_unused_buffers * 10000;
                pct = (int)(n / c.cache_buftotal);
                tenths_pct = pct - ((pct / 100) * 100);
                tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
                if (tenths_pct == 10) {
                        pct += 100;
                        tenths_pct = 0;
                }
        }

        pct /= 100;
        mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
            stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
            stats.ks_unused_buffers, pct, tenths_pct);

        if (maxbuckets == 0) {
                maxbuckets = stats.ks_max_buffers_per_slab;
        }

        if (((maxbuckets > 1) || (minbucketsize > 0)) &&
            (stats.ks_slabs > 0)) {
                mdb_printf("\n");
                kmem_slabs_print_dist(stats.ks_bucket,
                    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
        }

        mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
            sizeof (*stats.ks_bucket));

        if (!opt_v) {
                return (DCMD_OK);
        }

        if (opt_v && (stats.ks_partial_slabs > 0)) {
                int i;
                kmem_slab_usage_t *ksu;

                mdb_printf("  %d complete (%d), %d partial:",
                    (stats.ks_slabs - stats.ks_partial_slabs),
                    stats.ks_max_buffers_per_slab,
                    stats.ks_partial_slabs);

                for (i = 0; i < stats.ks_partial_slabs; i++) {
                        ksu = &stats.ks_usage[i];
                        mdb_printf(" %d%s", ksu->ksu_refcnt,
                            (ksu->ksu_nomove ? "*" : ""));
                }
                mdb_printf("\n\n");
        }

        if (stats.ks_usage_len > 0) {
                mdb_free(stats.ks_usage,
                    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
        }

        return (DCMD_OK);
}

void
kmem_slabs_help(void)
{
        mdb_printf("%s",
"Display slab usage per kmem cache.\n\n");
        mdb_dec_indent(2);
        mdb_printf("%<b>OPTIONS%</b>\n");
        mdb_inc_indent(2);
        mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"  -N name\n"
"        exact name of kmem cache\n"
"  -b maxbins\n"
"        Print a distribution of allocated buffers per slab using at\n"
"        most maxbins bins. The first bin is reserved for completely\n"
"        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
"        effect as specifying the maximum allocated buffers per slab\n"
"        or setting minbinsize to 1 (-B 1).\n"
"  -B minbinsize\n"
"        Print a distribution of allocated buffers per slab, making\n"
"        all bins (except the first, reserved for completely allocated\n"
"        slabs) at least minbinsize buffers apart.\n"
"  -v    verbose output: List the allocated buffer count of each partial\n"
"        slab on the free list in order from front to back to show how\n"
"        closely the slabs are ordered by usage. For example\n"
"\n"
"          10 complete (8), 3 partial: 7 3 1\n"
"\n"
"        means there are thirteen slabs with eight buffers each, including\n"
"        three partially allocated slabs with fewer than all eight buffers\n"
"        allocated.\n"
"\n"
"        Buffer allocations are always from the front of the partial slab\n"
"        list. When a buffer is freed from a completely used slab, that\n"
"        slab is added to the front of the partial slab list. Assuming\n"
"        that all buffers are equally likely to be freed soon, the\n"
"        desired order of partial slabs is most-used at the front of the\n"
"        list and least-used at the back (as in the example above).\n"
"        However, if a slab contains an allocated buffer that will not\n"
"        soon be freed, it would be better for that slab to be at the\n"
"        front where all of its buffers can be allocated. Taking a slab\n"
"        off the partial slab list (either with all buffers freed or all\n"
"        buffers allocated) reduces cache fragmentation.\n"
"\n"
"        The allocated buffer count shown for a partial slab (9 in the\n"
"        example below) may be marked as follows:\n"
"\n"
"        9*   An asterisk indicates that kmem has marked the slab non-\n"
"        reclaimable because the kmem client refused to move one of the\n"
"        slab's buffers. Since kmem does not expect to completely free the\n"
"        slab, it moves it to the front of the list in the hope of\n"
"        completely allocating it instead. A slab marked with an asterisk\n"
"        stays marked for as long as it remains on the partial slab list.\n"
"\n"
"Column\t\tDescription\n"
"\n"
"Cache Name\t\tname of kmem cache\n"
"Slabs\t\t\ttotal slab count\n"
"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
"\t\t\t  for accounting structures (debug mode), slab\n"
"\t\t\t  coloring (incremental small offsets to stagger\n"
"\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
}

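/*
 * Comparators for qsort() and bsearch(): addrcmp() orders buffer addresses;
 * bufctlcmp() orders audit bufctls from most to least recent timestamp.
 */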
static int
addrcmp(const void *lhs, const void *rhs)
{
        uintptr_t p1 = *((uintptr_t *)lhs);
        uintptr_t p2 = *((uintptr_t *)rhs);

        if (p1 < p2)
                return (-1);
        if (p1 > p2)
                return (1);
        return (0);
}

static int
bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
{
        const kmem_bufctl_audit_t *bcp1 = *lhs;
        const kmem_bufctl_audit_t *bcp2 = *rhs;

        if (bcp1->bc_timestamp > bcp2->bc_timestamp)
                return (-1);

        if (bcp1->bc_timestamp < bcp2->bc_timestamp)
                return (1);

        return (0);
}

typedef struct kmem_hash_walk {
        uintptr_t *kmhw_table;
        size_t kmhw_nelems;
        size_t kmhw_pos;
        kmem_bufctl_t kmhw_cur;
} kmem_hash_walk_t;

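/*
 * The kmem_hash walker reads the cache's hash table into a local snapshot,
 * then follows the bufctl chain hanging off each bucket.
 */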
int
kmem_hash_walk_init(mdb_walk_state_t *wsp)
{
        kmem_hash_walk_t *kmhw;
        uintptr_t *hash;
        kmem_cache_t c;
        uintptr_t haddr, addr = wsp->walk_addr;
        size_t nelems;
        size_t hsize;

        if (addr == 0) {
                mdb_warn("kmem_hash doesn't support global walks\n");
                return (WALK_ERR);
        }

        if (mdb_vread(&c, sizeof (c), addr) == -1) {
                mdb_warn("couldn't read cache at addr %p", addr);
                return (WALK_ERR);
        }

        if (!(c.cache_flags & KMF_HASH)) {
                mdb_warn("cache %p doesn't have a hash table\n", addr);
                return (WALK_DONE);             /* nothing to do */
        }

        kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
        kmhw->kmhw_cur.bc_next = NULL;
        kmhw->kmhw_pos = 0;

        kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
        hsize = nelems * sizeof (uintptr_t);
        haddr = (uintptr_t)c.cache_hash_table;

        kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
        if (mdb_vread(hash, hsize, haddr) == -1) {
                mdb_warn("failed to read hash table at %p", haddr);
                mdb_free(hash, hsize);
                mdb_free(kmhw, sizeof (kmem_hash_walk_t));
                return (WALK_ERR);
        }

        wsp->walk_data = kmhw;

        return (WALK_NEXT);
}

int
kmem_hash_walk_step(mdb_walk_state_t *wsp)
{
        kmem_hash_walk_t *kmhw = wsp->walk_data;
        uintptr_t addr = 0;

        if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == 0) {
                while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
                        if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != 0)
                                break;
                }
        }
        if (addr == 0)
                return (WALK_DONE);

        if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
                mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
                return (WALK_ERR);
        }

        return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
}

void
kmem_hash_walk_fini(mdb_walk_state_t *wsp)
{
        kmem_hash_walk_t *kmhw = wsp->walk_data;

        if (kmhw == NULL)
                return;

        mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
        mdb_free(kmhw, sizeof (kmem_hash_walk_t));
}

/*
 * Find the address of the bufctl structure for the address 'buf' in cache
 * 'cp', which is at address caddr, and place it in *out.
 */
static int
kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
{
        uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
        kmem_bufctl_t *bcp;
        kmem_bufctl_t bc;

        if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
                mdb_warn("unable to read hash bucket for %p in cache %p",
                    buf, caddr);
                return (-1);
        }

        while (bcp != NULL) {
                if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
                    (uintptr_t)bcp) == -1) {
                        mdb_warn("unable to read bufctl at %p", bcp);
                        return (-1);
                }
                if (bc.bc_addr == buf) {
                        *out = (uintptr_t)bcp;
                        return (0);
                }
                bcp = bc.bc_next;
        }

        mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
        return (-1);
}

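/*
 * Determine the magazine size for a cache, validating its cache_magtype
 * pointer against the kmem_magtype array before dereferencing it.
 */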
int
kmem_get_magsize(const kmem_cache_t *cp)
{
        uintptr_t addr = (uintptr_t)cp->cache_magtype;
        GElf_Sym mt_sym;
        kmem_magtype_t mt;
        int res;

        /*
         * if cpu 0 has a non-zero magsize, it must be correct.  caches
         * with KMF_NOMAGAZINE have disabled their magazine layers, so
         * it is okay to return 0 for them.
         */
        if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
            (cp->cache_flags & KMF_NOMAGAZINE))
                return (res);

        if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
                mdb_warn("unable to read 'kmem_magtype'");
        } else if (addr < mt_sym.st_value ||
            addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
            ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
                mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
                    cp->cache_name, addr);
                return (0);
        }
        if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
                mdb_warn("unable to read magtype at %a", addr);
                return (0);
        }
        return (mt.mt_magsize);
}

/*ARGSUSED*/
static int
kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
{
        *est -= (sp->slab_chunks - sp->slab_refcnt);

        return (WALK_NEXT);
}

/*
 * Returns an upper bound on the number of allocated buffers in a given
 * cache.
 */
size_t
kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
{
        int magsize;
        size_t cache_est;

        cache_est = cp->cache_buftotal;

        (void) mdb_pwalk("kmem_slab_partial",
            (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);

        if ((magsize = kmem_get_magsize(cp)) != 0) {
                size_t mag_est = cp->cache_full.ml_total * magsize;

                if (cache_est >= mag_est) {
                        cache_est -= mag_est;
                } else {
                        mdb_warn("cache %p's magazine layer holds more buffers "
                            "than the slab layer.\n", addr);
                }
        }
        return (cache_est);
}

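/*
 * Read the magazine at 'kmp' into the local buffer 'mp' and append its
 * rounds to maglist[].  This macro relies on the local variables and the
 * 'fail' label of kmem_read_magazines() below.
 */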
#define READMAG_ROUNDS(rounds) { \
        if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
                mdb_warn("couldn't read magazine at %p", kmp); \
                goto fail; \
        } \
        for (i = 0; i < rounds; i++) { \
                maglist[magcnt++] = mp->mag_round[i]; \
                if (magcnt == magmax) { \
                        mdb_warn("%d magazines exceeds fudge factor\n", \
                            magcnt); \
                        goto fail; \
                } \
        } \
}

int
kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
        kmem_magazine_t *kmp, *mp;
        void **maglist = NULL;
        int i, cpu;
        size_t magsize, magmax, magbsize;
        size_t magcnt = 0;

        /*
         * Read the magtype out of the cache, after verifying the pointer's
         * correctness.
         */
        magsize = kmem_get_magsize(cp);
        if (magsize == 0) {
                *maglistp = NULL;
                *magcntp = 0;
                *magmaxp = 0;
                return (WALK_NEXT);
        }

        /*
         * There are several places where we need to go buffer hunting:
         * the per-CPU loaded magazine, the per-CPU spare full magazine,
         * and the full magazine list in the depot.
         *
         * For an upper bound on the number of buffers in the magazine
         * layer, we have the number of magazines on the cache_full
         * list plus at most two magazines per CPU (the loaded and the
         * spare).  Toss in 100 magazines as a fudge factor in case this
         * is live (the number "100" comes from the same fudge factor in
         * crash(1M)).
         */
        magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
        magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);

        if (magbsize >= PAGESIZE / 2) {
                mdb_warn("magazine size for cache %p unreasonable (%x)\n",
                    addr, magbsize);
                return (WALK_ERR);
        }

        maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
        mp = mdb_alloc(magbsize, alloc_flags);
        if (mp == NULL || maglist == NULL)
                goto fail;

        /*
         * First up: the magazines in the depot (i.e. on the cache_full list).
         */
        for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
                READMAG_ROUNDS(magsize);
                kmp = mp->mag_next;

                if (kmp == cp->cache_full.ml_list)
                        break; /* cache_full list loop detected */
        }

        dprintf(("cache_full list done\n"));

        /*
         * Now whip through the CPUs, snagging the loaded magazines
         * and full spares.
         *
         * In order to prevent inconsistent dumps, rounds and prounds
         * are copied aside before dumping begins.
         */
        for (cpu = 0; cpu < ncpus; cpu++) {
                kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
                short rounds, prounds;

                if (KMEM_DUMPCC(ccp)) {
                        rounds = ccp->cc_dump_rounds;
                        prounds = ccp->cc_dump_prounds;
                } else {
                        rounds = ccp->cc_rounds;
                        prounds = ccp->cc_prounds;
                }

                dprintf(("reading cpu cache %p\n",
                    (uintptr_t)ccp - (uintptr_t)cp + addr));

                if (rounds > 0 &&
                    (kmp = ccp->cc_loaded) != NULL) {
                        dprintf(("reading %d loaded rounds\n", rounds));
                        READMAG_ROUNDS(rounds);
                }

                if (prounds > 0 &&
                    (kmp = ccp->cc_ploaded) != NULL) {
                        dprintf(("reading %d previously loaded rounds\n",
                            prounds));
                        READMAG_ROUNDS(prounds);
                }
        }

        dprintf(("magazine layer: %d buffers\n", magcnt));

        if (!(alloc_flags & UM_GC))
                mdb_free(mp, magbsize);

        *maglistp = maglist;
        *magcntp = magcnt;
        *magmaxp = magmax;

        return (WALK_NEXT);

fail:
        if (!(alloc_flags & UM_GC)) {
                if (mp)
                        mdb_free(mp, magbsize);
                if (maglist)
                        mdb_free(maglist, magmax * sizeof (void *));
        }
        return (WALK_ERR);
}

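/*
 * Helpers that hand a buffer address (or its bufctl) to the walk callback.
 */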
static int
kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
{
        return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
}

static int
bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
{
        kmem_bufctl_audit_t b;

        /*
         * if KMF_AUDIT is not set, we know that we're looking at a
         * kmem_bufctl_t.
         */
        if (!(cp->cache_flags & KMF_AUDIT) ||
            mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
                (void) memset(&b, 0, sizeof (b));
                if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
                        mdb_warn("unable to read bufctl at %p", buf);
                        return (WALK_ERR);
                }
        }

        return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
}

typedef struct kmem_walk {
        int kmw_type;

        uintptr_t kmw_addr;             /* cache address */
        kmem_cache_t *kmw_cp;
        size_t kmw_csize;

        /*
         * magazine layer
         */
        void **kmw_maglist;
        size_t kmw_max;
        size_t kmw_count;
        size_t kmw_pos;

        /*
         * slab layer
         */
        char *kmw_valid;        /* to keep track of freed buffers */
        char *kmw_ubase;        /* buffer for slab data */
} kmem_walk_t;

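/*
 * Common initialization for the kmem walk variants: snapshot the cache,
 * read out the magazine layer, then layer on either the hash-table walker
 * (for allocated buffers in KMF_HASH caches) or a slab walker.
 */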
static int
kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
{
        kmem_walk_t *kmw;
        int ncpus, csize;
        kmem_cache_t *cp;
        size_t vm_quantum;

        size_t magmax, magcnt;
        void **maglist = NULL;
        uint_t chunksize, slabsize;
        int status = WALK_ERR;
        uintptr_t addr = wsp->walk_addr;
        const char *layered;

        type &= ~KM_HASH;

        if (addr == 0) {
                mdb_warn("kmem walk doesn't support global walks\n");
                return (WALK_ERR);
        }

        dprintf(("walking %p\n", addr));

        /*
         * First we need to figure out how many CPUs are configured in the
         * system to know how much to slurp out.
         */
        mdb_readvar(&ncpus, "max_ncpus");

        csize = KMEM_CACHE_SIZE(ncpus);
        cp = mdb_alloc(csize, UM_SLEEP);

        if (mdb_vread(cp, csize, addr) == -1) {
                mdb_warn("couldn't read cache at addr %p", addr);
                goto out2;
        }

        /*
         * It's easy for someone to hand us an invalid cache address.
         * Unfortunately, it is hard for this walker to survive an
         * invalid cache cleanly.  So we make sure that:
         *
         *      1. the vmem arena for the cache is readable,
         *      2. the vmem arena's quantum is a power of 2,
         *      3. our slabsize is a multiple of the quantum, and
         *      4. our chunksize is >0 and less than our slabsize.
         */
        if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
            (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
            vm_quantum == 0 ||
            (vm_quantum & (vm_quantum - 1)) != 0 ||
            cp->cache_slabsize < vm_quantum ||
            P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
            cp->cache_chunksize == 0 ||
            cp->cache_chunksize > cp->cache_slabsize) {
                mdb_warn("%p is not a valid kmem_cache_t\n", addr);
                goto out2;
        }

        dprintf(("buf total is %d\n", cp->cache_buftotal));

        if (cp->cache_buftotal == 0) {
                mdb_free(cp, csize);
                return (WALK_DONE);
        }

        /*
         * If they ask for bufctls, but it's a small-slab cache,
         * there is nothing to report.
         */
        if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
                dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
                    cp->cache_flags));
                mdb_free(cp, csize);
                return (WALK_DONE);
        }

        /*
         * If they want constructed buffers, but there's no constructor or
         * the cache has DEADBEEF checking enabled, there is nothing to report.
         */
        if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
            cp->cache_constructor == NULL ||
            (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
                mdb_free(cp, csize);
                return (WALK_DONE);
        }

        /*
         * Read in the contents of the magazine layer
         */
        if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
            &magmax, UM_SLEEP) == WALK_ERR)
                goto out2;

        /*
         * We have all of the buffers from the magazines;  if we are walking
         * allocated buffers, sort them so we can bsearch them later.
         */
        if (type & KM_ALLOCATED)
                qsort(maglist, magcnt, sizeof (void *), addrcmp);

        wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);

        kmw->kmw_type = type;
        kmw->kmw_addr = addr;
        kmw->kmw_cp = cp;
        kmw->kmw_csize = csize;
        kmw->kmw_maglist = maglist;
        kmw->kmw_max = magmax;
        kmw->kmw_count = magcnt;
        kmw->kmw_pos = 0;

        /*
         * When walking allocated buffers in a KMF_HASH cache, we walk the
         * hash table instead of the slab layer.
         */
        if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
                layered = "kmem_hash";

                kmw->kmw_type |= KM_HASH;
        } else {
                /*
                 * If we are walking freed buffers, we only need the
                 * magazine layer plus the partially allocated slabs.
                 * To walk allocated buffers, we need all of the slabs.
                 */
                if (type & KM_ALLOCATED)
                        layered = "kmem_slab";
                else
                        layered = "kmem_slab_partial";

                /*
                 * for small-slab caches, we read in the entire slab.  For
                 * freed buffers, we can just walk the freelist.  For
                 * allocated buffers, we use a 'valid' array to track
                 * the freed buffers.
                 */
                if (!(cp->cache_flags & KMF_HASH)) {
                        chunksize = cp->cache_chunksize;
                        slabsize = cp->cache_slabsize;

                        kmw->kmw_ubase = mdb_alloc(slabsize +
                            sizeof (kmem_bufctl_t), UM_SLEEP);

                        if (type & KM_ALLOCATED)
                                kmw->kmw_valid =
                                    mdb_alloc(slabsize / chunksize, UM_SLEEP);
                }
        }

        status = WALK_NEXT;

        if (mdb_layered_walk(layered, wsp) == -1) {
                mdb_warn("unable to start layered '%s' walk", layered);
                status = WALK_ERR;
        }

out1:
        if (status == WALK_ERR) {
                if (kmw->kmw_valid)
                        mdb_free(kmw->kmw_valid, slabsize / chunksize);

                if (kmw->kmw_ubase)
                        mdb_free(kmw->kmw_ubase, slabsize +
                            sizeof (kmem_bufctl_t));

                if (kmw->kmw_maglist)
                        mdb_free(kmw->kmw_maglist,
                            kmw->kmw_max * sizeof (uintptr_t));

                mdb_free(kmw, sizeof (kmem_walk_t));
                wsp->walk_data = NULL;
        }

out2:
        if (status == WALK_ERR)
                mdb_free(cp, csize);

        return (status);
}

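/*
 * Step function for the generic kmem walk.  For hash-layered walks, report
 * each hashed bufctl whose buffer is not hiding in a magazine.  Otherwise,
 * report the magazine contents once (for freed-buffer walks), then walk
 * the current slab's freelist and, for allocated walks, report whatever
 * chunks remain.
 */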
int
kmem_walk_step(mdb_walk_state_t *wsp)
{
        kmem_walk_t *kmw = wsp->walk_data;
        int type = kmw->kmw_type;
        kmem_cache_t *cp = kmw->kmw_cp;

        void **maglist = kmw->kmw_maglist;
        int magcnt = kmw->kmw_count;

        uintptr_t chunksize, slabsize;
        uintptr_t addr;
        const kmem_slab_t *sp;
        const kmem_bufctl_t *bcp;
        kmem_bufctl_t bc;

        int chunks;
        char *kbase;
        void *buf;
        int i, ret;

        char *valid, *ubase;

        /*
         * first, handle the 'kmem_hash' layered walk case
         */
        if (type & KM_HASH) {
                /*
                 * We have a buffer which has been allocated out of the
                 * global layer. We need to make sure that it's not
                 * actually sitting in a magazine before we report it as
                 * an allocated buffer.
                 */
                buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;

                if (magcnt > 0 &&
                    bsearch(&buf, maglist, magcnt, sizeof (void *),
                    addrcmp) != NULL)
                        return (WALK_NEXT);

                if (type & KM_BUFCTL)
                        return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

                return (kmem_walk_callback(wsp, (uintptr_t)buf));
        }

        ret = WALK_NEXT;

        addr = kmw->kmw_addr;

        /*
         * If we're walking freed buffers, report everything in the
         * magazine layer before processing the first slab.
         */
        if ((type & KM_FREE) && magcnt != 0) {
                kmw->kmw_count = 0;          /* only do this once */
                for (i = 0; i < magcnt; i++) {
                        buf = maglist[i];

                        if (type & KM_BUFCTL) {
                                uintptr_t out;

                                if (cp->cache_flags & KMF_BUFTAG) {
                                        kmem_buftag_t *btp;
                                        kmem_buftag_t tag;

                                        /* LINTED - alignment */
                                        btp = KMEM_BUFTAG(cp, buf);
                                        if (mdb_vread(&tag, sizeof (tag),
                                            (uintptr_t)btp) == -1) {
                                                mdb_warn("reading buftag for "
                                                    "%p at %p", buf, btp);
                                                continue;
                                        }
                                        out = (uintptr_t)tag.bt_bufctl;
                                } else {
                                        if (kmem_hash_lookup(cp, addr, buf,
                                            &out) == -1)
                                                continue;
                                }
                                ret = bufctl_walk_callback(cp, wsp, out);
                        } else {
                                ret = kmem_walk_callback(wsp, (uintptr_t)buf);
                        }

                        if (ret != WALK_NEXT)
                                return (ret);
                }
        }

        /*
         * If they want constructed buffers, we're finished, since the
         * magazine layer holds them all.
         */
        if (type & KM_CONSTRUCTED)
                return (WALK_DONE);

        /*
         * Handle the buffers in the current slab
         */
        chunksize = cp->cache_chunksize;
        slabsize = cp->cache_slabsize;

        sp = wsp->walk_layer;
        chunks = sp->slab_chunks;
        kbase = sp->slab_base;

        dprintf(("kbase is %p\n", kbase));

        if (!(cp->cache_flags & KMF_HASH)) {
                valid = kmw->kmw_valid;
                ubase = kmw->kmw_ubase;

                if (mdb_vread(ubase, chunks * chunksize,
                    (uintptr_t)kbase) == -1) {
                        mdb_warn("failed to read slab contents at %p", kbase);
                        return (WALK_ERR);
                }

                /*
                 * Set up the valid map as fully allocated -- we'll punch
                 * out the freelist.
                 */
                if (type & KM_ALLOCATED)
                        (void) memset(valid, 1, chunks);
        } else {
                valid = NULL;
                ubase = NULL;
        }

        /*
         * walk the slab's freelist
         */
        bcp = sp->slab_head;

        dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

        /*
         * since we could be in the middle of allocating a buffer,
         * our refcnt could be one higher than it ought to be.  So we
         * check one further on the freelist than the count allows.
         */
        for (i = sp->slab_refcnt; i <= chunks; i++) {
                uint_t ndx;

                dprintf(("bcp is %p\n", bcp));

                if (bcp == NULL) {
                        if (i == chunks)
                                break;
                        mdb_warn(
                            "slab %p in cache %p freelist too short by %d\n",
                            sp, addr, chunks - i);
                        break;
                }

                if (cp->cache_flags & KMF_HASH) {
                        if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
                                mdb_warn("failed to read bufctl ptr at %p",
                                    bcp);
                                break;
                        }
                        buf = bc.bc_addr;
                } else {
                        /*
                         * Otherwise the buffer is (or should be) in the slab
                         * that we've read in; determine its offset in the
                         * slab, validate that it's not corrupt, and add to
                         * our base address to find the kmem_bufctl_t.  (Note
                         * that we don't need to add the size of the bufctl
                         * to our offset calculation because of the slop that's
                         * allocated for the buffer at ubase.)
                         */
                        uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;

                        if (offs > chunks * chunksize) {
                                mdb_warn("found corrupt bufctl ptr %p"
                                    " in slab %p in cache %p\n", bcp,
                                    wsp->walk_addr, addr);
                                break;
                        }

                        bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
                        buf = KMEM_BUF(cp, bcp);
                }
1516 
1517                 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1518 
1519                 if (ndx > slabsize / cp->cache_bufsize) {
1520                         /*
1521                          * This is very wrong; we have managed to find
1522                          * a buffer in the slab which shouldn't
1523                          * actually be here.  Emit a warning, and
1524                          * try to continue.
1525                          */
1526                         mdb_warn("buf %p is out of range for "
1527                             "slab %p, cache %p\n", buf, sp, addr);
1528                 } else if (type & KM_ALLOCATED) {
1529                         /*
1530                          * we have found a buffer on the slab's freelist;
1531                          * clear its entry
1532                          */
1533                         valid[ndx] = 0;
1534                 } else {
1535                         /*
1536                          * Report this freed buffer
1537                          */
1538                         if (type & KM_BUFCTL) {
1539                                 ret = bufctl_walk_callback(cp, wsp,
1540                                     (uintptr_t)bcp);
1541                         } else {
1542                                 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1543                         }
1544                         if (ret != WALK_NEXT)
1545                                 return (ret);
1546                 }
1547 
1548                 bcp = bc.bc_next;
1549         }
1550 
1551         if (bcp != NULL) {
1552                 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1553                     sp, addr, bcp));
1554         }
1555 
1556         /*
1557          * If we are walking freed buffers, the loop above handled reporting
1558          * them.
1559          */
1560         if (type & KM_FREE)
1561                 return (WALK_NEXT);
1562 
1563         if (type & KM_BUFCTL) {
1564                 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1565                     "cache %p\n", addr);
1566                 return (WALK_ERR);
1567         }
1568 
1569         /*
1570          * Report allocated buffers, skipping buffers in the magazine layer.
1571          * We only get this far for small-slab caches.
1572          */
1573         for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1574                 buf = (char *)kbase + i * chunksize;
1575 
1576                 if (!valid[i])
1577                         continue;               /* on slab freelist */
1578 
1579                 if (magcnt > 0 &&
1580                     bsearch(&buf, maglist, magcnt, sizeof (void *),
1581                     addrcmp) != NULL)
1582                         continue;               /* in magazine layer */
1583 
1584                 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1585         }
1586         return (ret);
1587 }
1588 
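/*
 * A worked sketch of the bookkeeping above, with invented numbers: for a
 * small-slab cache of four chunks whose freelist holds chunks 1 and 3,
 * the freelist pass leaves the valid map as { 1, 0, 1, 0 }, so the
 * allocated-buffer loop reports chunks 0 and 2, less any buffers that
 * bsearch() finds in the sorted magazine list.
 */
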
1589 void
1590 kmem_walk_fini(mdb_walk_state_t *wsp)
1591 {
1592         kmem_walk_t *kmw = wsp->walk_data;
1593         uintptr_t chunksize;
1594         uintptr_t slabsize;
1595 
1596         if (kmw == NULL)
1597                 return;
1598 
1599         if (kmw->kmw_maglist != NULL)
1600                 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1601 
1602         chunksize = kmw->kmw_cp->cache_chunksize;
1603         slabsize = kmw->kmw_cp->cache_slabsize;
1604 
1605         if (kmw->kmw_valid != NULL)
1606                 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1607         if (kmw->kmw_ubase != NULL)
1608                 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1609 
1610         mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1611         mdb_free(kmw, sizeof (kmem_walk_t));
1612 }
1613 
1614 /*ARGSUSED*/
1615 static int
1616 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1617 {
1618         /*
1619          * Buffers allocated from NOTOUCH caches can also show up as freed
1620          * memory in other caches.  This can be a little confusing, so we
1621          * don't walk NOTOUCH caches when walking all caches (thereby ensuring
1622          * that "::walk kmem" and "::walk freemem" yield disjoint output).
1623          */
1624         if (c->cache_cflags & KMC_NOTOUCH)
1625                 return (WALK_NEXT);
1626 
1627         if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1628             wsp->walk_cbdata, addr) == -1)
1629                 return (WALK_DONE);
1630 
1631         return (WALK_NEXT);
1632 }
1633 
1634 #define KMEM_WALK_ALL(name, wsp) { \
1635         wsp->walk_data = (name); \
1636         if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1637                 return (WALK_ERR); \
1638         return (WALK_DONE); \
1639 }
1640 
1641 int
1642 kmem_walk_init(mdb_walk_state_t *wsp)
1643 {
1644         if (wsp->walk_arg != NULL)
1645                 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1646 
1647         if (wsp->walk_addr == NULL)
1648                 KMEM_WALK_ALL("kmem", wsp);
1649         return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1650 }
1651 
1652 int
1653 bufctl_walk_init(mdb_walk_state_t *wsp)
1654 {
1655         if (wsp->walk_addr == NULL)
1656                 KMEM_WALK_ALL("bufctl", wsp);
1657         return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1658 }
1659 
1660 int
1661 freemem_walk_init(mdb_walk_state_t *wsp)
1662 {
1663         if (wsp->walk_addr == NULL)
1664                 KMEM_WALK_ALL("freemem", wsp);
1665         return (kmem_walk_init_common(wsp, KM_FREE));
1666 }
1667 
1668 int
1669 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1670 {
1671         if (wsp->walk_addr == NULL)
1672                 KMEM_WALK_ALL("freemem_constructed", wsp);
1673         return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1674 }
1675 
1676 int
1677 freectl_walk_init(mdb_walk_state_t *wsp)
1678 {
1679         if (wsp->walk_addr == NULL)
1680                 KMEM_WALK_ALL("freectl", wsp);
1681         return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1682 }
1683 
1684 int
1685 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1686 {
1687         if (wsp->walk_addr == NULL)
1688                 KMEM_WALK_ALL("freectl_constructed", wsp);
1689         return (kmem_walk_init_common(wsp,
1690             KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1691 }
1692 
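/*
 * To sketch how the initializers above are used (a hypothetical mdb
 * session; the cache address is invented), each walker may be invoked
 * globally, in which case KMEM_WALK_ALL() fans the walk out across every
 * cache, or locally against a single kmem_cache_t:
 *
 *	> ::walk freemem			! freed buffers, all caches
 *	> ffffff01d1234008::walk kmem		! allocated buffers, one cache
 */
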
1693 typedef struct bufctl_history_walk {
1694         void            *bhw_next;
1695         kmem_cache_t    *bhw_cache;
1696         kmem_slab_t     *bhw_slab;
1697         hrtime_t        bhw_timestamp;
1698 } bufctl_history_walk_t;
1699 
1700 int
1701 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1702 {
1703         bufctl_history_walk_t *bhw;
1704         kmem_bufctl_audit_t bc;
1705         kmem_bufctl_audit_t bcn;
1706 
1707         if (wsp->walk_addr == NULL) {
1708                 mdb_warn("bufctl_history walk doesn't support global walks\n");
1709                 return (WALK_ERR);
1710         }
1711 
1712         if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1713                 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1714                 return (WALK_ERR);
1715         }
1716 
1717         bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1718         bhw->bhw_timestamp = 0;
1719         bhw->bhw_cache = bc.bc_cache;
1720         bhw->bhw_slab = bc.bc_slab;
1721 
1722         /*
1723          * sometimes the first log entry matches the base bufctl;  in that
1724          * case, skip the base bufctl.
1725          */
1726         if (bc.bc_lastlog != NULL &&
1727             mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1728             bc.bc_addr == bcn.bc_addr &&
1729             bc.bc_cache == bcn.bc_cache &&
1730             bc.bc_slab == bcn.bc_slab &&
1731             bc.bc_timestamp == bcn.bc_timestamp &&
1732             bc.bc_thread == bcn.bc_thread)
1733                 bhw->bhw_next = bc.bc_lastlog;
1734         else
1735                 bhw->bhw_next = (void *)wsp->walk_addr;
1736 
1737         wsp->walk_addr = (uintptr_t)bc.bc_addr;
1738         wsp->walk_data = bhw;
1739 
1740         return (WALK_NEXT);
1741 }
1742 
1743 int
1744 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1745 {
1746         bufctl_history_walk_t *bhw = wsp->walk_data;
1747         uintptr_t addr = (uintptr_t)bhw->bhw_next;
1748         uintptr_t baseaddr = wsp->walk_addr;
1749         kmem_bufctl_audit_t bc;
1750 
1751         if (addr == NULL)
1752                 return (WALK_DONE);
1753 
1754         if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1755                 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1756                 return (WALK_ERR);
1757         }
1758 
1759         /*
1760          * The bufctl is only valid if the address, cache, and slab are
1761          * correct.  We also check that the timestamp is decreasing, to
1762          * prevent infinite loops.
1763          */
1764         if ((uintptr_t)bc.bc_addr != baseaddr ||
1765             bc.bc_cache != bhw->bhw_cache ||
1766             bc.bc_slab != bhw->bhw_slab ||
1767             (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1768                 return (WALK_DONE);
1769 
1770         bhw->bhw_next = bc.bc_lastlog;
1771         bhw->bhw_timestamp = bc.bc_timestamp;
1772 
1773         return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1774 }
1775 
1776 void
1777 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1778 {
1779         bufctl_history_walk_t *bhw = wsp->walk_data;
1780 
1781         mdb_free(bhw, sizeof (*bhw));
1782 }
1783 
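/*
 * For example (a sketch; the bufctl address is invented), a buffer's
 * transaction history can be followed from its current bufctl back
 * through progressively older log entries:
 *
 *	> ffffff01d2f5e950::walk bufctl_history | ::bufctl -v
 *
 * The walk stops when the address, cache, or slab no longer match, or
 * when the timestamps stop decreasing.
 */
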
1784 typedef struct kmem_log_walk {
1785         kmem_bufctl_audit_t *klw_base;
1786         kmem_bufctl_audit_t **klw_sorted;
1787         kmem_log_header_t klw_lh;
1788         size_t klw_size;
1789         size_t klw_maxndx;
1790         size_t klw_ndx;
1791 } kmem_log_walk_t;
1792 
1793 int
1794 kmem_log_walk_init(mdb_walk_state_t *wsp)
1795 {
1796         uintptr_t lp = wsp->walk_addr;
1797         kmem_log_walk_t *klw;
1798         kmem_log_header_t *lhp;
1799         int maxndx, i, j, k;
1800 
1801         /*
1802          * By default (global walk), walk the kmem_transaction_log.  Otherwise
1803          * read the log whose kmem_log_header_t is stored at walk_addr.
1804          */
1805         if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1806                 mdb_warn("failed to read 'kmem_transaction_log'");
1807                 return (WALK_ERR);
1808         }
1809 
1810         if (lp == NULL) {
1811                 mdb_warn("log is disabled\n");
1812                 return (WALK_ERR);
1813         }
1814 
1815         klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1816         lhp = &klw->klw_lh;
1817 
1818         if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1819                 mdb_warn("failed to read log header at %p", lp);
1820                 mdb_free(klw, sizeof (kmem_log_walk_t));
1821                 return (WALK_ERR);
1822         }
1823 
1824         klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1825         klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1826         maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1827 
1828         if (mdb_vread(klw->klw_base, klw->klw_size,
1829             (uintptr_t)lhp->lh_base) == -1) {
1830                 mdb_warn("failed to read log at base %p", lhp->lh_base);
1831                 mdb_free(klw->klw_base, klw->klw_size);
1832                 mdb_free(klw, sizeof (kmem_log_walk_t));
1833                 return (WALK_ERR);
1834         }
1835 
1836         klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1837             sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1838 
1839         for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1840                 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1841                     ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1842 
1843                 for (j = 0; j < maxndx; j++)
1844                         klw->klw_sorted[k++] = &chunk[j];
1845         }
1846 
1847         qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1848             (int(*)(const void *, const void *))bufctlcmp);
1849 
1850         klw->klw_maxndx = k;
1851         wsp->walk_data = klw;
1852 
1853         return (WALK_NEXT);
1854 }
1855 
1856 int
1857 kmem_log_walk_step(mdb_walk_state_t *wsp)
1858 {
1859         kmem_log_walk_t *klw = wsp->walk_data;
1860         kmem_bufctl_audit_t *bcp;
1861 
1862         if (klw->klw_ndx == klw->klw_maxndx)
1863                 return (WALK_DONE);
1864 
1865         bcp = klw->klw_sorted[klw->klw_ndx++];
1866 
1867         return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1868             (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1869 }
1870 
1871 void
1872 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1873 {
1874         kmem_log_walk_t *klw = wsp->walk_data;
1875 
1876         mdb_free(klw->klw_base, klw->klw_size);
1877         mdb_free(klw->klw_sorted, klw->klw_maxndx *
1878             sizeof (kmem_bufctl_audit_t *));
1879         mdb_free(klw, sizeof (kmem_log_walk_t));
1880 }
1881 
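/*
 * A usage sketch (hypothetical session): the kmem_log walker visits the
 * transaction log's audit records in bufctlcmp()-sorted order, so it can
 * be piped to ::bufctl to inspect recent transactions:
 *
 *	> ::walk kmem_log | ::bufctl
 */
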
1882 typedef struct allocdby_bufctl {
1883         uintptr_t abb_addr;
1884         hrtime_t abb_ts;
1885 } allocdby_bufctl_t;
1886 
1887 typedef struct allocdby_walk {
1888         const char *abw_walk;
1889         uintptr_t abw_thread;
1890         size_t abw_nbufs;
1891         size_t abw_size;
1892         allocdby_bufctl_t *abw_buf;
1893         size_t abw_ndx;
1894 } allocdby_walk_t;
1895 
1896 int
1897 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1898     allocdby_walk_t *abw)
1899 {
1900         if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1901                 return (WALK_NEXT);
1902 
1903         if (abw->abw_nbufs == abw->abw_size) {
1904                 allocdby_bufctl_t *buf;
1905                 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1906 
1907                 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1908 
1909                 bcopy(abw->abw_buf, buf, oldsize);
1910                 mdb_free(abw->abw_buf, oldsize);
1911 
1912                 abw->abw_size <<= 1;
1913                 abw->abw_buf = buf;
1914         }
1915 
1916         abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1917         abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1918         abw->abw_nbufs++;
1919 
1920         return (WALK_NEXT);
1921 }
1922 
1923 /*ARGSUSED*/
1924 int
1925 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1926 {
1927         if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1928             abw, addr) == -1) {
1929                 mdb_warn("couldn't walk bufctl for cache %p", addr);
1930                 return (WALK_DONE);
1931         }
1932 
1933         return (WALK_NEXT);
1934 }
1935 
1936 static int
1937 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1938 {
1939         if (lhs->abb_ts < rhs->abb_ts)
1940                 return (1);
1941         if (lhs->abb_ts > rhs->abb_ts)
1942                 return (-1);
1943         return (0);
1944 }
1945 
1946 static int
1947 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1948 {
1949         allocdby_walk_t *abw;
1950 
1951         if (wsp->walk_addr == NULL) {
1952                 mdb_warn("allocdby walk doesn't support global walks\n");
1953                 return (WALK_ERR);
1954         }
1955 
1956         abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1957 
1958         abw->abw_thread = wsp->walk_addr;
1959         abw->abw_walk = walk;
1960         abw->abw_size = 128; /* something reasonable */
1961         abw->abw_buf =
1962             mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1963 
1964         wsp->walk_data = abw;
1965 
1966         if (mdb_walk("kmem_cache",
1967             (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1968                 mdb_warn("couldn't walk kmem_cache");
1969                 allocdby_walk_fini(wsp);
1970                 return (WALK_ERR);
1971         }
1972 
1973         qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1974             (int(*)(const void *, const void *))allocdby_cmp);
1975 
1976         return (WALK_NEXT);
1977 }
1978 
1979 int
1980 allocdby_walk_init(mdb_walk_state_t *wsp)
1981 {
1982         return (allocdby_walk_init_common(wsp, "bufctl"));
1983 }
1984 
1985 int
1986 freedby_walk_init(mdb_walk_state_t *wsp)
1987 {
1988         return (allocdby_walk_init_common(wsp, "freectl"));
1989 }
1990 
1991 int
1992 allocdby_walk_step(mdb_walk_state_t *wsp)
1993 {
1994         allocdby_walk_t *abw = wsp->walk_data;
1995         kmem_bufctl_audit_t bc;
1996         uintptr_t addr;
1997 
1998         if (abw->abw_ndx == abw->abw_nbufs)
1999                 return (WALK_DONE);
2000 
2001         addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
2002 
2003         if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2004                 mdb_warn("couldn't read bufctl at %p", addr);
2005                 return (WALK_DONE);
2006         }
2007 
2008         return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
2009 }
2010 
2011 void
2012 allocdby_walk_fini(mdb_walk_state_t *wsp)
2013 {
2014         allocdby_walk_t *abw = wsp->walk_data;
2015 
2016         mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2017         mdb_free(abw, sizeof (allocdby_walk_t));
2018 }
2019 
2020 /*ARGSUSED*/
2021 int
2022 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2023 {
2024         char c[MDB_SYM_NAMLEN];
2025         GElf_Sym sym;
2026         int i;
2027 
2028         mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2029         for (i = 0; i < bcp->bc_depth; i++) {
2030                 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2031                     MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2032                         continue;
2033                 if (strncmp(c, "kmem_", 5) == 0)
2034                         continue;
2035                 mdb_printf("%s+0x%lx",
2036                     c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2037                 break;
2038         }
2039         mdb_printf("\n");
2040 
2041         return (WALK_NEXT);
2042 }
2043 
2044 static int
2045 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2046 {
2047         if (!(flags & DCMD_ADDRSPEC))
2048                 return (DCMD_USAGE);
2049 
2050         mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2051 
2052         if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2053                 mdb_warn("can't walk '%s' for %p", w, addr);
2054                 return (DCMD_ERR);
2055         }
2056 
2057         return (DCMD_OK);
2058 }
2059 
2060 /*ARGSUSED*/
2061 int
2062 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2063 {
2064         return (allocdby_common(addr, flags, "allocdby"));
2065 }
2066 
2067 /*ARGSUSED*/
2068 int
2069 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2070 {
2071         return (allocdby_common(addr, flags, "freedby"));
2072 }
2073 
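/*
 * To illustrate (a hypothetical session; the addresses, timestamp, and
 * caller shown are invented), ::allocdby and ::freedby take a kthread_t
 * address and list that thread's transactions, most recent first:
 *
 *	> ffffff01d08f0c20::allocdby
 *	BUFCTL              TIMESTAMP CALLER
 *	ffffff01d2f5e950  e5a2cf3f9f2 allocb+0x68
 */
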
2074 /*
2075  * Return a string describing the address in relation to the given thread's
2076  * stack.
2077  *
2078  * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2079  *
2080  * - If the address is above the stack pointer, return an empty string
2081  *   signifying that the address is active.
2082  *
2083  * - If the address is below the stack pointer, and the thread is not on proc,
2084  *   return " (below sp)".
2085  *
2086  * - If the address is below the stack pointer, and the thread is on proc,
2087  *   return " (possibly below sp)".  Depending on context, we may or may not
2088  *   have an accurate t_sp.
2089  */
2090 static const char *
2091 stack_active(const kthread_t *t, uintptr_t addr)
2092 {
2093         uintptr_t panicstk;
2094         GElf_Sym sym;
2095 
2096         if (t->t_state == TS_FREE)
2097                 return (" (inactive interrupt thread)");
2098 
2099         /*
2100          * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
2101          * no longer relates to the thread's real stack.
2102          */
2103         if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2104                 panicstk = (uintptr_t)sym.st_value;
2105 
2106                 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2107                         return ("");
2108         }
2109 
2110         if (addr >= t->t_sp + STACK_BIAS)
2111                 return ("");
2112 
2113         if (t->t_state == TS_ONPROC)
2114                 return (" (possibly below sp)");
2115 
2116         return (" (below sp)");
2117 }
2118 
2119 /*
2120  * Additional state for the kmem and vmem ::whatis handlers
2121  */
2122 typedef struct whatis_info {
2123         mdb_whatis_t *wi_w;
2124         const kmem_cache_t *wi_cache;
2125         const vmem_t *wi_vmem;
2126         vmem_t *wi_msb_arena;
2127         size_t wi_slab_size;
2128         uint_t wi_slab_found;
2129         uint_t wi_kmem_lite_count;
2130         uint_t wi_freemem;
2131 } whatis_info_t;
2132 
2133 /* call one of our dcmd functions with "-v" and the provided address */
2134 static void
2135 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2136 {
2137         mdb_arg_t a;
2138         a.a_type = MDB_TYPE_STRING;
2139         a.a_un.a_str = "-v";
2140 
2141         mdb_printf(":\n");
2142         (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2143 }
2144 
2145 static void
2146 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2147 {
2148 #define KMEM_LITE_MAX   16
2149         pc_t callers[KMEM_LITE_MAX];
2150         pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2151 
2152         kmem_buftag_t bt;
2153         intptr_t stat;
2154         const char *plural = "";
2155         int i;
2156 
2157         /* validate our arguments and read in the buftag */
2158         if (count == 0 || count > KMEM_LITE_MAX ||
2159             mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2160                 return;
2161 
2162         /* validate the buffer state and read in the callers */
2163         stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2164 
2165         if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2166                 return;
2167 
2168         if (mdb_vread(callers, count * sizeof (pc_t),
2169             btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2170                 return;
2171 
2172         /* If there aren't any filled-in callers, bail */
2173         if (callers[0] == uninit)
2174                 return;
2175 
2176         plural = (callers[1] == uninit) ? "" : "s";
2177 
2178         /* Everything's done and checked; print them out */
2179         mdb_printf(":\n");
2180 
2181         mdb_inc_indent(8);
2182         mdb_printf("recent caller%s: %a", plural, callers[0]);
2183         for (i = 1; i < count; i++) {
2184                 if (callers[i] == uninit)
2185                         break;
2186                 mdb_printf(", %a", callers[i]);
2187         }
2188         mdb_dec_indent(8);
2189 }
2190 
2191 static void
2192 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2193     uintptr_t baddr)
2194 {
2195         mdb_whatis_t *w = wi->wi_w;
2196 
2197         const kmem_cache_t *cp = wi->wi_cache;
2198         /* LINTED pointer cast may result in improper alignment */
2199         uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2200         int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2201         int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2202 
2203         mdb_whatis_report_object(w, maddr, addr, "");
2204 
2205         if (baddr != 0 && !call_printer)
2206                 mdb_printf("bufctl %p ", baddr);
2207 
2208         mdb_printf("%s from %s",
2209             (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2210 
2211         if (baddr != 0 && call_printer) {
2212                 whatis_call_printer(bufctl, baddr);
2213                 return;
2214         }
2215 
2216         /* for KMF_LITE caches, try to print out the previous callers */
2217         if (!quiet && (cp->cache_flags & KMF_LITE))
2218                 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2219 
2220         mdb_printf("\n");
2221 }
2222 
2223 /*ARGSUSED*/
2224 static int
2225 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2226 {
2227         mdb_whatis_t *w = wi->wi_w;
2228 
2229         uintptr_t cur;
2230         size_t size = wi->wi_cache->cache_bufsize;
2231 
2232         while (mdb_whatis_match(w, addr, size, &cur))
2233                 whatis_print_kmem(wi, cur, addr, NULL);
2234 
2235         return (WHATIS_WALKRET(w));
2236 }
2237 
2238 /*ARGSUSED*/
2239 static int
2240 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2241 {
2242         mdb_whatis_t *w = wi->wi_w;
2243 
2244         uintptr_t cur;
2245         uintptr_t addr = (uintptr_t)bcp->bc_addr;
2246         size_t size = wi->wi_cache->cache_bufsize;
2247 
2248         while (mdb_whatis_match(w, addr, size, &cur))
2249                 whatis_print_kmem(wi, cur, addr, baddr);
2250 
2251         return (WHATIS_WALKRET(w));
2252 }
2253 
2254 static int
2255 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2256 {
2257         mdb_whatis_t *w = wi->wi_w;
2258 
2259         size_t size = vs->vs_end - vs->vs_start;
2260         uintptr_t cur;
2261 
2262         /* We're not interested in anything but alloc and free segments */
2263         if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2264                 return (WALK_NEXT);
2265 
2266         while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2267                 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2268 
2269                 /*
2270                  * If we're not printing it separately, provide the vmem_seg
2271                  * pointer if it has a stack trace.
2272                  */
2273                 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2274                     (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2275                     (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2276                         mdb_printf("vmem_seg %p ", addr);
2277                 }
2278 
2279                 mdb_printf("%s from the %s vmem arena",
2280                     (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2281                     wi->wi_vmem->vm_name);
2282 
2283                 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2284                         whatis_call_printer(vmem_seg, addr);
2285                 else
2286                         mdb_printf("\n");
2287         }
2288 
2289         return (WHATIS_WALKRET(w));
2290 }
2291 
2292 static int
2293 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2294 {
2295         mdb_whatis_t *w = wi->wi_w;
2296         const char *nm = vmem->vm_name;
2297 
2298         int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2299         int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2300 
2301         if (identifier != idspace)
2302                 return (WALK_NEXT);
2303 
2304         wi->wi_vmem = vmem;
2305 
2306         if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2307                 mdb_printf("Searching vmem arena %s...\n", nm);
2308 
2309         if (mdb_pwalk("vmem_seg",
2310             (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2311                 mdb_warn("can't walk vmem_seg for %p", addr);
2312                 return (WALK_NEXT);
2313         }
2314 
2315         return (WHATIS_WALKRET(w));
2316 }
2317 
2318 /*ARGSUSED*/
2319 static int
2320 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2321 {
2322         mdb_whatis_t *w = wi->wi_w;
2323 
2324         /* It must overlap with the slab data, or it's not interesting */
2325         if (mdb_whatis_overlaps(w,
2326             (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2327                 wi->wi_slab_found++;
2328                 return (WALK_DONE);
2329         }
2330         return (WALK_NEXT);
2331 }
2332 
2333 static int
2334 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2335 {
2336         mdb_whatis_t *w = wi->wi_w;
2337 
2338         char *walk, *freewalk;
2339         mdb_walk_cb_t func;
2340         int do_bufctl;
2341 
2342         int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2343         int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2344 
2345         if (identifier != idspace)
2346                 return (WALK_NEXT);
2347 
2348         /* Override the '-b' flag as necessary */
2349         if (!(c->cache_flags & KMF_HASH))
2350                 do_bufctl = FALSE;      /* no bufctls to walk */
2351         else if (c->cache_flags & KMF_AUDIT)
2352                 do_bufctl = TRUE;       /* we always want debugging info */
2353         else
2354                 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2355 
2356         if (do_bufctl) {
2357                 walk = "bufctl";
2358                 freewalk = "freectl";
2359                 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2360         } else {
2361                 walk = "kmem";
2362                 freewalk = "freemem";
2363                 func = (mdb_walk_cb_t)whatis_walk_kmem;
2364         }
2365 
2366         wi->wi_cache = c;
2367 
2368         if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2369                 mdb_printf("Searching %s...\n", c->cache_name);
2370 
2371         /*
2372          * If more than two buffers live on each slab, figure out if we're
2373          * interested in anything in any slab before doing the more expensive
2374          * kmem/freemem (bufctl/freectl) walkers.
2375          */
2376         wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2377         if (!(c->cache_flags & KMF_HASH))
2378                 wi->wi_slab_size -= sizeof (kmem_slab_t);
2379 
2380         if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2381                 wi->wi_slab_found = 0;
2382                 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2383                     addr) == -1) {
2384                         mdb_warn("can't find kmem_slab walker");
2385                         return (WALK_DONE);
2386                 }
2387                 if (wi->wi_slab_found == 0)
2388                         return (WALK_NEXT);
2389         }
2390 
2391         wi->wi_freemem = FALSE;
2392         if (mdb_pwalk(walk, func, wi, addr) == -1) {
2393                 mdb_warn("can't find %s walker", walk);
2394                 return (WALK_DONE);
2395         }
2396 
2397         if (mdb_whatis_done(w))
2398                 return (WALK_DONE);
2399 
2400         /*
2401          * We have searched for allocated memory; now search for freed memory.
2402          */
2403         if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2404                 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2405 
2406         wi->wi_freemem = TRUE;
2407         if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2408                 mdb_warn("can't find %s walker", freewalk);
2409                 return (WALK_DONE);
2410         }
2411 
2412         return (WHATIS_WALKRET(w));
2413 }
2414 
2415 static int
2416 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2417 {
2418         if (c->cache_arena == wi->wi_msb_arena ||
2419             (c->cache_cflags & KMC_NOTOUCH))
2420                 return (WALK_NEXT);
2421 
2422         return (whatis_walk_cache(addr, c, wi));
2423 }
2424 
2425 static int
2426 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2427 {
2428         if (c->cache_arena != wi->wi_msb_arena)
2429                 return (WALK_NEXT);
2430 
2431         return (whatis_walk_cache(addr, c, wi));
2432 }
2433 
2434 static int
2435 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2436 {
2437         if (c->cache_arena == wi->wi_msb_arena ||
2438             !(c->cache_cflags & KMC_NOTOUCH))
2439                 return (WALK_NEXT);
2440 
2441         return (whatis_walk_cache(addr, c, wi));
2442 }
2443 
2444 static int
2445 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2446 {
2447         uintptr_t cur;
2448         uintptr_t saddr;
2449         size_t size;
2450 
2451         /*
2452          * Often, one calls ::whatis on an address from a thread structure.
2453          * We use this opportunity to short-circuit this case...
2454          */
2455         while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2456                 mdb_whatis_report_object(w, cur, addr,
2457                     "allocated as a thread structure\n");
2458 
2459         /*
2460          * Now check the stack
2461          */
2462         if (t->t_stkbase == NULL)
2463                 return (WALK_NEXT);
2464 
2465         /*
2466          * This assumes that t_stk is the end of the stack, but it's really
2467          * only the initial stack pointer for the thread.  Arguments to the
2468          * initial procedure, SA(MINFRAME), etc. are all after t_stk.  So
2469          * that 't->t_stk::whatis' reports "part of t's stack", we include
2470          * t_stk in the range (the "+ 1", below), but the kernel should
2471          * really include the full stack bounds where we can find it.
2472          */
2473         saddr = (uintptr_t)t->t_stkbase;
2474         size = (uintptr_t)t->t_stk - saddr + 1;
2475         while (mdb_whatis_match(w, saddr, size, &cur))
2476                 mdb_whatis_report_object(w, cur, cur,
2477                     "in thread %p's stack%s\n", addr, stack_active(t, cur));
2478 
2479         return (WHATIS_WALKRET(w));
2480 }
2481 
2482 static void
2483 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2484     uintptr_t base, size_t size, const char *where)
2485 {
2486         uintptr_t cur;
2487 
2488         /*
2489          * Since we're searching for addresses inside a module, we report
2490          * them as symbols.
2491          */
2492         while (mdb_whatis_match(w, base, size, &cur))
2493                 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2494 }
2495 
2496 static int
2497 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2498 {
2499         char name[MODMAXNAMELEN];
2500         struct module mod;
2501         Shdr shdr;
2502 
2503         if (m->mod_mp == NULL)
2504                 return (WALK_NEXT);
2505 
2506         if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2507                 mdb_warn("couldn't read modctl %p's module", addr);
2508                 return (WALK_NEXT);
2509         }
2510 
2511         if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2512                 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2513 
2514         whatis_modctl_match(w, name,
2515             (uintptr_t)mod.text, mod.text_size, "text segment");
2516         whatis_modctl_match(w, name,
2517             (uintptr_t)mod.data, mod.data_size, "data segment");
2518         whatis_modctl_match(w, name,
2519             (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2520 
2521         if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2522                 mdb_warn("couldn't read symbol header for %p's module", addr);
2523                 return (WALK_NEXT);
2524         }
2525 
2526         whatis_modctl_match(w, name,
2527             (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2528         whatis_modctl_match(w, name,
2529             (uintptr_t)mod.symspace, mod.symsize, "symtab");
2530 
2531         return (WHATIS_WALKRET(w));
2532 }
2533 
2534 /*ARGSUSED*/
2535 static int
2536 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2537 {
2538         uintptr_t cur;
2539 
2540         uintptr_t base = (uintptr_t)seg->pages;
2541         size_t size = (uintptr_t)seg->epages - base;
2542 
2543         while (mdb_whatis_match(w, base, size, &cur)) {
2544                 /* round our found pointer down to the page_t base. */
2545                 size_t offset = (cur - base) % sizeof (page_t);
2546 
2547                 mdb_whatis_report_object(w, cur, cur - offset,
2548                     "allocated as a page structure\n");
2549         }
2550 
2551         return (WHATIS_WALKRET(w));
2552 }
2553 
2554 /*ARGSUSED*/
2555 static int
2556 whatis_run_modules(mdb_whatis_t *w, void *arg)
2557 {
2558         if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2559                 mdb_warn("couldn't find modctl walker");
2560                 return (1);
2561         }
2562         return (0);
2563 }
2564 
2565 /*ARGSUSED*/
2566 static int
2567 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2568 {
2569         /*
2570          * Now search all thread stacks.  Yes, this is a little weak; we
2571          * can save a lot of work by first checking to see if the
2572          * address is in segkp vs. segkmem.  But hey, computers are
2573          * fast.
2574          */
2575         if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2576                 mdb_warn("couldn't find thread walker");
2577                 return (1);
2578         }
2579         return (0);
2580 }
2581 
2582 /*ARGSUSED*/
2583 static int
2584 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2585 {
2586         if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2587                 mdb_warn("couldn't find memseg walker");
2588                 return (1);
2589         }
2590         return (0);
2591 }
2592 
2593 /*ARGSUSED*/
2594 static int
2595 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2596 {
2597         whatis_info_t wi;
2598 
2599         bzero(&wi, sizeof (wi));
2600         wi.wi_w = w;
2601 
2602         if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2603                 mdb_warn("unable to readvar \"kmem_msb_arena\"");
2604 
2605         if (mdb_readvar(&wi.wi_kmem_lite_count,
2606             "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2607                 wi.wi_kmem_lite_count = 0;
2608 
2609         /*
2610          * We process kmem caches in the following order:
2611          *
2612          *      non-KMC_NOTOUCH, non-metadata   (typically the most interesting)
2613          *      metadata                        (can be huge with KMF_AUDIT)
2614          *      KMC_NOTOUCH, non-metadata       (see kmem_walk_all())
2615          */
2616         if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2617             &wi) == -1 ||
2618             mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2619             &wi) == -1 ||
2620             mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2621             &wi) == -1) {
2622                 mdb_warn("couldn't find kmem_cache walker");
2623                 return (1);
2624         }
2625         return (0);
2626 }
2627 
2628 /*ARGSUSED*/
2629 static int
2630 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2631 {
2632         whatis_info_t wi;
2633 
2634         bzero(&wi, sizeof (wi));
2635         wi.wi_w = w;
2636 
2637         if (mdb_walk("vmem_postfix",
2638             (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2639                 mdb_warn("couldn't find vmem_postfix walker");
2640                 return (1);
2641         }
2642         return (0);
2643 }
2644 
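/*
 * A sketch of how these runners surface results (a hypothetical session;
 * addresses are invented).  Depending on where the target address lands,
 * ::whatis may report a kmem buffer, a vmem segment, a thread stack, a
 * module segment, or a page structure, e.g.:
 *
 *	> ffffff01d2f5e940::whatis
 *	ffffff01d2f5e940 is allocated from kmem_alloc_64
 *	> ffffff0004218f70::whatis
 *	ffffff0004218f70 is in thread ffffff01d08f0c20's stack
 */
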
2645 typedef struct kmem_log_cpu {
2646         uintptr_t kmc_low;
2647         uintptr_t kmc_high;
2648 } kmem_log_cpu_t;
2649 
2650 typedef struct kmem_log_data {
2651         uintptr_t kmd_addr;
2652         kmem_log_cpu_t *kmd_cpu;
2653 } kmem_log_data_t;
2654 
2655 int
2656 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2657     kmem_log_data_t *kmd)
2658 {
2659         int i;
2660         kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2661         size_t bufsize;
2662 
2663         for (i = 0; i < NCPU; i++) {
2664                 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2665                         break;
2666         }
2667 
2668         if (kmd->kmd_addr) {
2669                 if (b->bc_cache == NULL)
2670                         return (WALK_NEXT);
2671 
2672                 if (mdb_vread(&bufsize, sizeof (bufsize),
2673                     (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2674                         mdb_warn(
2675                             "failed to read cache_bufsize for cache at %p",
2676                             b->bc_cache);
2677                         return (WALK_ERR);
2678                 }
2679 
2680                 if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2681                     kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2682                         return (WALK_NEXT);
2683         }
2684 
2685         if (i == NCPU)
2686                 mdb_printf("   ");
2687         else
2688                 mdb_printf("%3d", i);
2689 
2690         mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2691             b->bc_timestamp, b->bc_thread);
2692 
2693         return (WALK_NEXT);
2694 }
2695 
2696 /*ARGSUSED*/
2697 int
2698 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2699 {
2700         kmem_log_header_t lh;
2701         kmem_cpu_log_header_t clh;
2702         uintptr_t lhp, clhp;
2703         int ncpus;
2704         uintptr_t *cpu;
2705         GElf_Sym sym;
2706         kmem_log_cpu_t *kmc;
2707         int i;
2708         kmem_log_data_t kmd;
2709         uint_t opt_b = FALSE;
2710 
2711         if (mdb_getopts(argc, argv,
2712             'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2713                 return (DCMD_USAGE);
2714 
2715         if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2716                 mdb_warn("failed to read 'kmem_transaction_log'");
2717                 return (DCMD_ERR);
2718         }
2719 
2720         if (lhp == NULL) {
2721                 mdb_warn("no kmem transaction log\n");
2722                 return (DCMD_ERR);
2723         }
2724 
2725         mdb_readvar(&ncpus, "ncpus");
2726 
2727         if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2728                 mdb_warn("failed to read log header at %p", lhp);
2729                 return (DCMD_ERR);
2730         }
2731 
2732         clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2733 
2734         cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2735 
2736         if (mdb_lookup_by_name("cpu", &sym) == -1) {
2737                 mdb_warn("couldn't find 'cpu' array");
2738                 return (DCMD_ERR);
2739         }
2740 
2741         if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2742                 mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2743                     NCPU * sizeof (uintptr_t), sym.st_size);
2744                 return (DCMD_ERR);
2745         }
2746 
2747         if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2748                 mdb_warn("failed to read cpu array at %p", sym.st_value);
2749                 return (DCMD_ERR);
2750         }
2751 
2752         kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2753         kmd.kmd_addr = NULL;
2754         kmd.kmd_cpu = kmc;
2755 
2756         for (i = 0; i < NCPU; i++) {
2757 
2758                 if (cpu[i] == NULL)
2759                         continue;
2760 
2761                 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2762                         mdb_warn("cannot read cpu %d's log header at %p",
2763                             i, clhp);
2764                         return (DCMD_ERR);
2765                 }
2766 
2767                 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2768                     (uintptr_t)lh.lh_base;
2769                 kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2770 
2771                 clhp += sizeof (kmem_cpu_log_header_t);
2772         }
2773 
2774         mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2775             "TIMESTAMP", "THREAD");
2776 
2777         /*
2778          * If we have been passed an address, print out only log entries
2779          * corresponding to that address.  With -b, the address is a buffer
2780          * address to filter by; otherwise it is a bufctl, printed directly.
2781          */
2782         if (flags & DCMD_ADDRSPEC) {
2783                 kmem_bufctl_audit_t b;
2784 
2785                 if (opt_b) {
2786                         kmd.kmd_addr = addr;
2787                 } else {
2788                         if (mdb_vread(&b,
2789                             sizeof (kmem_bufctl_audit_t), addr) == -1) {
2790                                 mdb_warn("failed to read bufctl at %p", addr);
2791                                 return (DCMD_ERR);
2792                         }
2793 
2794                         (void) kmem_log_walk(addr, &b, &kmd);
2795 
2796                         return (DCMD_OK);
2797                 }
2798         }
2799 
2800         if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2801                 mdb_warn("can't find kmem log walker");
2802                 return (DCMD_ERR);
2803         }
2804 
2805         return (DCMD_OK);
2806 }
2807 
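/*
 * Example usage (a sketch with invented values): with no address,
 * ::kmem_log dumps every record in the transaction log, tagging each
 * record with the CPU whose log chunk contains it; with -b and a buffer
 * address, only records involving that buffer are printed:
 *
 *	> ::kmem_log
 *	CPU ADDR             BUFADDR                 TIMESTAMP THREAD
 *	  0 ffffff01d0300040 ffffff01d2f5e940      e5a2cf3f9f2 ffffff01d08f0c20
 *	> ffffff01d2f5e940::kmem_log -b
 */
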
2808 typedef struct bufctl_history_cb {
2809         int             bhc_flags;
2810         int             bhc_argc;
2811         const mdb_arg_t *bhc_argv;
2812         int             bhc_ret;
2813 } bufctl_history_cb_t;
2814 
2815 /*ARGSUSED*/
2816 static int
2817 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2818 {
2819         bufctl_history_cb_t *bhc = arg;
2820 
2821         bhc->bhc_ret =
2822             bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2823 
2824         bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2825 
2826         return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2827 }
2828 
2829 void
2830 bufctl_help(void)
2831 {
2832         mdb_printf("%s",
2833 "Display the contents of kmem_bufctl_audit_t records, with optional filtering.\n\n");
2834         mdb_dec_indent(2);
2835         mdb_printf("%<b>OPTIONS%</b>\n");
2836         mdb_inc_indent(2);
2837         mdb_printf("%s",
2838 "  -v    Display the full content of the bufctl, including its stack trace\n"
2839 "  -h    retrieve the bufctl's transaction history, if available\n"
2840 "  -a addr\n"
2841 "        filter out bufctls not involving the buffer at addr\n"
2842 "  -c caller\n"
2843 "        filter out bufctls without the function/PC in their stack trace\n"
2844 "  -e earliest\n"
2845 "        filter out bufctls timestamped before earliest\n"
2846 "  -l latest\n"
2847 "        filter out bufctls timestamped after latest\n"
2848 "  -t thread\n"
2849 "        filter out bufctls not involving thread\n");
2850 }
2851 
2852 int
2853 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2854 {
2855         kmem_bufctl_audit_t bc;
2856         uint_t verbose = FALSE;
2857         uint_t history = FALSE;
2858         uint_t in_history = FALSE;
2859         uintptr_t caller = NULL, thread = NULL;
2860         uintptr_t laddr, haddr, baddr = NULL;
2861         hrtime_t earliest = 0, latest = 0;
2862         int i, depth;
2863         char c[MDB_SYM_NAMLEN];
2864         GElf_Sym sym;
2865 
2866         if (mdb_getopts(argc, argv,
2867             'v', MDB_OPT_SETBITS, TRUE, &verbose,
2868             'h', MDB_OPT_SETBITS, TRUE, &history,
2869             'H', MDB_OPT_SETBITS, TRUE, &in_history,                /* internal */
2870             'c', MDB_OPT_UINTPTR, &caller,
2871             't', MDB_OPT_UINTPTR, &thread,
2872             'e', MDB_OPT_UINT64, &earliest,
2873             'l', MDB_OPT_UINT64, &latest,
2874             'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2875                 return (DCMD_USAGE);
2876 
2877         if (!(flags & DCMD_ADDRSPEC))
2878                 return (DCMD_USAGE);
2879 
2880         if (in_history && !history)
2881                 return (DCMD_USAGE);
2882 
2883         if (history && !in_history) {
2884                 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2885                     UM_SLEEP | UM_GC);
2886                 bufctl_history_cb_t bhc;
2887 
2888                 nargv[0].a_type = MDB_TYPE_STRING;
2889                 nargv[0].a_un.a_str = "-H";             /* prevent recursion */
2890 
2891                 for (i = 0; i < argc; i++)
2892                         nargv[i + 1] = argv[i];
2893 
2894                 /*
2895                  * When in history mode, we treat each element as if it
2896                  * were in a separate loop, so that the headers group
2897                  * bufctls with similar histories.
2898                  */
2899                 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2900                 bhc.bhc_argc = argc + 1;
2901                 bhc.bhc_argv = nargv;
2902                 bhc.bhc_ret = DCMD_OK;
2903 
2904                 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2905                     addr) == -1) {
2906                         mdb_warn("unable to walk bufctl_history");
2907                         return (DCMD_ERR);
2908                 }
2909 
2910                 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2911                         mdb_printf("\n");
2912 
2913                 return (bhc.bhc_ret);
2914         }
2915 
2916         if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2917                 if (verbose) {
2918                         mdb_printf("%16s %16s %16s %16s\n"
2919                             "%<u>%16s %16s %16s %16s%</u>\n",
2920                             "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2921                             "", "CACHE", "LASTLOG", "CONTENTS");
2922                 } else {
2923                         mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2924                             "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2925                 }
2926         }
2927 
2928         if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2929                 mdb_warn("couldn't read bufctl at %p", addr);
2930                 return (DCMD_ERR);
2931         }
2932 
2933         /*
2934          * Guard against bogus bc_depth in case the bufctl is corrupt or
2935          * the address does not really refer to a bufctl.
2936          */
2937         depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2938 
2939         if (caller != NULL) {
2940                 laddr = caller;
2941                 haddr = caller + sizeof (caller);
2942 
2943                 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2944                     &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2945                         /*
2946                          * We were provided an exact symbol value; any
2947                          * address in the function is valid.
2948                          */
2949                         laddr = (uintptr_t)sym.st_value;
2950                         haddr = (uintptr_t)sym.st_value + sym.st_size;
2951                 }
2952 
2953                 for (i = 0; i < depth; i++)
2954                         if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2955                                 break;
2956 
2957                 if (i == depth)
2958                         return (DCMD_OK);
2959         }
2960 
2961         if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2962                 return (DCMD_OK);
2963 
2964         if (earliest != 0 && bc.bc_timestamp < earliest)
2965                 return (DCMD_OK);
2966 
2967         if (latest != 0 && bc.bc_timestamp > latest)
2968                 return (DCMD_OK);
2969 
2970         if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2971                 return (DCMD_OK);
2972 
2973         if (flags & DCMD_PIPE_OUT) {
2974                 mdb_printf("%#lr\n", addr);
2975                 return (DCMD_OK);
2976         }
2977 
2978         if (verbose) {
2979                 mdb_printf(
2980                     "%<b>%16p%</b> %16p %16llx %16p\n"
2981                     "%16s %16p %16p %16p\n",
2982                     addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2983                     "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2984 
2985                 mdb_inc_indent(17);
2986                 for (i = 0; i < depth; i++)
2987                         mdb_printf("%a\n", bc.bc_stack[i]);
2988                 mdb_dec_indent(17);
2989                 mdb_printf("\n");
2990         } else {
2991                 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2992                     bc.bc_timestamp, bc.bc_thread);
2993 
2994                 for (i = 0; i < depth; i++) {
2995                         if (mdb_lookup_by_addr(bc.bc_stack[i],
2996                             MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2997                                 continue;
2998                         if (strncmp(c, "kmem_", 5) == 0)
2999                                 continue;
3000                         mdb_printf(" %a\n", bc.bc_stack[i]);
3001                         break;
3002                 }
3003 
3004                 if (i >= depth)
3005                         mdb_printf("\n");
3006         }
3007 
3008         return (DCMD_OK);
3009 }
3010 
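/*
 * A filtering sketch (hypothetical; the function name and pipeline are
 * illustrative): the options parsed above compose, so one can select
 * only the bufctls whose saved stack traces pass through a particular
 * function and display them verbosely:
 *
 *	> ::walk bufctl | ::bufctl -c specialfs_read -v
 */
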
3011 typedef struct kmem_verify {
3012         uint64_t *kmv_buf;              /* buffer to read cache contents into */
3013         size_t kmv_size;                /* number of bytes in kmv_buf */
3014         int kmv_corruption;             /* > 0 if corruption found. */
3015         uint_t kmv_flags;               /* dcmd flags */
3016         struct kmem_cache kmv_cache;    /* the cache we're operating on */
3017 } kmem_verify_t;
3018 
3019 /*
3020  * verify_pattern()
3021  *      verify that buf is filled with the pattern pat.
3022  */
3023 static int64_t
3024 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3025 {
3026         /*LINTED*/
3027         uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3028         uint64_t *buf;
3029 
3030         for (buf = buf_arg; buf < bufend; buf++)
3031                 if (*buf != pat)
3032                         return ((uintptr_t)buf - (uintptr_t)buf_arg);
3033         return (-1);
3034 }
3035 
3036 /*
3037  * verify_buftag()
3038  *      verify that btp->bt_bxstat == (bcp ^ pat)
3039  */
3040 static int
3041 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3042 {
3043         return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3044 }
3045 
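/*
 * A worked example of the invariant above, with invented values: for a
 * free buffer whose bt_bufctl is 0xffffff01d2f5e950, bt_bxstat must be
 * 0xffffff01d2f5e950 ^ KMEM_BUFTAG_FREE; any other value means the
 * buftag (or its bufctl pointer) has been stomped, and verify_buftag()
 * returns -1.
 */
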
3046 /*
3047  * verify_free()
3048  *      verify the integrity of a free block of memory by checking
3049  *      that it is filled with 0xdeadbeef and that its buftag is sane.
3050  */
3051 /*ARGSUSED1*/
3052 static int
3053 verify_free(uintptr_t addr, const void *data, void *private)
3054 {
3055         kmem_verify_t *kmv = (kmem_verify_t *)private;
3056         uint64_t *buf = kmv->kmv_buf;        /* buf to validate */
3057         int64_t corrupt;                /* corruption offset */
3058         kmem_buftag_t *buftagp;         /* ptr to buftag */
3059         kmem_cache_t *cp = &kmv->kmv_cache;
3060         boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT));
3061 
3062         /*LINTED*/
3063         buftagp = KMEM_BUFTAG(cp, buf);
3064 
3065         /*
3066          * Read the buffer to check.
3067          */
3068         if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3069                 if (!besilent)
3070                         mdb_warn("couldn't read %p", addr);
3071                 return (WALK_NEXT);
3072         }
3073 
3074         if ((corrupt = verify_pattern(buf, cp->cache_verify,
3075             KMEM_FREE_PATTERN)) >= 0) {
3076                 if (!besilent)
3077                         mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3078                             addr, (uintptr_t)addr + corrupt);
3079                 goto corrupt;
3080         }
3081         /*
3082          * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3083          * the first bytes of the buffer, hence we cannot check for red
3084          * zone corruption.
3085          */
3086         if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3087             buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3088                 if (!besilent)
3089                         mdb_printf("buffer %p (free) seems to "
3090                             "have a corrupt redzone pattern\n", addr);
3091                 goto corrupt;
3092         }
3093 
3094         /*
3095          * confirm bufctl pointer integrity.
3096          */
3097         if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3098                 if (!besilent)
3099                         mdb_printf("buffer %p (free) has a corrupt "
3100                             "buftag\n", addr);
3101                 goto corrupt;
3102         }
3103 
3104         return (WALK_NEXT);
3105 corrupt:
3106         if (kmv->kmv_flags & DCMD_PIPE_OUT)
3107                 mdb_printf("%p\n", addr);
3108         kmv->kmv_corruption++;
3109         return (WALK_NEXT);
3110 }
3111 
3112 /*
3113  * verify_alloc()
3114  *      Verify that the buftag of an allocated buffer makes sense with respect
3115  *      to the buffer.
3116  */
3117 /*ARGSUSED1*/
3118 static int
3119 verify_alloc(uintptr_t addr, const void *data, void *private)
3120 {
3121         kmem_verify_t *kmv = (kmem_verify_t *)private;
3122         kmem_cache_t *cp = &kmv->kmv_cache;
3123         uint64_t *buf = kmv->kmv_buf;        /* buf to validate */
3124         /*LINTED*/
3125         kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3126         uint32_t *ip = (uint32_t *)buftagp;
3127         uint8_t *bp = (uint8_t *)buf;
3128         int looks_ok = 0, size_ok = 1;  /* flags for finding corruption */
3129         boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT));
3130 
3131         /*
3132          * Read the buffer to check.
3133          */
3134         if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3135                 if (!besilent)
3136                         mdb_warn("couldn't read %p", addr);
3137                 return (WALK_NEXT);
3138         }
3139 
3140         /*
3141          * There are two cases to handle:
3142          * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3143          *    0xfeedfacefeedface at the end of it
3144          * 2. If the buf was alloc'd using kmem_alloc, it will have
3145          *    0xbb just past the end of the region in use.  At the buftag,
3146          *    it will have 0xfeedface (or, if the whole buffer is in use,
3147          *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3148          *    endianness), followed by 32 bits containing the offset of the
3149          *    0xbb byte in the buffer.
3150          *
3151          * Finally, the two 32-bit words that comprise the second half of the
3152          * buftag should xor to KMEM_BUFTAG_ALLOC
3153          */
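        /*
         * As a made-up illustration of case 2: for a kmem_alloc(10, KM_SLEEP)
         * buffer (which comes from the kmem_alloc_16 cache), ip[1] holds
         * KMEM_SIZE_ENCODE(10) and bp[10], the first byte past the region
         * in use, should still be the 0xbb redzone byte.  The checks below
         * accept the buffer if either the full redzone pattern or a valid
         * size encoding with an intact 0xbb byte is present.
         */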
3154 
3155         if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3156                 looks_ok = 1;
3157         else if (!KMEM_SIZE_VALID(ip[1]))
3158                 size_ok = 0;
3159         else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3160                 looks_ok = 1;
3161         else
3162                 size_ok = 0;
3163 
3164         if (!size_ok) {
3165                 if (!besilent)
3166                         mdb_printf("buffer %p (allocated) has a corrupt "
3167                             "redzone size encoding\n", addr);
3168                 goto corrupt;
3169         }
3170 
3171         if (!looks_ok) {
3172                 if (!besilent)
3173                         mdb_printf("buffer %p (allocated) has a corrupt "
3174                             "redzone signature\n", addr);
3175                 goto corrupt;
3176         }
3177 
3178         if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3179                 if (!besilent)
3180                         mdb_printf("buffer %p (allocated) has a "
3181                             "corrupt buftag\n", addr);
3182                 goto corrupt;
3183         }
3184 
3185         return (WALK_NEXT);
3186 corrupt:
3187         if (kmv->kmv_flags & DCMD_PIPE_OUT)
3188                 mdb_printf("%p\n", addr);
3189 
3190         kmv->kmv_corruption++;
3191         return (WALK_NEXT);
3192 }
3193 
3194 /*ARGSUSED2*/
3195 int
3196 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3197 {
3198         if (flags & DCMD_ADDRSPEC) {
3199                 int check_alloc = 0, check_free = 0;
3200                 kmem_verify_t kmv;
3201 
3202                 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3203                     addr) == -1) {
3204                         mdb_warn("couldn't read kmem_cache %p", addr);
3205                         return (DCMD_ERR);
3206                 }
3207 
3208                 if ((kmv.kmv_cache.cache_dump.kd_unsafe ||
3209                     kmv.kmv_cache.cache_dump.kd_alloc_fails) &&
3210                     !(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) {
3211                         mdb_warn("WARNING: cache was used during dump: "
3212                             "corruption may be incorrectly reported\n");
3213                 }
3214 
3215                 kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3216                     sizeof (kmem_buftag_t);
3217                 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3218                 kmv.kmv_corruption = 0;
3219                 kmv.kmv_flags = flags;
3220 
3221                 if (kmv.kmv_cache.cache_flags & KMF_REDZONE) {
3222                         check_alloc = 1;
3223                         if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3224                                 check_free = 1;
3225                 } else {
3226                         if (!(flags & DCMD_LOOP)) {
3227                                 mdb_warn("cache %p (%s) does not have "
3228                                     "redzone checking enabled\n", addr,
3229                                     kmv.kmv_cache.cache_name);
3230                         }
3231                         return (DCMD_ERR);
3232                 }
3233 
3234                 if (!(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) {
3235                         mdb_printf("Summary for cache '%s'\n",
3236                             kmv.kmv_cache.cache_name);
3237                         mdb_inc_indent(2);
3238                 }
3239 
3240                 if (check_alloc)
3241                         (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3242                 if (check_free)
3243                         (void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3244 
3245                 if (!(flags & DCMD_PIPE_OUT)) {
3246                         if (flags & DCMD_LOOP) {
3247                                 if (kmv.kmv_corruption == 0) {
3248                                         mdb_printf("%-*s %?p clean\n",
3249                                             KMEM_CACHE_NAMELEN,
3250                                             kmv.kmv_cache.cache_name, addr);
3251                                 } else {
3252                                         mdb_printf("%-*s %?p %d corrupt "
3253                                             "buffer%s\n", KMEM_CACHE_NAMELEN,
3254                                             kmv.kmv_cache.cache_name, addr,
3255                                             kmv.kmv_corruption,
3256                                             kmv.kmv_corruption > 1 ? "s" : "");
3257                                 }
3258                         } else {
3259                                 /*
3260                                  * This is the more verbose mode, when the user
3261                                  * typed addr::kmem_verify.  If the cache was
3262                                  * clean, nothing will have yet been printed. So
3263                                  * say something.
3264                                  */
3265                                 if (kmv.kmv_corruption == 0)
3266                                         mdb_printf("clean\n");
3267 
3268                                 mdb_dec_indent(2);
3269                         }
3270                 }
3271         } else {
3272                 /*
3273                  * If the user didn't specify a cache to verify, we'll walk all
3274                  * kmem_cache's, specifying ourself as a callback for each...
3275                  * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3276                  */
3277 
3278                 if (!(flags & DCMD_PIPE_OUT)) {
3279                         uintptr_t dump_curr;
3280                         uintptr_t dump_end;
3281 
3282                         if (mdb_readvar(&dump_curr, "kmem_dump_curr") != -1 &&
3283                             mdb_readvar(&dump_end, "kmem_dump_end") != -1 &&
3284                             dump_curr == dump_end) {
3285                                 mdb_warn("WARNING: exceeded kmem_dump_size; "
3286                                     "corruption may be incorrectly reported\n");
3287                         }
3288 
3289                         mdb_printf("%<u>%-*s %-?s %-20s%</u>\n",
3290                             KMEM_CACHE_NAMELEN, "Cache Name", "Addr",
3291                             "Cache Integrity");
3292                 }
3293 
3294                 (void) mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL);
3295         }
3296 
3297         return (DCMD_OK);
3298 }
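
/*
 * Example usage (a sketch; the cache names are real, the addresses and
 * results are made up):
 *
 *      > ::kmem_verify
 *      Cache Name                      Addr             Cache Integrity
 *      kmem_alloc_8                    30000002c008     clean
 *      kmem_alloc_16                   30000002c2a8     1 corrupt buffer
 *      ...
 *      > 30000002c2a8::kmem_verify
 *      Summary for cache 'kmem_alloc_16'
 *        buffer 300009abcd0 (allocated) has a corrupt redzone signature
 */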
3299 
3300 typedef struct vmem_node {
3301         struct vmem_node *vn_next;
3302         struct vmem_node *vn_parent;
3303         struct vmem_node *vn_sibling;
3304         struct vmem_node *vn_children;
3305         uintptr_t vn_addr;
3306         int vn_marked;
3307         vmem_t vn_vmem;
3308 } vmem_node_t;
3309 
3310 typedef struct vmem_walk {
3311         vmem_node_t *vw_root;
3312         vmem_node_t *vw_current;
3313 } vmem_walk_t;
3314 
3315 int
3316 vmem_walk_init(mdb_walk_state_t *wsp)
3317 {
3318         uintptr_t vaddr, paddr;
3319         vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3320         vmem_walk_t *vw;
3321 
3322         if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3323                 mdb_warn("couldn't read 'vmem_list'");
3324                 return (WALK_ERR);
3325         }
3326 
3327         while (vaddr != NULL) {
3328                 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3329                 vp->vn_addr = vaddr;
3330                 vp->vn_next = head;
3331                 head = vp;
3332 
3333                 if (vaddr == wsp->walk_addr)
3334                         current = vp;
3335 
3336                 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3337                         mdb_warn("couldn't read vmem_t at %p", vaddr);
3338                         goto err;
3339                 }
3340 
3341                 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3342         }
3343 
3344         for (vp = head; vp != NULL; vp = vp->vn_next) {
3345 
3346                 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3347                         vp->vn_sibling = root;
3348                         root = vp;
3349                         continue;
3350                 }
3351 
3352                 for (parent = head; parent != NULL; parent = parent->vn_next) {
3353                         if (parent->vn_addr != paddr)
3354                                 continue;
3355                         vp->vn_sibling = parent->vn_children;
3356                         parent->vn_children = vp;
3357                         vp->vn_parent = parent;
3358                         break;
3359                 }
3360 
3361                 if (parent == NULL) {
3362                         mdb_warn("couldn't find %p's parent (%p)\n",
3363                             vp->vn_addr, paddr);
3364                         goto err;
3365                 }
3366         }
3367 
3368         vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3369         vw->vw_root = root;
3370 
3371         if (current != NULL)
3372                 vw->vw_current = current;
3373         else
3374                 vw->vw_current = root;
3375 
3376         wsp->walk_data = vw;
3377         return (WALK_NEXT);
3378 err:
3379         for (vp = head; head != NULL; vp = head) {
3380                 head = vp->vn_next;
3381                 mdb_free(vp, sizeof (vmem_node_t));
3382         }
3383 
3384         return (WALK_ERR);
3385 }
3386 
3387 int
3388 vmem_walk_step(mdb_walk_state_t *wsp)
3389 {
3390         vmem_walk_t *vw = wsp->walk_data;
3391         vmem_node_t *vp;
3392         int rval;
3393 
3394         if ((vp = vw->vw_current) == NULL)
3395                 return (WALK_DONE);
3396 
3397         rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3398 
3399         if (vp->vn_children != NULL) {
3400                 vw->vw_current = vp->vn_children;
3401                 return (rval);
3402         }
3403 
3404         do {
3405                 vw->vw_current = vp->vn_sibling;
3406                 vp = vp->vn_parent;
3407         } while (vw->vw_current == NULL && vp != NULL);
3408 
3409         return (rval);
3410 }
3411 
3412 /*
3413  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3414  * children are visited before their parent.  We perform the postfix walk
3415  * iteratively (rather than recursively) to allow mdb to regain control
3416  * after each callback.
3417  */
3418 int
3419 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3420 {
3421         vmem_walk_t *vw = wsp->walk_data;
3422         vmem_node_t *vp = vw->vw_current;
3423         int rval;
3424 
3425         /*
3426          * If this node is marked, then we know that we have already visited
3427          * all of its children.  If the node has any siblings, they need to
3428          * be visited next; otherwise, we need to visit the parent.  Note
3429          * that vp->vn_marked will only be zero on the first invocation of
3430          * the step function.
3431          */
3432         if (vp->vn_marked) {
3433                 if (vp->vn_sibling != NULL)
3434                         vp = vp->vn_sibling;
3435                 else if (vp->vn_parent != NULL)
3436                         vp = vp->vn_parent;
3437                 else {
3438                         /*
3439                          * We have neither a parent, nor a sibling, and we
3440                          * have already been visited; we're done.
3441                          */
3442                         return (WALK_DONE);
3443                 }
3444         }
3445 
3446         /*
3447          * Before we visit this node, visit its children.
3448          */
3449         while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3450                 vp = vp->vn_children;
3451 
3452         vp->vn_marked = 1;
3453         vw->vw_current = vp;
3454         rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3455 
3456         return (rval);
3457 }
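
/*
 * For example (illustrative), piping the postfix walk into ::vmem:
 *
 *      > ::walk vmem_postfix | ::vmem
 *
 * prints each arena only after all of its children, which is convenient
 * for consumers that must visit importing arenas before their sources.
 */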
3458 
3459 void
3460 vmem_walk_fini(mdb_walk_state_t *wsp)
3461 {
3462         vmem_walk_t *vw = wsp->walk_data;
3463         vmem_node_t *root = vw->vw_root;
3464         int done;
3465 
3466         if (root == NULL)
3467                 return;
3468 
3469         if ((vw->vw_root = root->vn_children) != NULL)
3470                 vmem_walk_fini(wsp);
3471 
3472         vw->vw_root = root->vn_sibling;
3473         done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3474         mdb_free(root, sizeof (vmem_node_t));
3475 
3476         if (done) {
3477                 mdb_free(vw, sizeof (vmem_walk_t));
3478         } else {
3479                 vmem_walk_fini(wsp);
3480         }
3481 }
3482 
3483 typedef struct vmem_seg_walk {
3484         uint8_t vsw_type;
3485         uintptr_t vsw_start;
3486         uintptr_t vsw_current;
3487 } vmem_seg_walk_t;
3488 
3489 /*ARGSUSED*/
3490 int
3491 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3492 {
3493         vmem_seg_walk_t *vsw;
3494 
3495         if (wsp->walk_addr == NULL) {
3496                 mdb_warn("vmem_%s does not support global walks\n", name);
3497                 return (WALK_ERR);
3498         }
3499 
3500         wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3501 
3502         vsw->vsw_type = type;
3503         vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3504         vsw->vsw_current = vsw->vsw_start;
3505 
3506         return (WALK_NEXT);
3507 }
3508 
3509 /*
3510  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3511  */
3512 #define VMEM_NONE       0
3513 
3514 int
3515 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3516 {
3517         return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3518 }
3519 
3520 int
3521 vmem_free_walk_init(mdb_walk_state_t *wsp)
3522 {
3523         return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3524 }
3525 
3526 int
3527 vmem_span_walk_init(mdb_walk_state_t *wsp)
3528 {
3529         return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3530 }
3531 
3532 int
3533 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3534 {
3535         return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3536 }
3537 
3538 int
3539 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3540 {
3541         vmem_seg_t seg;
3542         vmem_seg_walk_t *vsw = wsp->walk_data;
3543         uintptr_t addr = vsw->vsw_current;
3544         static size_t seg_size = 0;
3545         int rval;
3546 
3547         if (!seg_size) {
3548                 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3549                         mdb_warn("failed to read 'vmem_seg_size'");
3550                         seg_size = sizeof (vmem_seg_t);
3551                 }
3552         }
3553 
3554         if (seg_size < sizeof (seg))
3555                 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3556 
3557         if (mdb_vread(&seg, seg_size, addr) == -1) {
3558                 mdb_warn("couldn't read vmem_seg at %p", addr);
3559                 return (WALK_ERR);
3560         }
3561 
3562         vsw->vsw_current = (uintptr_t)seg.vs_anext;
3563         if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3564                 rval = WALK_NEXT;
3565         } else {
3566                 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3567         }
3568 
3569         if (vsw->vsw_current == vsw->vsw_start)
3570                 return (WALK_DONE);
3571 
3572         return (rval);
3573 }
3574 
3575 void
3576 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3577 {
3578         vmem_seg_walk_t *vsw = wsp->walk_data;
3579 
3580         mdb_free(vsw, sizeof (vmem_seg_walk_t));
3581 }
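
/*
 * Example usage (illustrative; 'addr' is the address of a vmem arena):
 *
 *      > addr::walk vmem_alloc | ::vmem_seg
 *      > addr::walk vmem_free
 *
 * The "vmem_seg" walk visits every segment on the arena's segment list,
 * regardless of type.
 */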
3582 
3583 #define VMEM_NAMEWIDTH  22
3584 
3585 int
3586 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3587 {
3588         vmem_t v, parent;
3589         vmem_kstat_t *vkp = &v.vm_kstat;
3590         uintptr_t paddr;
3591         int ident = 0;
3592         char c[VMEM_NAMEWIDTH];
3593 
3594         if (!(flags & DCMD_ADDRSPEC)) {
3595                 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3596                         mdb_warn("can't walk vmem");
3597                         return (DCMD_ERR);
3598                 }
3599                 return (DCMD_OK);
3600         }
3601 
3602         if (DCMD_HDRSPEC(flags))
3603                 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3604                     "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3605                     "TOTAL", "SUCCEED", "FAIL");
3606 
3607         if (mdb_vread(&v, sizeof (v), addr) == -1) {
3608                 mdb_warn("couldn't read vmem at %p", addr);
3609                 return (DCMD_ERR);
3610         }
3611 
3612         for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3613                 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3614                         mdb_warn("couldn't trace %p's ancestry", addr);
3615                         ident = 0;
3616                         break;
3617                 }
3618                 paddr = (uintptr_t)parent.vm_source;
3619         }
3620 
3621         (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3622 
3623         mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3624             addr, VMEM_NAMEWIDTH, c,
3625             vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3626             vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3627 
3628         return (DCMD_OK);
3629 }
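
/*
 * Example output (a sketch; the arena names are real, the numbers are
 * made up):
 *
 *      > ::vmem
 *      ADDR             NAME                      INUSE       TOTAL SUCCEED  FAIL
 *      30000004000      heap                 1159168000  1342177280   19429     0
 *      3000000c000        kmem_metadata        20971520    29360128     946     0
 *
 * Each arena is indented two spaces per level beneath its source arena.
 */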
3630 
3631 void
3632 vmem_seg_help(void)
3633 {
3634         mdb_printf("%s",
3635 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3636 "\n"
3637 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3638 "representing a single chunk of data.  Only ALLOC segments have debugging\n"
3639 "information.\n");
3640         mdb_dec_indent(2);
3641         mdb_printf("%<b>OPTIONS%</b>\n");
3642         mdb_inc_indent(2);
3643         mdb_printf("%s",
3644 "  -v    Display the full content of the vmem_seg, including its stack trace\n"
3645 "  -s    report the size of the segment, instead of the end address\n"
3646 "  -c caller\n"
3647 "        filter out segments without the function/PC in their stack trace\n"
3648 "  -e earliest\n"
3649 "        filter out segments timestamped before earliest\n"
3650 "  -l latest\n"
3651 "        filter out segments timestamped after latest\n"
3652 "  -m minsize\n"
3653 "        filer out segments smaller than minsize\n"
3654 "  -M maxsize\n"
3655 "        filer out segments larger than maxsize\n"
3656 "  -t thread\n"
3657 "        filter out segments not involving thread\n"
3658 "  -T type\n"
3659 "        filter out segments not of type 'type'\n"
3660 "        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3661 }
3662 
3663 /*ARGSUSED*/
3664 int
3665 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3666 {
3667         vmem_seg_t vs;
3668         pc_t *stk = vs.vs_stack;
3669         uintptr_t sz;
3670         uint8_t t;
3671         const char *type = NULL;
3672         GElf_Sym sym;
3673         char c[MDB_SYM_NAMLEN];
3674         int no_debug;
3675         int i;
3676         int depth;
3677         uintptr_t laddr, haddr;
3678 
3679         uintptr_t caller = NULL, thread = NULL;
3680         uintptr_t minsize = 0, maxsize = 0;
3681 
3682         hrtime_t earliest = 0, latest = 0;
3683 
3684         uint_t size = 0;
3685         uint_t verbose = 0;
3686 
3687         if (!(flags & DCMD_ADDRSPEC))
3688                 return (DCMD_USAGE);
3689 
3690         if (mdb_getopts(argc, argv,
3691             'c', MDB_OPT_UINTPTR, &caller,
3692             'e', MDB_OPT_UINT64, &earliest,
3693             'l', MDB_OPT_UINT64, &latest,
3694             's', MDB_OPT_SETBITS, TRUE, &size,
3695             'm', MDB_OPT_UINTPTR, &minsize,
3696             'M', MDB_OPT_UINTPTR, &maxsize,
3697             't', MDB_OPT_UINTPTR, &thread,
3698             'T', MDB_OPT_STR, &type,
3699             'v', MDB_OPT_SETBITS, TRUE, &verbose,
3700             NULL) != argc)
3701                 return (DCMD_USAGE);
3702 
3703         if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3704                 if (verbose) {
3705                         mdb_printf("%16s %4s %16s %16s %16s\n"
3706                             "%<u>%16s %4s %16s %16s %16s%</u>\n",
3707                             "ADDR", "TYPE", "START", "END", "SIZE",
3708                             "", "", "THREAD", "TIMESTAMP", "");
3709                 } else {
3710                         mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3711                             "START", size? "SIZE" : "END", "WHO");
3712                 }
3713         }
3714 
3715         if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3716                 mdb_warn("couldn't read vmem_seg at %p", addr);
3717                 return (DCMD_ERR);
3718         }
3719 
3720         if (type != NULL) {
3721                 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3722                         t = VMEM_ALLOC;
3723                 else if (strcmp(type, "FREE") == 0)
3724                         t = VMEM_FREE;
3725                 else if (strcmp(type, "SPAN") == 0)
3726                         t = VMEM_SPAN;
3727                 else if (strcmp(type, "ROTR") == 0 ||
3728                     strcmp(type, "ROTOR") == 0)
3729                         t = VMEM_ROTOR;
3730                 else if (strcmp(type, "WLKR") == 0 ||
3731                     strcmp(type, "WALKER") == 0)
3732                         t = VMEM_WALKER;
3733                 else {
3734                         mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3735                             type);
3736                         return (DCMD_ERR);
3737                 }
3738 
3739                 if (vs.vs_type != t)
3740                         return (DCMD_OK);
3741         }
3742 
3743         sz = vs.vs_end - vs.vs_start;
3744 
3745         if (minsize != 0 && sz < minsize)
3746                 return (DCMD_OK);
3747 
3748         if (maxsize != 0 && sz > maxsize)
3749                 return (DCMD_OK);
3750 
3751         t = vs.vs_type;
3752         depth = vs.vs_depth;
3753 
3754         /*
3755          * debug info, when present, is only accurate for VMEM_ALLOC segments
3756          */
3757         no_debug = (t != VMEM_ALLOC) ||
3758             (depth == 0 || depth > VMEM_STACK_DEPTH);
3759 
3760         if (no_debug) {
3761                 if (caller != NULL || thread != NULL || earliest != 0 ||
3762                     latest != 0)
3763                         return (DCMD_OK);               /* not enough info */
3764         } else {
3765                 if (caller != NULL) {
3766                         laddr = caller;
3767                         haddr = caller + sizeof (caller);
3768 
3769                         if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3770                             sizeof (c), &sym) != -1 &&
3771                             caller == (uintptr_t)sym.st_value) {
3772                                 /*
3773                                  * We were provided an exact symbol value; any
3774                                  * address in the function is valid.
3775                                  */
3776                                 laddr = (uintptr_t)sym.st_value;
3777                                 haddr = (uintptr_t)sym.st_value + sym.st_size;
3778                         }
3779 
3780                         for (i = 0; i < depth; i++)
3781                                 if (vs.vs_stack[i] >= laddr &&
3782                                     vs.vs_stack[i] < haddr)
3783                                         break;
3784 
3785                         if (i == depth)
3786                                 return (DCMD_OK);
3787                 }
3788 
3789                 if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3790                         return (DCMD_OK);
3791 
3792                 if (earliest != 0 && vs.vs_timestamp < earliest)
3793                         return (DCMD_OK);
3794 
3795                 if (latest != 0 && vs.vs_timestamp > latest)
3796                         return (DCMD_OK);
3797         }
3798 
3799         type = (t == VMEM_ALLOC ? "ALLC" :
3800             t == VMEM_FREE ? "FREE" :
3801             t == VMEM_SPAN ? "SPAN" :
3802             t == VMEM_ROTOR ? "ROTR" :
3803             t == VMEM_WALKER ? "WLKR" :
3804             "????");
3805 
3806         if (flags & DCMD_PIPE_OUT) {
3807                 mdb_printf("%#lr\n", addr);
3808                 return (DCMD_OK);
3809         }
3810 
3811         if (verbose) {
3812                 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3813                     addr, type, vs.vs_start, vs.vs_end, sz);
3814 
3815                 if (no_debug)
3816                         return (DCMD_OK);
3817 
3818                 mdb_printf("%16s %4s %16p %16llx\n",
3819                     "", "", vs.vs_thread, vs.vs_timestamp);
3820 
3821                 mdb_inc_indent(17);
3822                 for (i = 0; i < depth; i++) {
3823                         mdb_printf("%a\n", stk[i]);
3824                 }
3825                 mdb_dec_indent(17);
3826                 mdb_printf("\n");
3827         } else {
3828                 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3829                     vs.vs_start, size ? sz : vs.vs_end);
3830 
3831                 if (no_debug) {
3832                         mdb_printf("\n");
3833                         return (DCMD_OK);
3834                 }
3835 
3836                 for (i = 0; i < depth; i++) {
3837                         if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3838                             c, sizeof (c), &sym) == -1)
3839                                 continue;
3840                         if (strncmp(c, "vmem_", 5) == 0)
3841                                 continue;
3842                         break;
3843                 }
3844                 /* if no frame qualified, don't read past the stack array */
                if (i == depth)
                        mdb_printf("\n");
                else
                        mdb_printf(" %a\n", stk[i]);
3845         }
3846         return (DCMD_OK);
3847 }
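
/*
 * Example usage (illustrative): display only the ALLOC segments of at
 * least 8k whose stack traces include kmem_slab_create:
 *
 *      > addr::walk vmem_seg | ::vmem_seg -T ALLOC -m 0t8192 -c kmem_slab_create
 */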
3848 
3849 typedef struct kmalog_data {
3850         uintptr_t       kma_addr;
3851         hrtime_t        kma_newest;
3852 } kmalog_data_t;
3853 
3854 /*ARGSUSED*/
3855 static int
3856 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3857 {
3858         char name[KMEM_CACHE_NAMELEN + 1];
3859         hrtime_t delta;
3860         int i, depth;
3861         size_t bufsize;
3862 
3863         if (bcp->bc_timestamp == 0)
3864                 return (WALK_DONE);
3865 
3866         if (kma->kma_newest == 0)
3867                 kma->kma_newest = bcp->bc_timestamp;
3868 
3869         if (kma->kma_addr) {
3870                 if (mdb_vread(&bufsize, sizeof (bufsize),
3871                     (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3872                         mdb_warn(
3873                             "failed to read cache_bufsize for cache at %p",
3874                             bcp->bc_cache);
3875                         return (WALK_ERR);
3876                 }
3877 
3878                 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3879                     kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3880                         return (WALK_NEXT);
3881         }
3882 
3883         delta = kma->kma_newest - bcp->bc_timestamp;
3884         depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3885 
3886         if (mdb_readstr(name, sizeof (name), (uintptr_t)
3887             &bcp->bc_cache->cache_name) <= 0)
3888                 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3889 
3890         mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3891             delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3892 
3893         for (i = 0; i < depth; i++)
3894                 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3895 
3896         return (WALK_NEXT);
3897 }
3898 
3899 int
3900 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3901 {
3902         const char *logname = "kmem_transaction_log";
3903         kmalog_data_t kma;
3904 
3905         if (argc > 1)
3906                 return (DCMD_USAGE);
3907 
3908         kma.kma_newest = 0;
3909         if (flags & DCMD_ADDRSPEC)
3910                 kma.kma_addr = addr;
3911         else
3912                 kma.kma_addr = NULL;
3913 
3914         if (argc > 0) {
3915                 if (argv->a_type != MDB_TYPE_STRING)
3916                         return (DCMD_USAGE);
3917                 if (strcmp(argv->a_un.a_str, "fail") == 0)
3918                         logname = "kmem_failure_log";
3919                 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3920                         logname = "kmem_slab_log";
3921                 else
3922                         return (DCMD_USAGE);
3923         }
3924 
3925         if (mdb_readvar(&addr, logname) == -1) {
3926                 mdb_warn("failed to read %s log header pointer", logname);
3927                 return (DCMD_ERR);
3928         }
3929 
3930         if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3931                 mdb_warn("failed to walk kmem log");
3932                 return (DCMD_ERR);
3933         }
3934 
3935         return (DCMD_OK);
3936 }
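
/*
 * Example usage (illustrative): '::kmalog' displays events from the
 * transaction log, with each event timestamped relative to the newest
 * event in the log (T-sec.nsec); '::kmalog fail' and '::kmalog slab'
 * select the failure and slab logs instead, and 'addr::kmalog' limits
 * output to events whose buffers contain addr.
 */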
3937 
3938 /*
3939  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3940  * The first piece is a structure which we use to accumulate kmem_cache_t
3941  * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
3942  * walker; we either add all caches, or ones named explicitly as arguments.
3943  */
3944 
3945 typedef struct kmclist {
3946         const char *kmc_name;                   /* Name to match (or NULL) */
3947         uintptr_t *kmc_caches;                  /* List of kmem_cache_t addrs */
3948         int kmc_nelems;                         /* Num entries in kmc_caches */
3949         int kmc_size;                           /* Size of kmc_caches array */
3950 } kmclist_t;
3951 
3952 static int
3953 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3954 {
3955         void *p;
3956         int s;
3957 
3958         if (kmc->kmc_name == NULL ||
3959             strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3960                 /*
3961                  * If we have a match, grow our array (if necessary), and then
3962                  * add the virtual address of the matching cache to our list.
3963                  */
3964                 if (kmc->kmc_nelems >= kmc->kmc_size) {
3965                         s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3966                         p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3967 
3968                         bcopy(kmc->kmc_caches, p,
3969                             sizeof (uintptr_t) * kmc->kmc_size);
3970 
3971                         kmc->kmc_caches = p;
3972                         kmc->kmc_size = s;
3973                 }
3974 
3975                 kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3976                 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3977         }
3978 
3979         return (WALK_NEXT);
3980 }
3981 
3982 /*
3983  * The second piece of ::kmausers is a hash table of allocations.  Each
3984  * allocation owner is identified by its stack trace and data_size.  We then
3985  * track the total bytes of all such allocations, and the number of allocations
3986  * to report at the end.  Once we have a list of caches, we walk through the
3987  * allocated bufctls of each, and update our hash table accordingly.
3988  */
3989 
3990 typedef struct kmowner {
3991         struct kmowner *kmo_head;               /* First hash elt in bucket */
3992         struct kmowner *kmo_next;               /* Next hash elt in chain */
3993         size_t kmo_signature;                   /* Hash table signature */
3994         uint_t kmo_num;                         /* Number of allocations */
3995         size_t kmo_data_size;                   /* Size of each allocation */
3996         size_t kmo_total_size;                  /* Total bytes of allocation */
3997         int kmo_depth;                          /* Depth of stack trace */
3998         uintptr_t kmo_stack[KMEM_STACK_DEPTH];  /* Stack trace */
3999 } kmowner_t;
4000 
4001 typedef struct kmusers {
4002         uintptr_t kmu_addr;                     /* address of interest */
4003         const kmem_cache_t *kmu_cache;          /* Current kmem cache */
4004         kmowner_t *kmu_hash;                    /* Hash table of owners */
4005         int kmu_nelems;                         /* Number of entries in use */
4006         int kmu_size;                           /* Total number of entries */
4007 } kmusers_t;
4008 
4009 static void
4010 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
4011     size_t size, size_t data_size)
4012 {
4013         int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4014         size_t bucket, signature = data_size;
4015         kmowner_t *kmo, *kmoend;
4016 
4017         /*
4018          * If the hash table is full, double its size and rehash everything.
4019          */
4020         if (kmu->kmu_nelems >= kmu->kmu_size) {
4021                 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
4022 
4023                 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
4024                 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4025                 kmu->kmu_hash = kmo;
4026                 kmu->kmu_size = s;
4027 
4028                 kmoend = kmu->kmu_hash + kmu->kmu_size;
4029                 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4030                         kmo->kmo_head = NULL;
4031 
4032                 kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4033                 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4034                         bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4035                         kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4036                         kmu->kmu_hash[bucket].kmo_head = kmo;
4037                 }
4038         }
4039 
4040         /*
4041          * Finish computing the hash signature from the stack trace, and then
4042          * see if the owner is in the hash table.  If so, update our stats.
4043          */
4044         for (i = 0; i < depth; i++)
4045                 signature += bcp->bc_stack[i];
4046 
4047         bucket = signature & (kmu->kmu_size - 1);
4048 
4049         for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4050                 if (kmo->kmo_signature == signature) {
4051                         size_t difference = 0;
4052 
4053                         difference |= kmo->kmo_data_size - data_size;
4054                         difference |= kmo->kmo_depth - depth;
4055 
4056                         for (i = 0; i < depth; i++) {
4057                                 difference |= kmo->kmo_stack[i] -
4058                                     bcp->bc_stack[i];
4059                         }
4060 
4061                         if (difference == 0) {
4062                                 kmo->kmo_total_size += size;
4063                                 kmo->kmo_num++;
4064                                 return;
4065                         }
4066                 }
4067         }
4068 
4069         /*
4070          * If the owner is not yet hashed, grab the next element and fill it
4071          * in based on the allocation information.
4072          */
4073         kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4074         kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4075         kmu->kmu_hash[bucket].kmo_head = kmo;
4076 
4077         kmo->kmo_signature = signature;
4078         kmo->kmo_num = 1;
4079         kmo->kmo_data_size = data_size;
4080         kmo->kmo_total_size = size;
4081         kmo->kmo_depth = depth;
4082 
4083         for (i = 0; i < depth; i++)
4084                 kmo->kmo_stack[i] = bcp->bc_stack[i];
4085 }
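
/*
 * Note (illustrative): two distinct owners can land in the same bucket,
 * and can even share a signature, since the signature is just data_size
 * plus the sum of the stack PCs; the explicit data_size/depth/stack
 * comparison above prevents such collisions from being merged.
 */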
4086 
4087 /*
4088  * When ::kmausers is invoked without the -f flag, we simply update our hash
4089  * table with the information from each allocated bufctl.
4090  */
4091 /*ARGSUSED*/
4092 static int
4093 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4094 {
4095         const kmem_cache_t *cp = kmu->kmu_cache;
4096 
4097         kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4098         return (WALK_NEXT);
4099 }
4100 
4101 /*
4102  * When ::kmausers is invoked with the -f flag, we print out the information
4103  * for each bufctl as well as updating the hash table.
4104  */
4105 static int
4106 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4107 {
4108         int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4109         const kmem_cache_t *cp = kmu->kmu_cache;
4110         kmem_bufctl_t bufctl;
4111 
4112         if (kmu->kmu_addr) {
4113                 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
4114                         mdb_warn("couldn't read bufctl at %p", addr);
4115                 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4116                     kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4117                     cp->cache_bufsize)
4118                         return (WALK_NEXT);
4119         }
4120 
4121         mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4122             cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4123 
4124         for (i = 0; i < depth; i++)
4125                 mdb_printf("\t %a\n", bcp->bc_stack[i]);
4126 
4127         kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4128         return (WALK_NEXT);
4129 }
4130 
4131 /*
4132  * We sort our results by allocation size before printing them.
4133  */
4134 static int
4135 kmownercmp(const void *lp, const void *rp)
4136 {
4137         const kmowner_t *lhs = lp;
4138         const kmowner_t *rhs = rp;
4139 
4140         /* sort descending by total size, without truncating a size_t */
        if (rhs->kmo_total_size > lhs->kmo_total_size)
                return (1);
        if (rhs->kmo_total_size < lhs->kmo_total_size)
                return (-1);
        return (0);
4141 }
4142 
4143 /*
4144  * The main engine of ::kmausers is relatively straightforward: First we
4145  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4146  * iterate over the allocated bufctls of each cache in the list.  Finally,
4147  * we sort and print our results.
4148  */
4149 /*ARGSUSED*/
4150 int
4151 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4152 {
4153         int mem_threshold = 8192;       /* Minimum # bytes for printing */
4154         int cnt_threshold = 100;        /* Minimum # blocks for printing */
4155         int audited_caches = 0;         /* Number of KMF_AUDIT caches found */
4156         int do_all_caches = 1;          /* Do all caches (no arguments) */
4157         int opt_e = FALSE;              /* Include "small" users */
4158         int opt_f = FALSE;              /* Print stack traces */
4159 
4160         mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4161         kmowner_t *kmo, *kmoend;
4162         int i, oelems;
4163 
4164         kmclist_t kmc;
4165         kmusers_t kmu;
4166 
4167         bzero(&kmc, sizeof (kmc));
4168         bzero(&kmu, sizeof (kmu));
4169 
4170         while ((i = mdb_getopts(argc, argv,
4171             'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4172             'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4173 
4174                 argv += i;      /* skip past options we just processed */
4175                 argc -= i;      /* adjust argc */
4176 
4177                 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4178                         return (DCMD_USAGE);
4179 
4180                 oelems = kmc.kmc_nelems;
4181                 kmc.kmc_name = argv->a_un.a_str;
4182                 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4183 
4184                 if (kmc.kmc_nelems == oelems) {
4185                         mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4186                         return (DCMD_ERR);
4187                 }
4188 
4189                 do_all_caches = 0;
4190                 argv++;
4191                 argc--;
4192         }
4193 
4194         if (flags & DCMD_ADDRSPEC) {
4195                 opt_f = TRUE;
4196                 kmu.kmu_addr = addr;
4197         } else {
4198                 kmu.kmu_addr = NULL;
4199         }
4200 
4201         if (opt_e)
4202                 mem_threshold = cnt_threshold = 0;
4203 
4204         if (opt_f)
4205                 callback = (mdb_walk_cb_t)kmause2;
4206 
4207         if (do_all_caches) {
4208                 kmc.kmc_name = NULL; /* match all cache names */
4209                 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4210         }
4211 
4212         for (i = 0; i < kmc.kmc_nelems; i++) {
4213                 uintptr_t cp = kmc.kmc_caches[i];
4214                 kmem_cache_t c;
4215 
4216                 if (mdb_vread(&c, sizeof (c), cp) == -1) {
4217                         mdb_warn("failed to read cache at %p", cp);
4218                         continue;
4219                 }
4220 
4221                 if (!(c.cache_flags & KMF_AUDIT)) {
4222                         if (!do_all_caches) {
4223                                 mdb_warn("KMF_AUDIT is not enabled for %s\n",
4224                                     c.cache_name);
4225                         }
4226                         continue;
4227                 }
4228 
4229                 kmu.kmu_cache = &c;
4230                 (void) mdb_pwalk("bufctl", callback, &kmu, cp);
4231                 audited_caches++;
4232         }
4233 
4234         if (audited_caches == 0 && do_all_caches) {
4235                 mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4236                 return (DCMD_ERR);
4237         }
4238 
4239         qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4240         kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4241 
4242         for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4243                 if (kmo->kmo_total_size < mem_threshold &&
4244                     kmo->kmo_num < cnt_threshold)
4245                         continue;
4246                 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4247                     kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4248                 for (i = 0; i < kmo->kmo_depth; i++)
4249                         mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4250         }
4251 
4252         return (DCMD_OK);
4253 }
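
/*
 * Example usage (a sketch; the counts are made up):
 *
 *      > ::kmausers kmem_alloc_1152
 *      1036800 bytes for 900 allocations with data size 1152:
 *               kmem_alloc+0x30
 *               allocb+0x64
 *
 * '-e' drops the 8k/100-allocation reporting thresholds; '-f' also
 * prints each matching allocation (with its stack) as it is found.
 */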
4254 
4255 void
4256 kmausers_help(void)
4257 {
4258         mdb_printf(
4259             "Displays the largest users of the kmem allocator, sorted by \n"
4260             "trace.  If one or more caches is specified, only those caches\n"
4261             "will be searched.  By default, all caches are searched.  If an\n"
4262             "address is specified, then only those allocations which include\n"
4263             "the given address are displayed.  Specifying an address implies\n"
4264             "-f.\n"
4265             "\n"
4266             "\t-e\tInclude all users, not just the largest\n"
4267             "\t-f\tDisplay individual allocations.  By default, users are\n"
4268             "\t\tgrouped by stack\n");
4269 }
4270 
4271 static int
4272 kmem_ready_check(void)
4273 {
4274         int ready;
4275 
4276         if (mdb_readvar(&ready, "kmem_ready") < 0)
4277                 return (-1); /* errno is set for us */
4278 
4279         return (ready);
4280 }
4281 
4282 void
4283 kmem_statechange(void)
4284 {
4285         static int been_ready = 0;
4286 
4287         if (been_ready)
4288                 return;
4289 
4290         if (kmem_ready_check() <= 0)
4291                 return;
4292 
4293         been_ready = 1;
4294         (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4295 }
4296 
4297 void
4298 kmem_init(void)
4299 {
4300         mdb_walker_t w = {
4301                 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4302                 list_walk_step, list_walk_fini
4303         };
4304 
4305         /*
4306          * If kmem is ready, we'll need to invoke the kmem_cache walker
4307          * immediately.  Walkers in the linkage structure won't be ready until
4308          * _mdb_init returns, so we'll need to add this one manually.  If kmem
4309          * is ready, we'll use the walker to initialize the caches.  If kmem
4310          * isn't ready, we'll register a callback that will allow us to defer
4311          * cache walking until it is.
4312          */
4313         if (mdb_add_walker(&w) != 0) {
4314                 mdb_warn("failed to add kmem_cache walker");
4315                 return;
4316         }
4317 
4318         kmem_statechange();
4319 
4320         /* register our ::whatis handlers */
4321         mdb_whatis_register("modules", whatis_run_modules, NULL,
4322             WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4323         mdb_whatis_register("threads", whatis_run_threads, NULL,
4324             WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4325         mdb_whatis_register("pages", whatis_run_pages, NULL,
4326             WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4327         mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4328             WHATIS_PRIO_ALLOCATOR, 0);
4329         mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4330             WHATIS_PRIO_ALLOCATOR, 0);
4331 }
4332 
4333 typedef struct whatthread {
4334         uintptr_t       wt_target;
4335         int             wt_verbose;
4336 } whatthread_t;
4337 
4338 static int
4339 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4340 {
4341         uintptr_t current, data;
4342 
4343         if (t->t_stkbase == NULL)
4344                 return (WALK_NEXT);
4345 
4346         /*
4347          * Warn about swapped out threads, but drive on anyway
4348          */
4349         if (!(t->t_schedflag & TS_LOAD)) {
4350                 mdb_warn("thread %p's stack swapped out\n", addr);
4351                 return (WALK_NEXT);
4352         }
4353 
4354         /*
4355          * Search the thread's stack for the given pointer.  Note that it would
4356          * be more efficient to follow ::kgrep's lead and read in page-sized
4357          * chunks, but this routine is already fast and simple.
4358          */
4359         for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4360             current += sizeof (uintptr_t)) {
4361                 if (mdb_vread(&data, sizeof (data), current) == -1) {
4362                         mdb_warn("couldn't read thread %p's stack at %p",
4363                             addr, current);
4364                         return (WALK_ERR);
4365                 }
4366 
4367                 if (data == w->wt_target) {
4368                         if (w->wt_verbose) {
4369                                 mdb_printf("%p in thread %p's stack%s\n",
4370                                     current, addr, stack_active(t, current));
4371                         } else {
4372                                 mdb_printf("%#lr\n", addr);
4373                                 return (WALK_NEXT);
4374                         }
4375                 }
4376         }
4377 
4378         return (WALK_NEXT);
4379 }
4380 
4381 int
4382 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4383 {
4384         whatthread_t w;
4385 
4386         if (!(flags & DCMD_ADDRSPEC))
4387                 return (DCMD_USAGE);
4388 
4389         w.wt_verbose = FALSE;
4390         w.wt_target = addr;
4391 
4392         if (mdb_getopts(argc, argv,
4393             'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4394                 return (DCMD_USAGE);
4395 
4396         if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4397             == -1) {
4398                 mdb_warn("couldn't walk threads");
4399                 return (DCMD_ERR);
4400         }
4401 
4402         return (DCMD_OK);
4403 }
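
/*
 * Example usage (illustrative): given a buffer address of interest,
 *
 *      > 30000004d2e8::whatthread
 *      0x30000b1a020
 *
 * prints each thread whose stack contains a pointer to that address;
 * with -v, the matching stack location and whether it lies in the
 * active portion of the stack are shown instead.
 */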