1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2018, Joyent, Inc.
  28  * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
  29  */
  30 
  31 #include "umem.h"
  32 
  33 #include <sys/vmem_impl_user.h>
  34 #include <umem_impl.h>
  35 
  36 #include <alloca.h>
  37 #include <limits.h>
  38 #include <mdb/mdb_whatis.h>
  39 #include <thr_uberdata.h>
  40 
  41 #include "misc.h"
  42 #include "leaky.h"
  43 #include "dist.h"
  44 
  45 #include "umem_pagesize.h"
  46 
  47 #define UM_ALLOCATED            0x1
  48 #define UM_FREE                 0x2
  49 #define UM_BUFCTL               0x4
  50 #define UM_HASH                 0x8
  51 
  52 int umem_ready;
  53 
  54 static int umem_stack_depth_warned;
  55 static uint32_t umem_max_ncpus;
  56 uint32_t umem_stack_depth;
  57 
  58 size_t umem_pagesize;
  59 
  60 #define UMEM_READVAR(var)                               \
  61         (umem_readvar(&(var), #var) == -1 &&                \
  62             (mdb_warn("failed to read "#var), 1))
  63 
  64 int
  65 umem_update_variables(void)
  66 {
  67         size_t pagesize;
  68 
  69         /*
  70          * Figure out which type of umem is being used; if it's not there
  71          * yet, succeed quietly.
  72          */
  73         if (umem_set_standalone() == -1) {
  74                 umem_ready = 0;
  75                 return (0);             /* umem not there yet */
  76         }
  77 
  78         /*
  79          * Solaris 9 used a different name for umem_max_ncpus.  It's
  80          * cheap backwards compatibility to check for both names.
  81          */
  82         if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
  83             umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
  84                 mdb_warn("unable to read umem_max_ncpus or max_ncpus");
  85                 return (-1);
  86         }
  87         if (UMEM_READVAR(umem_ready))
  88                 return (-1);
  89         if (UMEM_READVAR(umem_stack_depth))
  90                 return (-1);
  91         if (UMEM_READVAR(pagesize))
  92                 return (-1);
  93 
  94         if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
  95                 if (umem_stack_depth_warned == 0) {
  96                         mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
  97                             umem_stack_depth, UMEM_MAX_STACK_DEPTH);
  98                         umem_stack_depth_warned = 1;
  99                 }
 100                 umem_stack_depth = 0;
 101         }
 102 
 103         umem_pagesize = pagesize;
 104 
 105         return (0);
 106 }
 107 
 108 static int
 109 umem_ptc_walk_init(mdb_walk_state_t *wsp)
 110 {
 111         if (wsp->walk_addr == NULL) {
 112                 if (mdb_layered_walk("ulwp", wsp) == -1) {
 113                         mdb_warn("couldn't walk 'ulwp'");
 114                         return (WALK_ERR);
 115                 }
 116         }
 117 
 118         return (WALK_NEXT);
 119 }
 120 
 121 static int
 122 umem_ptc_walk_step(mdb_walk_state_t *wsp)
 123 {
 124         uintptr_t this;
 125         int rval;
 126 
 127         if (wsp->walk_layer != NULL) {
 128                 this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self +
 129                     (uintptr_t)wsp->walk_arg;
 130         } else {
 131                 this = wsp->walk_addr + (uintptr_t)wsp->walk_arg;
 132         }
 133 
 134         for (;;) {
 135                 if (mdb_vread(&this, sizeof (void *), this) == -1) {
 136                         mdb_warn("couldn't read ptc buffer at %p", this);
 137                         return (WALK_ERR);
 138                 }
 139 
 140                 if (this == NULL)
 141                         break;
 142 
 143                 rval = wsp->walk_callback(this, &this, wsp->walk_cbdata);
 144 
 145                 if (rval != WALK_NEXT)
 146                         return (rval);
 147         }
 148 
 149         return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE);
 150 }
 151 
 152 /*ARGSUSED*/
 153 static int
 154 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes)
 155 {
 156         mdb_walker_t w;
 157         char descr[64];
 158         char name[64];
 159         int i;
 160 
 161         (void) mdb_snprintf(descr, sizeof (descr),
 162             "walk the %s cache", c->cache_name);
 163 
 164         w.walk_name = c->cache_name;
 165         w.walk_descr = descr;
 166         w.walk_init = umem_walk_init;
 167         w.walk_step = umem_walk_step;
 168         w.walk_fini = umem_walk_fini;
 169         w.walk_init_arg = (void *)addr;
 170 
 171         if (mdb_add_walker(&w) == -1)
 172                 mdb_warn("failed to add %s walker", c->cache_name);
 173 
 174         if (!(c->cache_flags & UMF_PTC))
 175                 return (WALK_NEXT);
 176 
 177         /*
 178          * For the per-thread cache walker, the address is the offset in the
 179          * tm_roots[] array of the ulwp_t.
 180          */
 181         for (i = 0; sizes[i] != 0; i++) {
 182                 if (sizes[i] == c->cache_bufsize)
 183                         break;
 184         }
 185 
 186         if (sizes[i] == 0) {
 187                 mdb_warn("cache %s is cached per-thread, but could not find "
 188                     "size in umem_alloc_sizes\n", c->cache_name);
 189                 return (WALK_NEXT);
 190         }
 191 
 192         if (i >= NTMEMBASE) {
 193                 mdb_warn("index for %s (%d) exceeds root slots (%d)\n",
 194                     c->cache_name, i, NTMEMBASE);
 195                 return (WALK_NEXT);
 196         }
 197 
 198         (void) mdb_snprintf(name, sizeof (name),
 199             "umem_ptc_%d", c->cache_bufsize);
 200         (void) mdb_snprintf(descr, sizeof (descr),
 201             "walk the per-thread cache for %s", c->cache_name);
 202 
 203         w.walk_name = name;
 204         w.walk_descr = descr;
 205         w.walk_init = umem_ptc_walk_init;
 206         w.walk_step = umem_ptc_walk_step;
 207         w.walk_fini = NULL;
 208         w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]);
 209 
 210         if (mdb_add_walker(&w) == -1)
 211                 mdb_warn("failed to add %s walker", w.walk_name);
 212 
 213         return (WALK_NEXT);
 214 }
 215 
 216 /*ARGSUSED*/
 217 static void
 218 umem_statechange_cb(void *arg)
 219 {
 220         static int been_ready = 0;
 221         GElf_Sym sym;
 222         int *sizes;
 223 
 224 #ifndef _KMDB
 225         leaky_cleanup(1);       /* state changes invalidate leaky state */
 226 #endif
 227 
 228         if (umem_update_variables() == -1)
 229                 return;
 230 
 231         if (been_ready)
 232                 return;
 233 
 234         if (umem_ready != UMEM_READY)
 235                 return;
 236 
 237         been_ready = 1;
 238 
 239         /*
 240          * In order to determine the tm_roots offset of any cache that is
 241          * cached per-thread, we need to have the umem_alloc_sizes array.
 242          * Read this, assuring that it is zero-terminated.
 243          */
 244         if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
 245                 mdb_warn("unable to lookup 'umem_alloc_sizes'");
 246                 return;
 247         }
 248 
 249         sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC);
 250 
 251         if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) {
 252                 mdb_warn("couldn't read 'umem_alloc_sizes'");
 253                 return;
 254         }
 255 
 256         (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes);
 257 }
 258 
 259 int
 260 umem_abort_messages(void)
 261 {
 262         char *umem_error_buffer;
 263         uint_t umem_error_begin;
 264         GElf_Sym sym;
 265         size_t bufsize;
 266 
 267         if (UMEM_READVAR(umem_error_begin))
 268                 return (DCMD_ERR);
 269 
 270         if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
 271                 mdb_warn("unable to look up umem_error_buffer");
 272                 return (DCMD_ERR);
 273         }
 274 
 275         bufsize = (size_t)sym.st_size;
 276 
 277         umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
 278 
 279         if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
 280             != bufsize) {
 281                 mdb_warn("unable to read umem_error_buffer");
 282                 return (DCMD_ERR);
 283         }
 284         /* put a zero after the end of the buffer to simplify printing */
 285         umem_error_buffer[bufsize] = 0;
 286 
 287         if ((umem_error_begin % bufsize) == 0)
 288                 mdb_printf("%s\n", umem_error_buffer);
 289         else {
 290                 umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
 291                 mdb_printf("%s%s\n",
 292                     &umem_error_buffer[umem_error_begin % bufsize],
 293                     umem_error_buffer);
 294         }
 295 
 296         return (DCMD_OK);
 297 }
 298 
 299 static void
 300 umem_log_status(const char *name, umem_log_header_t *val)
 301 {
 302         umem_log_header_t my_lh;
 303         uintptr_t pos = (uintptr_t)val;
 304         size_t size;
 305 
 306         if (pos == NULL)
 307                 return;
 308 
 309         if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
 310                 mdb_warn("\nunable to read umem_%s_log pointer %p",
 311                     name, pos);
 312                 return;
 313         }
 314 
 315         size = my_lh.lh_chunksize * my_lh.lh_nchunks;
 316 
 317         if (size % (1024 * 1024) == 0)
 318                 mdb_printf("%s=%dm ", name, size / (1024 * 1024));
 319         else if (size % 1024 == 0)
 320                 mdb_printf("%s=%dk ", name, size / 1024);
 321         else
 322                 mdb_printf("%s=%d ", name, size);
 323 }
 324 
 325 typedef struct umem_debug_flags {
 326         const char      *udf_name;
 327         uint_t          udf_flags;
 328         uint_t          udf_clear;      /* if 0, uses udf_flags */
 329 } umem_debug_flags_t;
 330 
 331 umem_debug_flags_t umem_status_flags[] = {
 332         { "random",     UMF_RANDOMIZE,  UMF_RANDOM },
 333         { "default",    UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
 334         { "audit",      UMF_AUDIT },
 335         { "guards",     UMF_DEADBEEF | UMF_REDZONE },
 336         { "nosignal",   UMF_CHECKSIGNAL },
 337         { "firewall",   UMF_FIREWALL },
 338         { "lite",       UMF_LITE },
 339         { "checknull",  UMF_CHECKNULL },
 340         { NULL }
 341 };
 342 
 343 /*ARGSUSED*/
 344 int
 345 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
 346 {
 347         int umem_logging;
 348 
 349         umem_log_header_t *umem_transaction_log;
 350         umem_log_header_t *umem_content_log;
 351         umem_log_header_t *umem_failure_log;
 352         umem_log_header_t *umem_slab_log;
 353 
 354         mdb_printf("Status:\t\t%s\n",
 355             umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
 356             umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
 357             umem_ready == UMEM_READY_INITING ? "initialization in process" :
 358             umem_ready == UMEM_READY ? "ready and active" :
 359             umem_ready == 0 ? "not loaded into address space" :
 360             "unknown (umem_ready invalid)");
 361 
 362         if (umem_ready == 0)
 363                 return (DCMD_OK);
 364 
 365         mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
 366 
 367         if (UMEM_READVAR(umem_logging))
 368                 goto err;
 369         if (UMEM_READVAR(umem_transaction_log))
 370                 goto err;
 371         if (UMEM_READVAR(umem_content_log))
 372                 goto err;
 373         if (UMEM_READVAR(umem_failure_log))
 374                 goto err;
 375         if (UMEM_READVAR(umem_slab_log))
 376                 goto err;
 377 
 378         mdb_printf("Logs:\t\t");
 379         umem_log_status("transaction", umem_transaction_log);
 380         umem_log_status("content", umem_content_log);
 381         umem_log_status("fail", umem_failure_log);
 382         umem_log_status("slab", umem_slab_log);
 383         if (!umem_logging)
 384                 mdb_printf("(inactive)");
 385         mdb_printf("\n");
 386 
 387         mdb_printf("Message buffer:\n");
 388         return (umem_abort_messages());
 389 
 390 err:
 391         mdb_printf("Message buffer:\n");
 392         (void) umem_abort_messages();
 393         return (DCMD_ERR);
 394 }
 395 
 396 typedef struct {
 397         uintptr_t ucw_first;
 398         uintptr_t ucw_current;
 399 } umem_cache_walk_t;
 400 
 401 int
 402 umem_cache_walk_init(mdb_walk_state_t *wsp)
 403 {
 404         umem_cache_walk_t *ucw;
 405         umem_cache_t c;
 406         uintptr_t cp;
 407         GElf_Sym sym;
 408 
 409         if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
 410                 mdb_warn("couldn't find umem_null_cache");
 411                 return (WALK_ERR);
 412         }
 413 
 414         cp = (uintptr_t)sym.st_value;
 415 
 416         if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
 417                 mdb_warn("couldn't read cache at %p", cp);
 418                 return (WALK_ERR);
 419         }
 420 
 421         ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
 422 
 423         ucw->ucw_first = cp;
 424         ucw->ucw_current = (uintptr_t)c.cache_next;
 425         wsp->walk_data = ucw;
 426 
 427         return (WALK_NEXT);
 428 }
 429 
 430 int
 431 umem_cache_walk_step(mdb_walk_state_t *wsp)
 432 {
 433         umem_cache_walk_t *ucw = wsp->walk_data;
 434         umem_cache_t c;
 435         int status;
 436 
 437         if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
 438                 mdb_warn("couldn't read cache at %p", ucw->ucw_current);
 439                 return (WALK_DONE);
 440         }
 441 
 442         status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
 443 
 444         if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
 445                 return (WALK_DONE);
 446 
 447         return (status);
 448 }
 449 
 450 void
 451 umem_cache_walk_fini(mdb_walk_state_t *wsp)
 452 {
 453         umem_cache_walk_t *ucw = wsp->walk_data;
 454         mdb_free(ucw, sizeof (umem_cache_walk_t));
 455 }
 456 
 457 typedef struct {
 458         umem_cpu_t *ucw_cpus;
 459         uint32_t ucw_current;
 460         uint32_t ucw_max;
 461 } umem_cpu_walk_state_t;
 462 
 463 int
 464 umem_cpu_walk_init(mdb_walk_state_t *wsp)
 465 {
 466         umem_cpu_t *umem_cpus;
 467 
 468         umem_cpu_walk_state_t *ucw;
 469 
 470         if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
 471                 mdb_warn("failed to read 'umem_cpus'");
 472                 return (WALK_ERR);
 473         }
 474 
 475         ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
 476 
 477         ucw->ucw_cpus = umem_cpus;
 478         ucw->ucw_current = 0;
 479         ucw->ucw_max = umem_max_ncpus;
 480 
 481         wsp->walk_data = ucw;
 482         return (WALK_NEXT);
 483 }
 484 
 485 int
 486 umem_cpu_walk_step(mdb_walk_state_t *wsp)
 487 {
 488         umem_cpu_t cpu;
 489         umem_cpu_walk_state_t *ucw = wsp->walk_data;
 490 
 491         uintptr_t caddr;
 492 
 493         if (ucw->ucw_current >= ucw->ucw_max)
 494                 return (WALK_DONE);
 495 
 496         caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
 497 
 498         if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
 499                 mdb_warn("failed to read cpu %d", ucw->ucw_current);
 500                 return (WALK_ERR);
 501         }
 502 
 503         ucw->ucw_current++;
 504 
 505         return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
 506 }
 507 
 508 void
 509 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
 510 {
 511         umem_cpu_walk_state_t *ucw = wsp->walk_data;
 512 
 513         mdb_free(ucw, sizeof (*ucw));
 514 }
 515 
 516 int
 517 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
 518 {
 519         if (wsp->walk_addr == NULL) {
 520                 mdb_warn("umem_cpu_cache doesn't support global walks");
 521                 return (WALK_ERR);
 522         }
 523 
 524         if (mdb_layered_walk("umem_cpu", wsp) == -1) {
 525                 mdb_warn("couldn't walk 'umem_cpu'");
 526                 return (WALK_ERR);
 527         }
 528 
 529         wsp->walk_data = (void *)wsp->walk_addr;
 530 
 531         return (WALK_NEXT);
 532 }
 533 
 534 int
 535 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
 536 {
 537         uintptr_t caddr = (uintptr_t)wsp->walk_data;
 538         const umem_cpu_t *cpu = wsp->walk_layer;
 539         umem_cpu_cache_t cc;
 540 
 541         caddr += cpu->cpu_cache_offset;
 542 
 543         if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
 544                 mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
 545                 return (WALK_ERR);
 546         }
 547 
 548         return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
 549 }
 550 
 551 int
 552 umem_slab_walk_init(mdb_walk_state_t *wsp)
 553 {
 554         uintptr_t caddr = wsp->walk_addr;
 555         umem_cache_t c;
 556 
 557         if (caddr == NULL) {
 558                 mdb_warn("umem_slab doesn't support global walks\n");
 559                 return (WALK_ERR);
 560         }
 561 
 562         if (mdb_vread(&c, sizeof (c), caddr) == -1) {
 563                 mdb_warn("couldn't read umem_cache at %p", caddr);
 564                 return (WALK_ERR);
 565         }
 566 
 567         wsp->walk_data =
 568             (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
 569         wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
 570 
 571         return (WALK_NEXT);
 572 }
 573 
 574 int
 575 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
 576 {
 577         uintptr_t caddr = wsp->walk_addr;
 578         umem_cache_t c;
 579 
 580         if (caddr == NULL) {
 581                 mdb_warn("umem_slab_partial doesn't support global walks\n");
 582                 return (WALK_ERR);
 583         }
 584 
 585         if (mdb_vread(&c, sizeof (c), caddr) == -1) {
 586                 mdb_warn("couldn't read umem_cache at %p", caddr);
 587                 return (WALK_ERR);
 588         }
 589 
 590         wsp->walk_data =
 591             (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
 592         wsp->walk_addr = (uintptr_t)c.cache_freelist;
 593 
 594         /*
 595          * Some consumers (umem_walk_step(), in particular) require at
 596          * least one callback if there are any buffers in the cache.  So
 597          * if there are *no* partial slabs, report the last full slab, if
 598          * any.
 599          *
 600          * Yes, this is ugly, but it's cleaner than the other possibilities.
 601          */
 602         if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
 603                 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
 604 
 605         return (WALK_NEXT);
 606 }
 607 
 608 int
 609 umem_slab_walk_step(mdb_walk_state_t *wsp)
 610 {
 611         umem_slab_t s;
 612         uintptr_t addr = wsp->walk_addr;
 613         uintptr_t saddr = (uintptr_t)wsp->walk_data;
 614         uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
 615 
 616         if (addr == saddr)
 617                 return (WALK_DONE);
 618 
 619         if (mdb_vread(&s, sizeof (s), addr) == -1) {
 620                 mdb_warn("failed to read slab at %p", wsp->walk_addr);
 621                 return (WALK_ERR);
 622         }
 623 
 624         if ((uintptr_t)s.slab_cache != caddr) {
 625                 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
 626                     addr, caddr, s.slab_cache);
 627                 return (WALK_ERR);
 628         }
 629 
 630         wsp->walk_addr = (uintptr_t)s.slab_next;
 631 
 632         return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
 633 }
 634 
 635 int
 636 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
 637 {
 638         umem_cache_t c;
 639 
 640         if (!(flags & DCMD_ADDRSPEC)) {
 641                 if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
 642                         mdb_warn("can't walk umem_cache");
 643                         return (DCMD_ERR);
 644                 }
 645                 return (DCMD_OK);
 646         }
 647 
 648         if (DCMD_HDRSPEC(flags))
 649                 mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
 650                     "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
 651 
 652         if (mdb_vread(&c, sizeof (c), addr) == -1) {
 653                 mdb_warn("couldn't read umem_cache at %p", addr);
 654                 return (DCMD_ERR);
 655         }
 656 
 657         mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
 658             c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
 659 
 660         return (DCMD_OK);
 661 }
 662 
 663 static int
 664 addrcmp(const void *lhs, const void *rhs)
 665 {
 666         uintptr_t p1 = *((uintptr_t *)lhs);
 667         uintptr_t p2 = *((uintptr_t *)rhs);
 668 
 669         if (p1 < p2)
 670                 return (-1);
 671         if (p1 > p2)
 672                 return (1);
 673         return (0);
 674 }
 675 
 676 static int
 677 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
 678 {
 679         const umem_bufctl_audit_t *bcp1 = *lhs;
 680         const umem_bufctl_audit_t *bcp2 = *rhs;
 681 
 682         if (bcp1->bc_timestamp > bcp2->bc_timestamp)
 683                 return (-1);
 684 
 685         if (bcp1->bc_timestamp < bcp2->bc_timestamp)
 686                 return (1);
 687 
 688         return (0);
 689 }
 690 
 691 typedef struct umem_hash_walk {
 692         uintptr_t *umhw_table;
 693         size_t umhw_nelems;
 694         size_t umhw_pos;
 695         umem_bufctl_t umhw_cur;
 696 } umem_hash_walk_t;
 697 
 698 int
 699 umem_hash_walk_init(mdb_walk_state_t *wsp)
 700 {
 701         umem_hash_walk_t *umhw;
 702         uintptr_t *hash;
 703         umem_cache_t c;
 704         uintptr_t haddr, addr = wsp->walk_addr;
 705         size_t nelems;
 706         size_t hsize;
 707 
 708         if (addr == NULL) {
 709                 mdb_warn("umem_hash doesn't support global walks\n");
 710                 return (WALK_ERR);
 711         }
 712 
 713         if (mdb_vread(&c, sizeof (c), addr) == -1) {
 714                 mdb_warn("couldn't read cache at addr %p", addr);
 715                 return (WALK_ERR);
 716         }
 717 
 718         if (!(c.cache_flags & UMF_HASH)) {
 719                 mdb_warn("cache %p doesn't have a hash table\n", addr);
 720                 return (WALK_DONE);             /* nothing to do */
 721         }
 722 
 723         umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
 724         umhw->umhw_cur.bc_next = NULL;
 725         umhw->umhw_pos = 0;
 726 
 727         umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
 728         hsize = nelems * sizeof (uintptr_t);
 729         haddr = (uintptr_t)c.cache_hash_table;
 730 
 731         umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
 732         if (mdb_vread(hash, hsize, haddr) == -1) {
 733                 mdb_warn("failed to read hash table at %p", haddr);
 734                 mdb_free(hash, hsize);
 735                 mdb_free(umhw, sizeof (umem_hash_walk_t));
 736                 return (WALK_ERR);
 737         }
 738 
 739         wsp->walk_data = umhw;
 740 
 741         return (WALK_NEXT);
 742 }
 743 
 744 int
 745 umem_hash_walk_step(mdb_walk_state_t *wsp)
 746 {
 747         umem_hash_walk_t *umhw = wsp->walk_data;
 748         uintptr_t addr = NULL;
 749 
 750         if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
 751                 while (umhw->umhw_pos < umhw->umhw_nelems) {
 752                         if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
 753                                 break;
 754                 }
 755         }
 756         if (addr == NULL)
 757                 return (WALK_DONE);
 758 
 759         if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
 760                 mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
 761                 return (WALK_ERR);
 762         }
 763 
 764         return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
 765 }
 766 
 767 void
 768 umem_hash_walk_fini(mdb_walk_state_t *wsp)
 769 {
 770         umem_hash_walk_t *umhw = wsp->walk_data;
 771 
 772         if (umhw == NULL)
 773                 return;
 774 
 775         mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
 776         mdb_free(umhw, sizeof (umem_hash_walk_t));
 777 }
 778 
 779 /*
 780  * Find the address of the bufctl structure for the address 'buf' in cache
 781  * 'cp', which is at address caddr, and place it in *out.
 782  */
 783 static int
 784 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
 785 {
 786         uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
 787         umem_bufctl_t *bcp;
 788         umem_bufctl_t bc;
 789 
 790         if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
 791                 mdb_warn("unable to read hash bucket for %p in cache %p",
 792                     buf, caddr);
 793                 return (-1);
 794         }
 795 
 796         while (bcp != NULL) {
 797                 if (mdb_vread(&bc, sizeof (umem_bufctl_t),
 798                     (uintptr_t)bcp) == -1) {
 799                         mdb_warn("unable to read bufctl at %p", bcp);
 800                         return (-1);
 801                 }
 802                 if (bc.bc_addr == buf) {
 803                         *out = (uintptr_t)bcp;
 804                         return (0);
 805                 }
 806                 bcp = bc.bc_next;
 807         }
 808 
 809         mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
 810         return (-1);
 811 }
 812 
 813 int
 814 umem_get_magsize(const umem_cache_t *cp)
 815 {
 816         uintptr_t addr = (uintptr_t)cp->cache_magtype;
 817         GElf_Sym mt_sym;
 818         umem_magtype_t mt;
 819         int res;
 820 
 821         /*
 822          * if cpu 0 has a non-zero magsize, it must be correct.  caches
 823          * with UMF_NOMAGAZINE have disabled their magazine layers, so
 824          * it is okay to return 0 for them.
 825          */
 826         if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
 827             (cp->cache_flags & UMF_NOMAGAZINE))
 828                 return (res);
 829 
 830         if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
 831                 mdb_warn("unable to read 'umem_magtype'");
 832         } else if (addr < mt_sym.st_value ||
 833             addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
 834             ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
 835                 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
 836                     cp->cache_name, addr);
 837                 return (0);
 838         }
 839         if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
 840                 mdb_warn("unable to read magtype at %a", addr);
 841                 return (0);
 842         }
 843         return (mt.mt_magsize);
 844 }
 845 
 846 /*ARGSUSED*/
 847 static int
 848 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
 849 {
 850         *est -= (sp->slab_chunks - sp->slab_refcnt);
 851 
 852         return (WALK_NEXT);
 853 }
 854 
 855 /*
 856  * Returns an upper bound on the number of allocated buffers in a given
 857  * cache.
 858  */
 859 size_t
 860 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
 861 {
 862         int magsize;
 863         size_t cache_est;
 864 
 865         cache_est = cp->cache_buftotal;
 866 
 867         (void) mdb_pwalk("umem_slab_partial",
 868             (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
 869 
 870         if ((magsize = umem_get_magsize(cp)) != 0) {
 871                 size_t mag_est = cp->cache_full.ml_total * magsize;
 872 
 873                 if (cache_est >= mag_est) {
 874                         cache_est -= mag_est;
 875                 } else {
 876                         mdb_warn("cache %p's magazine layer holds more buffers "
 877                             "than the slab layer.\n", addr);
 878                 }
 879         }
 880         return (cache_est);
 881 }
 882 
 883 #define READMAG_ROUNDS(rounds) { \
 884         if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
 885                 mdb_warn("couldn't read magazine at %p", ump); \
 886                 goto fail; \
 887         } \
 888         for (i = 0; i < rounds; i++) { \
 889                 maglist[magcnt++] = mp->mag_round[i]; \
 890                 if (magcnt == magmax) { \
 891                         mdb_warn("%d magazines exceeds fudge factor\n", \
 892                             magcnt); \
 893                         goto fail; \
 894                 } \
 895         } \
 896 }
 897 
 898 static int
 899 umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
 900     void ***maglistp, size_t *magcntp, size_t *magmaxp)
 901 {
 902         umem_magazine_t *ump, *mp;
 903         void **maglist = NULL;
 904         int i, cpu;
 905         size_t magsize, magmax, magbsize;
 906         size_t magcnt = 0;
 907 
 908         /*
 909          * Read the magtype out of the cache, after verifying the pointer's
 910          * correctness.
 911          */
 912         magsize = umem_get_magsize(cp);
 913         if (magsize == 0) {
 914                 *maglistp = NULL;
 915                 *magcntp = 0;
 916                 *magmaxp = 0;
 917                 return (0);
 918         }
 919 
 920         /*
 921          * There are several places where we need to go buffer hunting:
 922          * the per-CPU loaded magazine, the per-CPU spare full magazine,
 923          * and the full magazine list in the depot.
 924          *
 925          * For an upper bound on the number of buffers in the magazine
 926          * layer, we have the number of magazines on the cache_full
 927          * list plus at most two magazines per CPU (the loaded and the
 928          * spare).  Toss in 100 magazines as a fudge factor in case this
 929          * is live (the number "100" comes from the same fudge factor in
 930          * crash(1M)).
 931          */
 932         magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
 933         magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
 934 
 935         if (magbsize >= PAGESIZE / 2) {
 936                 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
 937                     addr, magbsize);
 938                 return (-1);
 939         }
 940 
 941         maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP);
 942         mp = mdb_alloc(magbsize, UM_SLEEP);
 943         if (mp == NULL || maglist == NULL)
 944                 goto fail;
 945 
 946         /*
 947          * First up: the magazines in the depot (i.e. on the cache_full list).
 948          */
 949         for (ump = cp->cache_full.ml_list; ump != NULL; ) {
 950                 READMAG_ROUNDS(magsize);
 951                 ump = mp->mag_next;
 952 
 953                 if (ump == cp->cache_full.ml_list)
 954                         break; /* cache_full list loop detected */
 955         }
 956 
 957         dprintf(("cache_full list done\n"));
 958 
 959         /*
 960          * Now whip through the CPUs, snagging the loaded magazines
 961          * and full spares.
 962          */
 963         for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
 964                 umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
 965 
 966                 dprintf(("reading cpu cache %p\n",
 967                     (uintptr_t)ccp - (uintptr_t)cp + addr));
 968 
 969                 if (ccp->cc_rounds > 0 &&
 970                     (ump = ccp->cc_loaded) != NULL) {
 971                         dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
 972                         READMAG_ROUNDS(ccp->cc_rounds);
 973                 }
 974 
 975                 if (ccp->cc_prounds > 0 &&
 976                     (ump = ccp->cc_ploaded) != NULL) {
 977                         dprintf(("reading %d previously loaded rounds\n",
 978                             ccp->cc_prounds));
 979                         READMAG_ROUNDS(ccp->cc_prounds);
 980                 }
 981         }
 982 
 983         dprintf(("magazine layer: %d buffers\n", magcnt));
 984 
 985         mdb_free(mp, magbsize);
 986 
 987         *maglistp = maglist;
 988         *magcntp = magcnt;
 989         *magmaxp = magmax;
 990 
 991         return (0);
 992 
 993 fail:
 994         if (mp)
 995                 mdb_free(mp, magbsize);
 996         if (maglist)
 997                 mdb_free(maglist, magmax * sizeof (void *));
 998 
 999         return (-1);
1000 }
1001 
/*
 * Accumulator handed to umem_read_ptc_walk_buf(): a growable array of
 * buffer addresses.
 */
typedef struct umem_read_ptc_walk {
        void **urpw_buf;        /* array of collected buffer addresses */
        size_t urpw_cnt;        /* number of entries in use */
        size_t urpw_max;        /* number of entries allocated */
} umem_read_ptc_walk_t;
1007 
1008 /*ARGSUSED*/
1009 static int
1010 umem_read_ptc_walk_buf(uintptr_t addr,
1011     const void *ignored, umem_read_ptc_walk_t *urpw)
1012 {
1013         if (urpw->urpw_cnt == urpw->urpw_max) {
1014                 size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1;
1015                 void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP);
1016 
1017                 if (nmax > 1) {
1018                         size_t osize = urpw->urpw_max * sizeof (void *);
1019                         bcopy(urpw->urpw_buf, new, osize);
1020                         mdb_free(urpw->urpw_buf, osize);
1021                 }
1022 
1023                 urpw->urpw_buf = new;
1024                 urpw->urpw_max = nmax;
1025         }
1026 
1027         urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr;
1028 
1029         return (WALK_NEXT);
1030 }
1031 
1032 static int
1033 umem_read_ptc(umem_cache_t *cp,
1034     void ***buflistp, size_t *bufcntp, size_t *bufmaxp)
1035 {
1036         umem_read_ptc_walk_t urpw;
1037         char walk[60];
1038         int rval;
1039 
1040         if (!(cp->cache_flags & UMF_PTC))
1041                 return (0);
1042 
1043         (void) mdb_snprintf(walk, sizeof (walk), "umem_ptc_%d",
1044             cp->cache_bufsize);
1045 
1046         urpw.urpw_buf = *buflistp;
1047         urpw.urpw_cnt = *bufcntp;
1048         urpw.urpw_max = *bufmaxp;
1049 
1050         if ((rval = mdb_walk(walk,
1051             (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) {
1052                 mdb_warn("couldn't walk %s", walk);
1053         }
1054 
1055         *buflistp = urpw.urpw_buf;
1056         *bufcntp = urpw.urpw_cnt;
1057         *bufmaxp = urpw.urpw_max;
1058 
1059         return (rval);
1060 }
1061 
1062 static int
1063 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1064 {
1065         return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1066 }
1067 
1068 static int
1069 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1070 {
1071         umem_bufctl_audit_t *b;
1072         UMEM_LOCAL_BUFCTL_AUDIT(&b);
1073 
1074         /*
1075          * if UMF_AUDIT is not set, we know that we're looking at a
1076          * umem_bufctl_t.
1077          */
1078         if (!(cp->cache_flags & UMF_AUDIT) ||
1079             mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
1080                 (void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
1081                 if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
1082                         mdb_warn("unable to read bufctl at %p", buf);
1083                         return (WALK_ERR);
1084                 }
1085         }
1086 
1087         return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
1088 }
1089 
1090 typedef struct umem_walk {
1091         int umw_type;
1092 
1093         uintptr_t umw_addr;             /* cache address */
1094         umem_cache_t *umw_cp;
1095         size_t umw_csize;
1096 
1097         /*
1098          * magazine layer
1099          */
1100         void **umw_maglist;
1101         size_t umw_max;
1102         size_t umw_count;
1103         size_t umw_pos;
1104 
1105         /*
1106          * slab layer
1107          */
1108         char *umw_valid;        /* to keep track of freed buffers */
1109         char *umw_ubase;        /* buffer for slab data */
1110 } umem_walk_t;
1111 
1112 static int
1113 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
1114 {
1115         umem_walk_t *umw;
1116         int csize;
1117         umem_cache_t *cp;
1118         size_t vm_quantum;
1119 
1120         size_t magmax, magcnt;
1121         void **maglist = NULL;
1122         uint_t chunksize, slabsize;
1123         int status = WALK_ERR;
1124         uintptr_t addr = wsp->walk_addr;
1125         const char *layered;
1126 
1127         type &= ~UM_HASH;
1128 
1129         if (addr == NULL) {
1130                 mdb_warn("umem walk doesn't support global walks\n");
1131                 return (WALK_ERR);
1132         }
1133 
1134         dprintf(("walking %p\n", addr));
1135 
1136         /*
1137          * The number of "cpus" determines how large the cache is.
1138          */
1139         csize = UMEM_CACHE_SIZE(umem_max_ncpus);
1140         cp = mdb_alloc(csize, UM_SLEEP);
1141 
1142         if (mdb_vread(cp, csize, addr) == -1) {
1143                 mdb_warn("couldn't read cache at addr %p", addr);
1144                 goto out2;
1145         }
1146 
1147         /*
1148          * It's easy for someone to hand us an invalid cache address.
1149          * Unfortunately, it is hard for this walker to survive an
1150          * invalid cache cleanly.  So we make sure that:
1151          *
1152          *      1. the vmem arena for the cache is readable,
1153          *      2. the vmem arena's quantum is a power of 2,
1154          *      3. our slabsize is a multiple of the quantum, and
1155          *      4. our chunksize is >0 and less than our slabsize.
1156          */
1157         if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1158             (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1159             vm_quantum == 0 ||
1160             (vm_quantum & (vm_quantum - 1)) != 0 ||
1161             cp->cache_slabsize < vm_quantum ||
1162             P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1163             cp->cache_chunksize == 0 ||
1164             cp->cache_chunksize > cp->cache_slabsize) {
1165                 mdb_warn("%p is not a valid umem_cache_t\n", addr);
1166                 goto out2;
1167         }
1168 
1169         dprintf(("buf total is %d\n", cp->cache_buftotal));
1170 
1171         if (cp->cache_buftotal == 0) {
1172                 mdb_free(cp, csize);
1173                 return (WALK_DONE);
1174         }
1175 
1176         /*
1177          * If they ask for bufctls, but it's a small-slab cache,
1178          * there is nothing to report.
1179          */
1180         if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1181                 dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1182                     cp->cache_flags));
1183                 mdb_free(cp, csize);
1184                 return (WALK_DONE);
1185         }
1186 
1187         /*
1188          * Read in the contents of the magazine layer
1189          */
1190         if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0)
1191                 goto out2;
1192 
1193         /*
1194          * Read in the contents of the per-thread caches, if any
1195          */
1196         if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0)
1197                 goto out2;
1198 
1199         /*
1200          * We have all of the buffers from the magazines and from the
1201          * per-thread cache (if any);  if we are walking allocated buffers,
1202          * sort them so we can bsearch them later.
1203          */
1204         if (type & UM_ALLOCATED)
1205                 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1206 
1207         wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1208 
1209         umw->umw_type = type;
1210         umw->umw_addr = addr;
1211         umw->umw_cp = cp;
1212         umw->umw_csize = csize;
1213         umw->umw_maglist = maglist;
1214         umw->umw_max = magmax;
1215         umw->umw_count = magcnt;
1216         umw->umw_pos = 0;
1217 
1218         /*
1219          * When walking allocated buffers in a UMF_HASH cache, we walk the
1220          * hash table instead of the slab layer.
1221          */
1222         if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1223                 layered = "umem_hash";
1224 
1225                 umw->umw_type |= UM_HASH;
1226         } else {
1227                 /*
1228                  * If we are walking freed buffers, we only need the
1229                  * magazine layer plus the partially allocated slabs.
1230                  * To walk allocated buffers, we need all of the slabs.
1231                  */
1232                 if (type & UM_ALLOCATED)
1233                         layered = "umem_slab";
1234                 else
1235                         layered = "umem_slab_partial";
1236 
1237                 /*
1238                  * for small-slab caches, we read in the entire slab.  For
1239                  * freed buffers, we can just walk the freelist.  For
1240                  * allocated buffers, we use a 'valid' array to track
1241                  * the freed buffers.
1242                  */
1243                 if (!(cp->cache_flags & UMF_HASH)) {
1244                         chunksize = cp->cache_chunksize;
1245                         slabsize = cp->cache_slabsize;
1246 
1247                         umw->umw_ubase = mdb_alloc(slabsize +
1248                             sizeof (umem_bufctl_t), UM_SLEEP);
1249 
1250                         if (type & UM_ALLOCATED)
1251                                 umw->umw_valid =
1252                                     mdb_alloc(slabsize / chunksize, UM_SLEEP);
1253                 }
1254         }
1255 
1256         status = WALK_NEXT;
1257 
1258         if (mdb_layered_walk(layered, wsp) == -1) {
1259                 mdb_warn("unable to start layered '%s' walk", layered);
1260                 status = WALK_ERR;
1261         }
1262 
1263 out1:
1264         if (status == WALK_ERR) {
1265                 if (umw->umw_valid)
1266                         mdb_free(umw->umw_valid, slabsize / chunksize);
1267 
1268                 if (umw->umw_ubase)
1269                         mdb_free(umw->umw_ubase, slabsize +
1270                             sizeof (umem_bufctl_t));
1271 
1272                 if (umw->umw_maglist)
1273                         mdb_free(umw->umw_maglist, umw->umw_max *
1274                             sizeof (uintptr_t));
1275 
1276                 mdb_free(umw, sizeof (umem_walk_t));
1277                 wsp->walk_data = NULL;
1278         }
1279 
1280 out2:
1281         if (status == WALK_ERR)
1282                 mdb_free(cp, csize);
1283 
1284         return (status);
1285 }
1286 
1287 int
1288 umem_walk_step(mdb_walk_state_t *wsp)
1289 {
1290         umem_walk_t *umw = wsp->walk_data;
1291         int type = umw->umw_type;
1292         umem_cache_t *cp = umw->umw_cp;
1293 
1294         void **maglist = umw->umw_maglist;
1295         int magcnt = umw->umw_count;
1296 
1297         uintptr_t chunksize, slabsize;
1298         uintptr_t addr;
1299         const umem_slab_t *sp;
1300         const umem_bufctl_t *bcp;
1301         umem_bufctl_t bc;
1302 
1303         int chunks;
1304         char *kbase;
1305         void *buf;
1306         int i, ret;
1307 
1308         char *valid, *ubase;
1309 
1310         /*
1311          * first, handle the 'umem_hash' layered walk case
1312          */
1313         if (type & UM_HASH) {
1314                 /*
1315                  * We have a buffer which has been allocated out of the
1316                  * global layer. We need to make sure that it's not
1317                  * actually sitting in a magazine before we report it as
1318                  * an allocated buffer.
1319                  */
1320                 buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1321 
1322                 if (magcnt > 0 &&
1323                     bsearch(&buf, maglist, magcnt, sizeof (void *),
1324                     addrcmp) != NULL)
1325                         return (WALK_NEXT);
1326 
1327                 if (type & UM_BUFCTL)
1328                         return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1329 
1330                 return (umem_walk_callback(wsp, (uintptr_t)buf));
1331         }
1332 
1333         ret = WALK_NEXT;
1334 
1335         addr = umw->umw_addr;
1336 
1337         /*
1338          * If we're walking freed buffers, report everything in the
1339          * magazine layer before processing the first slab.
1340          */
1341         if ((type & UM_FREE) && magcnt != 0) {
1342                 umw->umw_count = 0;          /* only do this once */
1343                 for (i = 0; i < magcnt; i++) {
1344                         buf = maglist[i];
1345 
1346                         if (type & UM_BUFCTL) {
1347                                 uintptr_t out;
1348 
1349                                 if (cp->cache_flags & UMF_BUFTAG) {
1350                                         umem_buftag_t *btp;
1351                                         umem_buftag_t tag;
1352 
1353                                         /* LINTED - alignment */
1354                                         btp = UMEM_BUFTAG(cp, buf);
1355                                         if (mdb_vread(&tag, sizeof (tag),
1356                                             (uintptr_t)btp) == -1) {
1357                                                 mdb_warn("reading buftag for "
1358                                                     "%p at %p", buf, btp);
1359                                                 continue;
1360                                         }
1361                                         out = (uintptr_t)tag.bt_bufctl;
1362                                 } else {
1363                                         if (umem_hash_lookup(cp, addr, buf,
1364                                             &out) == -1)
1365                                                 continue;
1366                                 }
1367                                 ret = bufctl_walk_callback(cp, wsp, out);
1368                         } else {
1369                                 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1370                         }
1371 
1372                         if (ret != WALK_NEXT)
1373                                 return (ret);
1374                 }
1375         }
1376 
1377         /*
1378          * Handle the buffers in the current slab
1379          */
1380         chunksize = cp->cache_chunksize;
1381         slabsize = cp->cache_slabsize;
1382 
1383         sp = wsp->walk_layer;
1384         chunks = sp->slab_chunks;
1385         kbase = sp->slab_base;
1386 
1387         dprintf(("kbase is %p\n", kbase));
1388 
1389         if (!(cp->cache_flags & UMF_HASH)) {
1390                 valid = umw->umw_valid;
1391                 ubase = umw->umw_ubase;
1392 
1393                 if (mdb_vread(ubase, chunks * chunksize,
1394                     (uintptr_t)kbase) == -1) {
1395                         mdb_warn("failed to read slab contents at %p", kbase);
1396                         return (WALK_ERR);
1397                 }
1398 
1399                 /*
1400                  * Set up the valid map as fully allocated -- we'll punch
1401                  * out the freelist.
1402                  */
1403                 if (type & UM_ALLOCATED)
1404                         (void) memset(valid, 1, chunks);
1405         } else {
1406                 valid = NULL;
1407                 ubase = NULL;
1408         }
1409 
1410         /*
1411          * walk the slab's freelist
1412          */
1413         bcp = sp->slab_head;
1414 
1415         dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1416 
1417         /*
1418          * since we could be in the middle of allocating a buffer,
1419          * our refcnt could be one higher than it aught.  So we
1420          * check one further on the freelist than the count allows.
1421          */
1422         for (i = sp->slab_refcnt; i <= chunks; i++) {
1423                 uint_t ndx;
1424 
1425                 dprintf(("bcp is %p\n", bcp));
1426 
1427                 if (bcp == NULL) {
1428                         if (i == chunks)
1429                                 break;
1430                         mdb_warn(
1431                             "slab %p in cache %p freelist too short by %d\n",
1432                             sp, addr, chunks - i);
1433                         break;
1434                 }
1435 
1436                 if (cp->cache_flags & UMF_HASH) {
1437                         if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1438                                 mdb_warn("failed to read bufctl ptr at %p",
1439                                     bcp);
1440                                 break;
1441                         }
1442                         buf = bc.bc_addr;
1443                 } else {
1444                         /*
1445                          * Otherwise the buffer is (or should be) in the slab
1446                          * that we've read in; determine its offset in the
1447                          * slab, validate that it's not corrupt, and add to
1448                          * our base address to find the umem_bufctl_t.  (Note
1449                          * that we don't need to add the size of the bufctl
1450                          * to our offset calculation because of the slop that's
1451                          * allocated for the buffer at ubase.)
1452                          */
1453                         uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1454 
1455                         if (offs > chunks * chunksize) {
1456                                 mdb_warn("found corrupt bufctl ptr %p"
1457                                     " in slab %p in cache %p\n", bcp,
1458                                     wsp->walk_addr, addr);
1459                                 break;
1460                         }
1461 
1462                         bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs));
1463                         buf = UMEM_BUF(cp, bcp);
1464                 }
1465 
1466                 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1467 
1468                 if (ndx > slabsize / cp->cache_bufsize) {
1469                         /*
1470                          * This is very wrong; we have managed to find
1471                          * a buffer in the slab which shouldn't
1472                          * actually be here.  Emit a warning, and
1473                          * try to continue.
1474                          */
1475                         mdb_warn("buf %p is out of range for "
1476                             "slab %p, cache %p\n", buf, sp, addr);
1477                 } else if (type & UM_ALLOCATED) {
1478                         /*
1479                          * we have found a buffer on the slab's freelist;
1480                          * clear its entry
1481                          */
1482                         valid[ndx] = 0;
1483                 } else {
1484                         /*
1485                          * Report this freed buffer
1486                          */
1487                         if (type & UM_BUFCTL) {
1488                                 ret = bufctl_walk_callback(cp, wsp,
1489                                     (uintptr_t)bcp);
1490                         } else {
1491                                 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1492                         }
1493                         if (ret != WALK_NEXT)
1494                                 return (ret);
1495                 }
1496 
1497                 bcp = bc.bc_next;
1498         }
1499 
1500         if (bcp != NULL) {
1501                 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1502                     sp, addr, bcp));
1503         }
1504 
1505         /*
1506          * If we are walking freed buffers, the loop above handled reporting
1507          * them.
1508          */
1509         if (type & UM_FREE)
1510                 return (WALK_NEXT);
1511 
1512         if (type & UM_BUFCTL) {
1513                 mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1514                     "cache %p\n", addr);
1515                 return (WALK_ERR);
1516         }
1517 
1518         /*
1519          * Report allocated buffers, skipping buffers in the magazine layer.
1520          * We only get this far for small-slab caches.
1521          */
1522         for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1523                 buf = (char *)kbase + i * chunksize;
1524 
1525                 if (!valid[i])
1526                         continue;               /* on slab freelist */
1527 
1528                 if (magcnt > 0 &&
1529                     bsearch(&buf, maglist, magcnt, sizeof (void *),
1530                     addrcmp) != NULL)
1531                         continue;               /* in magazine layer */
1532 
1533                 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1534         }
1535         return (ret);
1536 }
1537 
1538 void
1539 umem_walk_fini(mdb_walk_state_t *wsp)
1540 {
1541         umem_walk_t *umw = wsp->walk_data;
1542         uintptr_t chunksize;
1543         uintptr_t slabsize;
1544 
1545         if (umw == NULL)
1546                 return;
1547 
1548         if (umw->umw_maglist != NULL)
1549                 mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1550 
1551         chunksize = umw->umw_cp->cache_chunksize;
1552         slabsize = umw->umw_cp->cache_slabsize;
1553 
1554         if (umw->umw_valid != NULL)
1555                 mdb_free(umw->umw_valid, slabsize / chunksize);
1556         if (umw->umw_ubase != NULL)
1557                 mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1558 
1559         mdb_free(umw->umw_cp, umw->umw_csize);
1560         mdb_free(umw, sizeof (umem_walk_t));
1561 }
1562 
1563 /*ARGSUSED*/
1564 static int
1565 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1566 {
1567         /*
1568          * Buffers allocated from NOTOUCH caches can also show up as freed
1569          * memory in other caches.  This can be a little confusing, so we
1570          * don't walk NOTOUCH caches when walking all caches (thereby assuring
1571          * that "::walk umem" and "::walk freemem" yield disjoint output).
1572          */
1573         if (c->cache_cflags & UMC_NOTOUCH)
1574                 return (WALK_NEXT);
1575 
1576         if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1577             wsp->walk_cbdata, addr) == -1)
1578                 return (WALK_DONE);
1579 
1580         return (WALK_NEXT);
1581 }
1582 
/*
 * Perform a global walk: stash the walker name in wsp->walk_data and apply
 * umem_walk_all() to every cache via the "umem_cache" walker.  This macro
 * always returns from the enclosing function: WALK_ERR if the cache walk
 * could not be run, WALK_DONE otherwise.
 */
#define UMEM_WALK_ALL(name, wsp) { \
        (wsp)->walk_data = (name); \
        return (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, \
            (wsp)) == -1 ? WALK_ERR : WALK_DONE); \
}
1589 
1590 int
1591 umem_walk_init(mdb_walk_state_t *wsp)
1592 {
1593         if (wsp->walk_arg != NULL)
1594                 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1595 
1596         if (wsp->walk_addr == NULL)
1597                 UMEM_WALK_ALL("umem", wsp);
1598         return (umem_walk_init_common(wsp, UM_ALLOCATED));
1599 }
1600 
1601 int
1602 bufctl_walk_init(mdb_walk_state_t *wsp)
1603 {
1604         if (wsp->walk_addr == NULL)
1605                 UMEM_WALK_ALL("bufctl", wsp);
1606         return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1607 }
1608 
1609 int
1610 freemem_walk_init(mdb_walk_state_t *wsp)
1611 {
1612         if (wsp->walk_addr == NULL)
1613                 UMEM_WALK_ALL("freemem", wsp);
1614         return (umem_walk_init_common(wsp, UM_FREE));
1615 }
1616 
1617 int
1618 freectl_walk_init(mdb_walk_state_t *wsp)
1619 {
1620         if (wsp->walk_addr == NULL)
1621                 UMEM_WALK_ALL("freectl", wsp);
1622         return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1623 }
1624 
1625 typedef struct bufctl_history_walk {
1626         void            *bhw_next;
1627         umem_cache_t    *bhw_cache;
1628         umem_slab_t     *bhw_slab;
1629         hrtime_t        bhw_timestamp;
1630 } bufctl_history_walk_t;
1631 
1632 int
1633 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1634 {
1635         bufctl_history_walk_t *bhw;
1636         umem_bufctl_audit_t bc;
1637         umem_bufctl_audit_t bcn;
1638 
1639         if (wsp->walk_addr == NULL) {
1640                 mdb_warn("bufctl_history walk doesn't support global walks\n");
1641                 return (WALK_ERR);
1642         }
1643 
1644         if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1645                 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1646                 return (WALK_ERR);
1647         }
1648 
1649         bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1650         bhw->bhw_timestamp = 0;
1651         bhw->bhw_cache = bc.bc_cache;
1652         bhw->bhw_slab = bc.bc_slab;
1653 
1654         /*
1655          * sometimes the first log entry matches the base bufctl;  in that
1656          * case, skip the base bufctl.
1657          */
1658         if (bc.bc_lastlog != NULL &&
1659             mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1660             bc.bc_addr == bcn.bc_addr &&
1661             bc.bc_cache == bcn.bc_cache &&
1662             bc.bc_slab == bcn.bc_slab &&
1663             bc.bc_timestamp == bcn.bc_timestamp &&
1664             bc.bc_thread == bcn.bc_thread)
1665                 bhw->bhw_next = bc.bc_lastlog;
1666         else
1667                 bhw->bhw_next = (void *)wsp->walk_addr;
1668 
1669         wsp->walk_addr = (uintptr_t)bc.bc_addr;
1670         wsp->walk_data = bhw;
1671 
1672         return (WALK_NEXT);
1673 }
1674 
1675 int
1676 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1677 {
1678         bufctl_history_walk_t *bhw = wsp->walk_data;
1679         uintptr_t addr = (uintptr_t)bhw->bhw_next;
1680         uintptr_t baseaddr = wsp->walk_addr;
1681         umem_bufctl_audit_t *b;
1682         UMEM_LOCAL_BUFCTL_AUDIT(&b);
1683 
1684         if (addr == NULL)
1685                 return (WALK_DONE);
1686 
1687         if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1688                 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1689                 return (WALK_ERR);
1690         }
1691 
1692         /*
1693          * The bufctl is only valid if the address, cache, and slab are
1694          * correct.  We also check that the timestamp is decreasing, to
1695          * prevent infinite loops.
1696          */
1697         if ((uintptr_t)b->bc_addr != baseaddr ||
1698             b->bc_cache != bhw->bhw_cache ||
1699             b->bc_slab != bhw->bhw_slab ||
1700             (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1701                 return (WALK_DONE);
1702 
1703         bhw->bhw_next = b->bc_lastlog;
1704         bhw->bhw_timestamp = b->bc_timestamp;
1705 
1706         return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1707 }
1708 
1709 void
1710 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1711 {
1712         bufctl_history_walk_t *bhw = wsp->walk_data;
1713 
1714         mdb_free(bhw, sizeof (*bhw));
1715 }
1716 
1717 typedef struct umem_log_walk {
1718         umem_bufctl_audit_t *ulw_base;
1719         umem_bufctl_audit_t **ulw_sorted;
1720         umem_log_header_t ulw_lh;
1721         size_t ulw_size;
1722         size_t ulw_maxndx;
1723         size_t ulw_ndx;
1724 } umem_log_walk_t;
1725 
1726 int
1727 umem_log_walk_init(mdb_walk_state_t *wsp)
1728 {
1729         uintptr_t lp = wsp->walk_addr;
1730         umem_log_walk_t *ulw;
1731         umem_log_header_t *lhp;
1732         int maxndx, i, j, k;
1733 
1734         /*
1735          * By default (global walk), walk the umem_transaction_log.  Otherwise
1736          * read the log whose umem_log_header_t is stored at walk_addr.
1737          */
1738         if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
1739                 mdb_warn("failed to read 'umem_transaction_log'");
1740                 return (WALK_ERR);
1741         }
1742 
1743         if (lp == NULL) {
1744                 mdb_warn("log is disabled\n");
1745                 return (WALK_ERR);
1746         }
1747 
1748         ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1749         lhp = &ulw->ulw_lh;
1750 
1751         if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1752                 mdb_warn("failed to read log header at %p", lp);
1753                 mdb_free(ulw, sizeof (umem_log_walk_t));
1754                 return (WALK_ERR);
1755         }
1756 
1757         ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1758         ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1759         maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1760 
1761         if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1762             (uintptr_t)lhp->lh_base) == -1) {
1763                 mdb_warn("failed to read log at base %p", lhp->lh_base);
1764                 mdb_free(ulw->ulw_base, ulw->ulw_size);
1765                 mdb_free(ulw, sizeof (umem_log_walk_t));
1766                 return (WALK_ERR);
1767         }
1768 
1769         ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1770             sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1771 
1772         for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1773                 caddr_t chunk = (caddr_t)
1774                     ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1775 
1776                 for (j = 0; j < maxndx; j++) {
1777                         /* LINTED align */
1778                         ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1779                         chunk += UMEM_BUFCTL_AUDIT_SIZE;
1780                 }
1781         }
1782 
1783         qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1784             (int(*)(const void *, const void *))bufctlcmp);
1785 
1786         ulw->ulw_maxndx = k;
1787         wsp->walk_data = ulw;
1788 
1789         return (WALK_NEXT);
1790 }
1791 
1792 int
1793 umem_log_walk_step(mdb_walk_state_t *wsp)
1794 {
1795         umem_log_walk_t *ulw = wsp->walk_data;
1796         umem_bufctl_audit_t *bcp;
1797 
1798         if (ulw->ulw_ndx == ulw->ulw_maxndx)
1799                 return (WALK_DONE);
1800 
1801         bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1802 
1803         return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1804             (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1805 }
1806 
1807 void
1808 umem_log_walk_fini(mdb_walk_state_t *wsp)
1809 {
1810         umem_log_walk_t *ulw = wsp->walk_data;
1811 
1812         mdb_free(ulw->ulw_base, ulw->ulw_size);
1813         mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1814             sizeof (umem_bufctl_audit_t *));
1815         mdb_free(ulw, sizeof (umem_log_walk_t));
1816 }
1817 
/*
 * One bufctl collected by the allocdby/freedby walkers: the address it was
 * reported at and the timestamp used to order the results.
 */
typedef struct allocdby_bufctl {
        uintptr_t abb_addr;     /* address passed to the collecting callback */
        hrtime_t abb_ts;        /* copy of the bufctl's bc_timestamp */
} allocdby_bufctl_t;
1822 
/*
 * State for an allocdby/freedby walk.  abw_buf is a growable array that is
 * filled during walk init and then handed out one entry per step.
 */
typedef struct allocdby_walk {
        const char *abw_walk;           /* per-cache walker to layer on */
        uintptr_t abw_thread;           /* thread whose bufctls are kept */
        size_t abw_nbufs;               /* entries of abw_buf in use */
        size_t abw_size;                /* entries abw_buf can hold */
        allocdby_bufctl_t *abw_buf;     /* the collected bufctls */
        size_t abw_ndx;                 /* next abw_buf entry to step over */
} allocdby_walk_t;
1831 
1832 int
1833 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1834     allocdby_walk_t *abw)
1835 {
1836         if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1837                 return (WALK_NEXT);
1838 
1839         if (abw->abw_nbufs == abw->abw_size) {
1840                 allocdby_bufctl_t *buf;
1841                 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1842 
1843                 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1844 
1845                 bcopy(abw->abw_buf, buf, oldsize);
1846                 mdb_free(abw->abw_buf, oldsize);
1847 
1848                 abw->abw_size <<= 1;
1849                 abw->abw_buf = buf;
1850         }
1851 
1852         abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1853         abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1854         abw->abw_nbufs++;
1855 
1856         return (WALK_NEXT);
1857 }
1858 
1859 /*ARGSUSED*/
1860 int
1861 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1862 {
1863         if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1864             abw, addr) == -1) {
1865                 mdb_warn("couldn't walk bufctl for cache %p", addr);
1866                 return (WALK_DONE);
1867         }
1868 
1869         return (WALK_NEXT);
1870 }
1871 
1872 static int
1873 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1874 {
1875         if (lhs->abb_ts < rhs->abb_ts)
1876                 return (1);
1877         if (lhs->abb_ts > rhs->abb_ts)
1878                 return (-1);
1879         return (0);
1880 }
1881 
1882 static int
1883 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1884 {
1885         allocdby_walk_t *abw;
1886 
1887         if (wsp->walk_addr == NULL) {
1888                 mdb_warn("allocdby walk doesn't support global walks\n");
1889                 return (WALK_ERR);
1890         }
1891 
1892         abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1893 
1894         abw->abw_thread = wsp->walk_addr;
1895         abw->abw_walk = walk;
1896         abw->abw_size = 128; /* something reasonable */
1897         abw->abw_buf =
1898             mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1899 
1900         wsp->walk_data = abw;
1901 
1902         if (mdb_walk("umem_cache",
1903             (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1904                 mdb_warn("couldn't walk umem_cache");
1905                 allocdby_walk_fini(wsp);
1906                 return (WALK_ERR);
1907         }
1908 
1909         qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1910             (int(*)(const void *, const void *))allocdby_cmp);
1911 
1912         return (WALK_NEXT);
1913 }
1914 
1915 int
1916 allocdby_walk_init(mdb_walk_state_t *wsp)
1917 {
1918         return (allocdby_walk_init_common(wsp, "bufctl"));
1919 }
1920 
1921 int
1922 freedby_walk_init(mdb_walk_state_t *wsp)
1923 {
1924         return (allocdby_walk_init_common(wsp, "freectl"));
1925 }
1926 
1927 int
1928 allocdby_walk_step(mdb_walk_state_t *wsp)
1929 {
1930         allocdby_walk_t *abw = wsp->walk_data;
1931         uintptr_t addr;
1932         umem_bufctl_audit_t *bcp;
1933         UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1934 
1935         if (abw->abw_ndx == abw->abw_nbufs)
1936                 return (WALK_DONE);
1937 
1938         addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1939 
1940         if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1941                 mdb_warn("couldn't read bufctl at %p", addr);
1942                 return (WALK_DONE);
1943         }
1944 
1945         return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1946 }
1947 
1948 void
1949 allocdby_walk_fini(mdb_walk_state_t *wsp)
1950 {
1951         allocdby_walk_t *abw = wsp->walk_data;
1952 
1953         mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1954         mdb_free(abw, sizeof (allocdby_walk_t));
1955 }
1956 
1957 /*ARGSUSED*/
1958 int
1959 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1960 {
1961         char c[MDB_SYM_NAMLEN];
1962         GElf_Sym sym;
1963         int i;
1964 
1965         mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1966         for (i = 0; i < bcp->bc_depth; i++) {
1967                 if (mdb_lookup_by_addr(bcp->bc_stack[i],
1968                     MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1969                         continue;
1970                 if (is_umem_sym(c, "umem_"))
1971                         continue;
1972                 mdb_printf("%s+0x%lx",
1973                     c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1974                 break;
1975         }
1976         mdb_printf("\n");
1977 
1978         return (WALK_NEXT);
1979 }
1980 
1981 static int
1982 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1983 {
1984         if (!(flags & DCMD_ADDRSPEC))
1985                 return (DCMD_USAGE);
1986 
1987         mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1988 
1989         if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1990                 mdb_warn("can't walk '%s' for %p", w, addr);
1991                 return (DCMD_ERR);
1992         }
1993 
1994         return (DCMD_OK);
1995 }
1996 
1997 /*ARGSUSED*/
1998 int
1999 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2000 {
2001         return (allocdby_common(addr, flags, "allocdby"));
2002 }
2003 
2004 /*ARGSUSED*/
2005 int
2006 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2007 {
2008         return (allocdby_common(addr, flags, "freedby"));
2009 }
2010 
/*
 * State shared by the ::whatis callbacks in this file while they search
 * umem caches and vmem arenas.
 */
typedef struct whatis_info {
        mdb_whatis_t *wi_w;             /* handle for the whatis request */
        const umem_cache_t *wi_cache;   /* cache currently being searched */
        const vmem_t *wi_vmem;          /* arena currently being searched */
        vmem_t *wi_msb_arena;           /* read from umem_internal_arena */
        size_t wi_slab_size;            /* slab bytes tested for overlap */
        int wi_slab_found;              /* bumped when a slab overlaps */
        uint_t wi_freemem;              /* TRUE while walking free buffers */
} whatis_info_t;
2020 
2021 /* call one of our dcmd functions with "-v" and the provided address */
2022 static void
2023 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2024 {
2025         mdb_arg_t a;
2026         a.a_type = MDB_TYPE_STRING;
2027         a.a_un.a_str = "-v";
2028 
2029         mdb_printf(":\n");
2030         (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2031 }
2032 
2033 static void
2034 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2035     uintptr_t baddr)
2036 {
2037         mdb_whatis_t *w = wi->wi_w;
2038         const umem_cache_t *cp = wi->wi_cache;
2039         int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2040 
2041         int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT));
2042 
2043         mdb_whatis_report_object(w, maddr, addr, "");
2044 
2045         if (baddr != 0 && !call_printer)
2046                 mdb_printf("bufctl %p ", baddr);
2047 
2048         mdb_printf("%s from %s",
2049             (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2050 
2051         if (call_printer && baddr != 0) {
2052                 whatis_call_printer(bufctl, baddr);
2053                 return;
2054         }
2055         mdb_printf("\n");
2056 }
2057 
2058 /*ARGSUSED*/
2059 static int
2060 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2061 {
2062         mdb_whatis_t *w = wi->wi_w;
2063 
2064         uintptr_t cur;
2065         size_t size = wi->wi_cache->cache_bufsize;
2066 
2067         while (mdb_whatis_match(w, addr, size, &cur))
2068                 whatis_print_umem(wi, cur, addr, NULL);
2069 
2070         return (WHATIS_WALKRET(w));
2071 }
2072 
2073 /*ARGSUSED*/
2074 static int
2075 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi)
2076 {
2077         mdb_whatis_t *w = wi->wi_w;
2078 
2079         uintptr_t cur;
2080         uintptr_t addr = (uintptr_t)bcp->bc_addr;
2081         size_t size = wi->wi_cache->cache_bufsize;
2082 
2083         while (mdb_whatis_match(w, addr, size, &cur))
2084                 whatis_print_umem(wi, cur, addr, baddr);
2085 
2086         return (WHATIS_WALKRET(w));
2087 }
2088 
2089 
2090 static int
2091 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2092 {
2093         mdb_whatis_t *w = wi->wi_w;
2094 
2095         size_t size = vs->vs_end - vs->vs_start;
2096         uintptr_t cur;
2097 
2098         /* We're not interested in anything but alloc and free segments */
2099         if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2100                 return (WALK_NEXT);
2101 
2102         while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2103                 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2104 
2105                 /*
2106                  * If we're not printing it seperately, provide the vmem_seg
2107                  * pointer if it has a stack trace.
2108                  */
2109                 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2110                     ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 ||
2111                     (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2112                         mdb_printf("vmem_seg %p ", addr);
2113                 }
2114 
2115                 mdb_printf("%s from %s vmem arena",
2116                     (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2117                     wi->wi_vmem->vm_name);
2118 
2119                 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2120                         whatis_call_printer(vmem_seg, addr);
2121                 else
2122                         mdb_printf("\n");
2123         }
2124 
2125         return (WHATIS_WALKRET(w));
2126 }
2127 
2128 static int
2129 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2130 {
2131         mdb_whatis_t *w = wi->wi_w;
2132         const char *nm = vmem->vm_name;
2133         wi->wi_vmem = vmem;
2134 
2135         if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2136                 mdb_printf("Searching vmem arena %s...\n", nm);
2137 
2138         if (mdb_pwalk("vmem_seg",
2139             (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2140                 mdb_warn("can't walk vmem seg for %p", addr);
2141                 return (WALK_NEXT);
2142         }
2143 
2144         return (WHATIS_WALKRET(w));
2145 }
2146 
2147 /*ARGSUSED*/
2148 static int
2149 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi)
2150 {
2151         mdb_whatis_t *w = wi->wi_w;
2152 
2153         /* It must overlap with the slab data, or it's not interesting */
2154         if (mdb_whatis_overlaps(w,
2155             (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2156                 wi->wi_slab_found++;
2157                 return (WALK_DONE);
2158         }
2159         return (WALK_NEXT);
2160 }
2161 
2162 static int
2163 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2164 {
2165         mdb_whatis_t *w = wi->wi_w;
2166         char *walk, *freewalk;
2167         mdb_walk_cb_t func;
2168         int do_bufctl;
2169 
2170         /* Override the '-b' flag as necessary */
2171         if (!(c->cache_flags & UMF_HASH))
2172                 do_bufctl = FALSE;      /* no bufctls to walk */
2173         else if (c->cache_flags & UMF_AUDIT)
2174                 do_bufctl = TRUE;       /* we always want debugging info */
2175         else
2176                 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2177 
2178         if (do_bufctl) {
2179                 walk = "bufctl";
2180                 freewalk = "freectl";
2181                 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2182         } else {
2183                 walk = "umem";
2184                 freewalk = "freemem";
2185                 func = (mdb_walk_cb_t)whatis_walk_umem;
2186         }
2187 
2188         wi->wi_cache = c;
2189 
2190         if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2191                 mdb_printf("Searching %s...\n", c->cache_name);
2192 
2193         /*
2194          * If more then two buffers live on each slab, figure out if we're
2195          * interested in anything in any slab before doing the more expensive
2196          * umem/freemem (bufctl/freectl) walkers.
2197          */
2198         wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2199         if (!(c->cache_flags & UMF_HASH))
2200                 wi->wi_slab_size -= sizeof (umem_slab_t);
2201 
2202         if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2203                 wi->wi_slab_found = 0;
2204                 if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2205                     addr) == -1) {
2206                         mdb_warn("can't find umem_slab walker");
2207                         return (WALK_DONE);
2208                 }
2209                 if (wi->wi_slab_found == 0)
2210                         return (WALK_NEXT);
2211         }
2212 
2213         wi->wi_freemem = FALSE;
2214         if (mdb_pwalk(walk, func, wi, addr) == -1) {
2215                 mdb_warn("can't find %s walker", walk);
2216                 return (WALK_DONE);
2217         }
2218 
2219         if (mdb_whatis_done(w))
2220                 return (WALK_DONE);
2221 
2222         /*
2223          * We have searched for allocated memory; now search for freed memory.
2224          */
2225         if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2226                 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2227 
2228         wi->wi_freemem = TRUE;
2229 
2230         if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2231                 mdb_warn("can't find %s walker", freewalk);
2232                 return (WALK_DONE);
2233         }
2234 
2235         return (WHATIS_WALKRET(w));
2236 }
2237 
2238 static int
2239 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2240 {
2241         if (c->cache_arena == wi->wi_msb_arena ||
2242             (c->cache_cflags & UMC_NOTOUCH))
2243                 return (WALK_NEXT);
2244 
2245         return (whatis_walk_cache(addr, c, wi));
2246 }
2247 
2248 static int
2249 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2250 {
2251         if (c->cache_arena != wi->wi_msb_arena)
2252                 return (WALK_NEXT);
2253 
2254         return (whatis_walk_cache(addr, c, wi));
2255 }
2256 
2257 static int
2258 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2259 {
2260         if (c->cache_arena == wi->wi_msb_arena ||
2261             !(c->cache_cflags & UMC_NOTOUCH))
2262                 return (WALK_NEXT);
2263 
2264         return (whatis_walk_cache(addr, c, wi));
2265 }
2266 
2267 /*ARGSUSED*/
2268 static int
2269 whatis_run_umem(mdb_whatis_t *w, void *ignored)
2270 {
2271         whatis_info_t wi;
2272 
2273         bzero(&wi, sizeof (wi));
2274         wi.wi_w = w;
2275 
2276         /* umem's metadata is allocated from the umem_internal_arena */
2277         if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1)
2278                 mdb_warn("unable to readvar \"umem_internal_arena\"");
2279 
2280         /*
2281          * We process umem caches in the following order:
2282          *
2283          *      non-UMC_NOTOUCH, non-metadata   (typically the most interesting)
2284          *      metadata                        (can be huge with UMF_AUDIT)
2285          *      UMC_NOTOUCH, non-metadata       (see umem_walk_all())
2286          */
2287         if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2288             &wi) == -1 ||
2289             mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2290             &wi) == -1 ||
2291             mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2292             &wi) == -1) {
2293                 mdb_warn("couldn't find umem_cache walker");
2294                 return (1);
2295         }
2296         return (0);
2297 }
2298 
2299 /*ARGSUSED*/
2300 static int
2301 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2302 {
2303         whatis_info_t wi;
2304 
2305         bzero(&wi, sizeof (wi));
2306         wi.wi_w = w;
2307 
2308         if (mdb_walk("vmem_postfix",
2309             (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2310                 mdb_warn("couldn't find vmem_postfix walker");
2311                 return (1);
2312         }
2313         return (0);
2314 }
2315 
2316 int
2317 umem_init(void)
2318 {
2319         mdb_walker_t w = {
2320                 "umem_cache", "walk list of umem caches", umem_cache_walk_init,
2321                 umem_cache_walk_step, umem_cache_walk_fini
2322         };
2323 
2324         if (mdb_add_walker(&w) == -1) {
2325                 mdb_warn("failed to add umem_cache walker");
2326                 return (-1);
2327         }
2328 
2329         if (umem_update_variables() == -1)
2330                 return (-1);
2331 
2332         /* install a callback so that our variables are always up-to-date */
2333         (void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
2334         umem_statechange_cb(NULL);
2335 
2336         /*
2337          * Register our ::whatis callbacks.
2338          */
2339         mdb_whatis_register("umem", whatis_run_umem, NULL,
2340             WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2341         mdb_whatis_register("vmem", whatis_run_vmem, NULL,
2342             WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2343 
2344         return (0);
2345 }
2346 
/*
 * Range of transaction-log addresses, [umc_low, umc_high), that ::umem_log
 * attributes to one CPU.
 */
typedef struct umem_log_cpu {
        uintptr_t umc_low;      /* lh_base + clh_chunk * lh_chunksize */
        uintptr_t umc_high;     /* the CPU's clh_current */
} umem_log_cpu_t;
2351 
2352 int
2353 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2354 {
2355         int i;
2356 
2357         for (i = 0; i < umem_max_ncpus; i++) {
2358                 if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2359                         break;
2360         }
2361 
2362         if (i == umem_max_ncpus)
2363                 mdb_printf("   ");
2364         else
2365                 mdb_printf("%3d", i);
2366 
2367         mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2368             b->bc_timestamp, b->bc_thread);
2369 
2370         return (WALK_NEXT);
2371 }
2372 
2373 /*ARGSUSED*/
2374 int
2375 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2376 {
2377         umem_log_header_t lh;
2378         umem_cpu_log_header_t clh;
2379         uintptr_t lhp, clhp;
2380         umem_log_cpu_t *umc;
2381         int i;
2382 
2383         if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2384                 mdb_warn("failed to read 'umem_transaction_log'");
2385                 return (DCMD_ERR);
2386         }
2387 
2388         if (lhp == NULL) {
2389                 mdb_warn("no umem transaction log\n");
2390                 return (DCMD_ERR);
2391         }
2392 
2393         if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2394                 mdb_warn("failed to read log header at %p", lhp);
2395                 return (DCMD_ERR);
2396         }
2397 
2398         clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2399 
2400         umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2401             UM_SLEEP | UM_GC);
2402 
2403         for (i = 0; i < umem_max_ncpus; i++) {
2404                 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2405                         mdb_warn("cannot read cpu %d's log header at %p",
2406                             i, clhp);
2407                         return (DCMD_ERR);
2408                 }
2409 
2410                 umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2411                     (uintptr_t)lh.lh_base;
2412                 umc[i].umc_high = (uintptr_t)clh.clh_current;
2413 
2414                 clhp += sizeof (umem_cpu_log_header_t);
2415         }
2416 
2417         if (DCMD_HDRSPEC(flags)) {
2418                 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2419                     "BUFADDR", "TIMESTAMP", "THREAD");
2420         }
2421 
2422         /*
2423          * If we have been passed an address, we'll just print out that
2424          * log entry.
2425          */
2426         if (flags & DCMD_ADDRSPEC) {
2427                 umem_bufctl_audit_t *bp;
2428                 UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2429 
2430                 if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2431                         mdb_warn("failed to read bufctl at %p", addr);
2432                         return (DCMD_ERR);
2433                 }
2434 
2435                 (void) umem_log_walk(addr, bp, umc);
2436 
2437                 return (DCMD_OK);
2438         }
2439 
2440         if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2441                 mdb_warn("can't find umem log walker");
2442                 return (DCMD_ERR);
2443         }
2444 
2445         return (DCMD_OK);
2446 }
2447 
/*
 * Arguments and result carried through the "bufctl_history" walk that
 * bufctl() starts for its -h option.
 */
typedef struct bufctl_history_cb {
        int             bhc_flags;      /* dcmd flags for each bufctl() call */
        int             bhc_argc;       /* number of entries in bhc_argv */
        const mdb_arg_t *bhc_argv;      /* arguments for each bufctl() call */
        int             bhc_ret;        /* result of the latest bufctl() call */
} bufctl_history_cb_t;
2454 
2455 /*ARGSUSED*/
2456 static int
2457 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2458 {
2459         bufctl_history_cb_t *bhc = arg;
2460 
2461         bhc->bhc_ret =
2462             bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2463 
2464         bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2465 
2466         return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2467 }
2468 
/*
 * Help text for the ::bufctl dcmd: a one-line summary followed by the list
 * of options.
 */
void
bufctl_help(void)
{
        static const char summary[] =
"Display the contents of umem_bufctl_audit_ts, with optional filtering.\n";
        static const char options[] =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

        mdb_printf("%s\n", summary);

        /* Outdent the OPTIONS heading relative to the rest of the text */
        mdb_dec_indent(2);
        mdb_printf("%<b>OPTIONS%</b>\n");
        mdb_inc_indent(2);

        mdb_printf("%s", options);
}
2491 
2492 int
2493 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2494 {
2495         uint_t verbose = FALSE;
2496         uint_t history = FALSE;
2497         uint_t in_history = FALSE;
2498         uintptr_t caller = NULL, thread = NULL;
2499         uintptr_t laddr, haddr, baddr = NULL;
2500         hrtime_t earliest = 0, latest = 0;
2501         int i, depth;
2502         char c[MDB_SYM_NAMLEN];
2503         GElf_Sym sym;
2504         umem_bufctl_audit_t *bcp;
2505         UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2506 
2507         if (mdb_getopts(argc, argv,
2508             'v', MDB_OPT_SETBITS, TRUE, &verbose,
2509             'h', MDB_OPT_SETBITS, TRUE, &history,
2510             'H', MDB_OPT_SETBITS, TRUE, &in_history,                /* internal */
2511             'c', MDB_OPT_UINTPTR, &caller,
2512             't', MDB_OPT_UINTPTR, &thread,
2513             'e', MDB_OPT_UINT64, &earliest,
2514             'l', MDB_OPT_UINT64, &latest,
2515             'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2516                 return (DCMD_USAGE);
2517 
2518         if (!(flags & DCMD_ADDRSPEC))
2519                 return (DCMD_USAGE);
2520 
2521         if (in_history && !history)
2522                 return (DCMD_USAGE);
2523 
2524         if (history && !in_history) {
2525                 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2526                     UM_SLEEP | UM_GC);
2527                 bufctl_history_cb_t bhc;
2528 
2529                 nargv[0].a_type = MDB_TYPE_STRING;
2530                 nargv[0].a_un.a_str = "-H";             /* prevent recursion */
2531 
2532                 for (i = 0; i < argc; i++)
2533                         nargv[i + 1] = argv[i];
2534 
2535                 /*
2536                  * When in history mode, we treat each element as if it
2537                  * were in a seperate loop, so that the headers group
2538                  * bufctls with similar histories.
2539                  */
2540                 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2541                 bhc.bhc_argc = argc + 1;
2542                 bhc.bhc_argv = nargv;
2543                 bhc.bhc_ret = DCMD_OK;
2544 
2545                 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2546                     addr) == -1) {
2547                         mdb_warn("unable to walk bufctl_history");
2548                         return (DCMD_ERR);
2549                 }
2550 
2551                 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2552                         mdb_printf("\n");
2553 
2554                 return (bhc.bhc_ret);
2555         }
2556 
2557         if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2558                 if (verbose) {
2559                         mdb_printf("%16s %16s %16s %16s\n"
2560                             "%<u>%16s %16s %16s %16s%</u>\n",
2561                             "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2562                             "", "CACHE", "LASTLOG", "CONTENTS");
2563                 } else {
2564                         mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2565                             "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2566                 }
2567         }
2568 
2569         if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2570                 mdb_warn("couldn't read bufctl at %p", addr);
2571                 return (DCMD_ERR);
2572         }
2573 
2574         /*
2575          * Guard against bogus bc_depth in case the bufctl is corrupt or
2576          * the address does not really refer to a bufctl.
2577          */
2578         depth = MIN(bcp->bc_depth, umem_stack_depth);
2579 
2580         if (caller != NULL) {
2581                 laddr = caller;
2582                 haddr = caller + sizeof (caller);
2583 
2584                 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2585                     &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2586                         /*
2587                          * We were provided an exact symbol value; any
2588                          * address in the function is valid.
2589                          */
2590                         laddr = (uintptr_t)sym.st_value;
2591                         haddr = (uintptr_t)sym.st_value + sym.st_size;
2592                 }
2593 
2594                 for (i = 0; i < depth; i++)
2595                         if (bcp->bc_stack[i] >= laddr &&
2596                             bcp->bc_stack[i] < haddr)
2597                                 break;
2598 
2599                 if (i == depth)
2600                         return (DCMD_OK);
2601         }
2602 
2603         if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
2604                 return (DCMD_OK);
2605 
2606         if (earliest != 0 && bcp->bc_timestamp < earliest)
2607                 return (DCMD_OK);
2608 
2609         if (latest != 0 && bcp->bc_timestamp > latest)
2610                 return (DCMD_OK);
2611 
2612         if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2613                 return (DCMD_OK);
2614 
2615         if (flags & DCMD_PIPE_OUT) {
2616                 mdb_printf("%#r\n", addr);
2617                 return (DCMD_OK);
2618         }
2619 
2620         if (verbose) {
2621                 mdb_printf(
2622                     "%<b>%16p%</b> %16p %16llx %16d\n"
2623                     "%16s %16p %16p %16p\n",
2624                     addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2625                     "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2626 
2627                 mdb_inc_indent(17);
2628                 for (i = 0; i < depth; i++)
2629                         mdb_printf("%a\n", bcp->bc_stack[i]);
2630                 mdb_dec_indent(17);
2631                 mdb_printf("\n");
2632         } else {
2633                 mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2634                     bcp->bc_timestamp, bcp->bc_thread);
2635 
2636                 for (i = 0; i < depth; i++) {
2637                         if (mdb_lookup_by_addr(bcp->bc_stack[i],
2638                             MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2639                                 continue;
2640                         if (is_umem_sym(c, "umem_"))
2641                                 continue;
2642                         mdb_printf(" %a\n", bcp->bc_stack[i]);
2643                         break;
2644                 }
2645 
2646                 if (i >= depth)
2647                         mdb_printf("\n");
2648         }
2649 
2650         return (DCMD_OK);
2651 }
2652 
2653 /*ARGSUSED*/
2654 int
2655 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2656 {
2657         mdb_arg_t a;
2658 
2659         if (!(flags & DCMD_ADDRSPEC))
2660                 return (DCMD_USAGE);
2661 
2662         if (argc != 0)
2663                 return (DCMD_USAGE);
2664 
2665         a.a_type = MDB_TYPE_STRING;
2666         a.a_un.a_str = "-v";
2667 
2668         return (bufctl(addr, flags, 1, &a));
2669 }
2670 
/*
 * State handed to the buffer verification callbacks through their private
 * argument.
 */
typedef struct umem_verify {
        uint64_t *umv_buf;              /* buffer to read cache contents into */
        size_t umv_size;                /* number of bytes in umv_buf */
        int umv_corruption;             /* > 0 if corruption found. */
        int umv_besilent;               /* nonzero: don't report each site */
        struct umem_cache umv_cache;    /* the cache we're operating on */
} umem_verify_t;
2678 
/*
 * verify_pattern()
 *      Check that the first 'size' bytes of buf_arg consist of the 64-bit
 *      pattern 'pat' repeated.  Returns the byte offset of the first
 *      64-bit word that differs, or -1 if every word matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
        size_t off;

        for (off = 0; off < size; off += sizeof (uint64_t)) {
                /*LINTED*/
                const uint64_t *word =
                    (const uint64_t *)((char *)buf_arg + off);

                if (*word != pat)
                        return ((int64_t)off);
        }

        return (-1);
}
2695 
2696 /*
2697  * verify_buftag()
2698  *      verify that btp->bt_bxstat == (bcp ^ pat)
2699  */
2700 static int
2701 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2702 {
2703         return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2704 }
2705 
2706 /*
2707  * verify_free()
2708  *      verify the integrity of a free block of memory by checking
2709  *      that it is filled with 0xdeadbeef and that its buftag is sane.
2710  */
2711 /*ARGSUSED1*/
2712 static int
2713 verify_free(uintptr_t addr, const void *data, void *private)
2714 {
2715         umem_verify_t *umv = (umem_verify_t *)private;
2716         uint64_t *buf = umv->umv_buf;        /* buf to validate */
2717         int64_t corrupt;                /* corruption offset */
2718         umem_buftag_t *buftagp;         /* ptr to buftag */
2719         umem_cache_t *cp = &umv->umv_cache;
2720         int besilent = umv->umv_besilent;
2721 
2722         /*LINTED*/
2723         buftagp = UMEM_BUFTAG(cp, buf);
2724 
2725         /*
2726          * Read the buffer to check.
2727          */
2728         if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2729                 if (!besilent)
2730                         mdb_warn("couldn't read %p", addr);
2731                 return (WALK_NEXT);
2732         }
2733 
2734         if ((corrupt = verify_pattern(buf, cp->cache_verify,
2735             UMEM_FREE_PATTERN)) >= 0) {
2736                 if (!besilent)
2737                         mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2738                             addr, (uintptr_t)addr + corrupt);
2739                 goto corrupt;
2740         }
2741 
2742         if ((cp->cache_flags & UMF_HASH) &&
2743             buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2744                 if (!besilent)
2745                         mdb_printf("buffer %p (free) seems to "
2746                             "have a corrupt redzone pattern\n", addr);
2747                 goto corrupt;
2748         }
2749 
2750         /*
2751          * confirm bufctl pointer integrity.
2752          */
2753         if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2754                 if (!besilent)
2755                         mdb_printf("buffer %p (free) has a corrupt "
2756                             "buftag\n", addr);
2757                 goto corrupt;
2758         }
2759 
2760         return (WALK_NEXT);
2761 corrupt:
2762         umv->umv_corruption++;
2763         return (WALK_NEXT);
2764 }
2765 
2766 /*
2767  * verify_alloc()
2768  *      Verify that the buftag of an allocated buffer makes sense with respect
2769  *      to the buffer.
2770  */
2771 /*ARGSUSED1*/
2772 static int
2773 verify_alloc(uintptr_t addr, const void *data, void *private)
2774 {
2775         umem_verify_t *umv = (umem_verify_t *)private;
2776         umem_cache_t *cp = &umv->umv_cache;
2777         uint64_t *buf = umv->umv_buf;        /* buf to validate */
2778         /*LINTED*/
2779         umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2780         uint32_t *ip = (uint32_t *)buftagp;
2781         uint8_t *bp = (uint8_t *)buf;
2782         int looks_ok = 0, size_ok = 1;  /* flags for finding corruption */
2783         int besilent = umv->umv_besilent;
2784 
2785         /*
2786          * Read the buffer to check.
2787          */
2788         if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2789                 if (!besilent)
2790                         mdb_warn("couldn't read %p", addr);
2791                 return (WALK_NEXT);
2792         }
2793 
2794         /*
2795          * There are two cases to handle:
2796          * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2797          *    0xfeedfacefeedface at the end of it
2798          * 2. If the buf was alloc'd using umem_alloc, it will have
2799          *    0xbb just past the end of the region in use.  At the buftag,
2800          *    it will have 0xfeedface (or, if the whole buffer is in use,
2801          *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2802          *    endianness), followed by 32 bits containing the offset of the
2803          *    0xbb byte in the buffer.
2804          *
2805          * Finally, the two 32-bit words that comprise the second half of the
2806          * buftag should xor to UMEM_BUFTAG_ALLOC
2807          */
2808 
2809         if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2810                 looks_ok = 1;
2811         else if (!UMEM_SIZE_VALID(ip[1]))
2812                 size_ok = 0;
2813         else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2814                 looks_ok = 1;
2815         else
2816                 size_ok = 0;
2817 
2818         if (!size_ok) {
2819                 if (!besilent)
2820                         mdb_printf("buffer %p (allocated) has a corrupt "
2821                             "redzone size encoding\n", addr);
2822                 goto corrupt;
2823         }
2824 
2825         if (!looks_ok) {
2826                 if (!besilent)
2827                         mdb_printf("buffer %p (allocated) has a corrupt "
2828                             "redzone signature\n", addr);
2829                 goto corrupt;
2830         }
2831 
2832         if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2833                 if (!besilent)
2834                         mdb_printf("buffer %p (allocated) has a "
2835                             "corrupt buftag\n", addr);
2836                 goto corrupt;
2837         }
2838 
2839         return (WALK_NEXT);
2840 corrupt:
2841         umv->umv_corruption++;
2842         return (WALK_NEXT);
2843 }
2844 
2845 /*ARGSUSED2*/
2846 int
2847 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2848 {
2849         if (flags & DCMD_ADDRSPEC) {
2850                 int check_alloc = 0, check_free = 0;
2851                 umem_verify_t umv;
2852 
2853                 if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2854                     addr) == -1) {
2855                         mdb_warn("couldn't read umem_cache %p", addr);
2856                         return (DCMD_ERR);
2857                 }
2858 
2859                 umv.umv_size = umv.umv_cache.cache_buftag +
2860                     sizeof (umem_buftag_t);
2861                 umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2862                 umv.umv_corruption = 0;
2863 
2864                 if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2865                         check_alloc = 1;
2866                         if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2867                                 check_free = 1;
2868                 } else {
2869                         if (!(flags & DCMD_LOOP)) {
2870                                 mdb_warn("cache %p (%s) does not have "
2871                                     "redzone checking enabled\n", addr,
2872                                     umv.umv_cache.cache_name);
2873                         }
2874                         return (DCMD_ERR);
2875                 }
2876 
2877                 if (flags & DCMD_LOOP) {
2878                         /*
2879                          * table mode, don't print out every corrupt buffer
2880                          */
2881                         umv.umv_besilent = 1;
2882                 } else {
2883                         mdb_printf("Summary for cache '%s'\n",
2884                             umv.umv_cache.cache_name);
2885                         mdb_inc_indent(2);
2886                         umv.umv_besilent = 0;
2887                 }
2888 
2889                 if (check_alloc)
2890                         (void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2891                 if (check_free)
2892                         (void) mdb_pwalk("freemem", verify_free, &umv, addr);
2893 
2894                 if (flags & DCMD_LOOP) {
2895                         if (umv.umv_corruption == 0) {
2896                                 mdb_printf("%-*s %?p clean\n",
2897                                     UMEM_CACHE_NAMELEN,
2898                                     umv.umv_cache.cache_name, addr);
2899                         } else {
2900                                 char *s = "";   /* optional s in "buffer[s]" */
2901                                 if (umv.umv_corruption > 1)
2902                                         s = "s";
2903 
2904                                 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2905                                     UMEM_CACHE_NAMELEN,
2906                                     umv.umv_cache.cache_name, addr,
2907                                     umv.umv_corruption, s);
2908                         }
2909                 } else {
2910                         /*
2911                          * This is the more verbose mode, when the user has
2912                          * type addr::umem_verify.  If the cache was clean,
2913                          * nothing will have yet been printed. So say something.
2914                          */
2915                         if (umv.umv_corruption == 0)
2916                                 mdb_printf("clean\n");
2917 
2918                         mdb_dec_indent(2);
2919                 }
2920         } else {
2921                 /*
2922                  * If the user didn't specify a cache to verify, we'll walk all
2923                  * umem_cache's, specifying ourself as a callback for each...
2924                  * this is the equivalent of '::walk umem_cache .::umem_verify'
2925                  */
2926                 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN,
2927                     "Cache Name", "Addr", "Cache Integrity");
2928                 (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2929         }
2930 
2931         return (DCMD_OK);
2932 }
2933 
2934 typedef struct vmem_node {
2935         struct vmem_node *vn_next;
2936         struct vmem_node *vn_parent;
2937         struct vmem_node *vn_sibling;
2938         struct vmem_node *vn_children;
2939         uintptr_t vn_addr;
2940         int vn_marked;
2941         vmem_t vn_vmem;
2942 } vmem_node_t;
2943 
2944 typedef struct vmem_walk {
2945         vmem_node_t *vw_root;
2946         vmem_node_t *vw_current;
2947 } vmem_walk_t;
2948 
2949 int
2950 vmem_walk_init(mdb_walk_state_t *wsp)
2951 {
2952         uintptr_t vaddr, paddr;
2953         vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2954         vmem_walk_t *vw;
2955 
2956         if (umem_readvar(&vaddr, "vmem_list") == -1) {
2957                 mdb_warn("couldn't read 'vmem_list'");
2958                 return (WALK_ERR);
2959         }
2960 
2961         while (vaddr != NULL) {
2962                 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2963                 vp->vn_addr = vaddr;
2964                 vp->vn_next = head;
2965                 head = vp;
2966 
2967                 if (vaddr == wsp->walk_addr)
2968                         current = vp;
2969 
2970                 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2971                         mdb_warn("couldn't read vmem_t at %p", vaddr);
2972                         goto err;
2973                 }
2974 
2975                 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2976         }
2977 
2978         for (vp = head; vp != NULL; vp = vp->vn_next) {
2979 
2980                 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2981                         vp->vn_sibling = root;
2982                         root = vp;
2983                         continue;
2984                 }
2985 
2986                 for (parent = head; parent != NULL; parent = parent->vn_next) {
2987                         if (parent->vn_addr != paddr)
2988                                 continue;
2989                         vp->vn_sibling = parent->vn_children;
2990                         parent->vn_children = vp;
2991                         vp->vn_parent = parent;
2992                         break;
2993                 }
2994 
2995                 if (parent == NULL) {
2996                         mdb_warn("couldn't find %p's parent (%p)\n",
2997                             vp->vn_addr, paddr);
2998                         goto err;
2999                 }
3000         }
3001 
3002         vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3003         vw->vw_root = root;
3004 
3005         if (current != NULL)
3006                 vw->vw_current = current;
3007         else
3008                 vw->vw_current = root;
3009 
3010         wsp->walk_data = vw;
3011         return (WALK_NEXT);
3012 err:
3013         for (vp = head; head != NULL; vp = head) {
3014                 head = vp->vn_next;
3015                 mdb_free(vp, sizeof (vmem_node_t));
3016         }
3017 
3018         return (WALK_ERR);
3019 }
3020 
3021 int
3022 vmem_walk_step(mdb_walk_state_t *wsp)
3023 {
3024         vmem_walk_t *vw = wsp->walk_data;
3025         vmem_node_t *vp;
3026         int rval;
3027 
3028         if ((vp = vw->vw_current) == NULL)
3029                 return (WALK_DONE);
3030 
3031         rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3032 
3033         if (vp->vn_children != NULL) {
3034                 vw->vw_current = vp->vn_children;
3035                 return (rval);
3036         }
3037 
3038         do {
3039                 vw->vw_current = vp->vn_sibling;
3040                 vp = vp->vn_parent;
3041         } while (vw->vw_current == NULL && vp != NULL);
3042 
3043         return (rval);
3044 }
3045 
3046 /*
3047  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3048  * children are visited before their parent.  We perform the postfix walk
3049  * iteratively (rather than recursively) to allow mdb to regain control
3050  * after each callback.
3051  */
3052 int
3053 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3054 {
3055         vmem_walk_t *vw = wsp->walk_data;
3056         vmem_node_t *vp = vw->vw_current;
3057         int rval;
3058 
3059         /*
3060          * If this node is marked, then we know that we have already visited
3061          * all of its children.  If the node has any siblings, they need to
3062          * be visited next; otherwise, we need to visit the parent.  Note
3063          * that vp->vn_marked will only be zero on the first invocation of
3064          * the step function.
3065          */
3066         if (vp->vn_marked) {
3067                 if (vp->vn_sibling != NULL)
3068                         vp = vp->vn_sibling;
3069                 else if (vp->vn_parent != NULL)
3070                         vp = vp->vn_parent;
3071                 else {
3072                         /*
3073                          * We have neither a parent, nor a sibling, and we
3074                          * have already been visited; we're done.
3075                          */
3076                         return (WALK_DONE);
3077                 }
3078         }
3079 
3080         /*
3081          * Before we visit this node, visit its children.
3082          */
3083         while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3084                 vp = vp->vn_children;
3085 
3086         vp->vn_marked = 1;
3087         vw->vw_current = vp;
3088         rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3089 
3090         return (rval);
3091 }
3092 
3093 void
3094 vmem_walk_fini(mdb_walk_state_t *wsp)
3095 {
3096         vmem_walk_t *vw = wsp->walk_data;
3097         vmem_node_t *root = vw->vw_root;
3098         int done;
3099 
3100         if (root == NULL)
3101                 return;
3102 
3103         if ((vw->vw_root = root->vn_children) != NULL)
3104                 vmem_walk_fini(wsp);
3105 
3106         vw->vw_root = root->vn_sibling;
3107         done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3108         mdb_free(root, sizeof (vmem_node_t));
3109 
3110         if (done) {
3111                 mdb_free(vw, sizeof (vmem_walk_t));
3112         } else {
3113                 vmem_walk_fini(wsp);
3114         }
3115 }
3116 
/*
 * Per-walk state for the vmem segment walkers.
 */
struct vmem_seg_walk {
        uint8_t vsw_type;               /* segment type to report, or VMEM_NONE */
        uintptr_t vsw_start;            /* target address of the arena's vm_seg0 */
        uintptr_t vsw_current;          /* target address of the next segment */
};

typedef struct vmem_seg_walk vmem_seg_walk_t;
3122 
3123 /*ARGSUSED*/
3124 int
3125 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3126 {
3127         vmem_seg_walk_t *vsw;
3128 
3129         if (wsp->walk_addr == NULL) {
3130                 mdb_warn("vmem_%s does not support global walks\n", name);
3131                 return (WALK_ERR);
3132         }
3133 
3134         wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3135 
3136         vsw->vsw_type = type;
3137         vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
3138         vsw->vsw_current = vsw->vsw_start;
3139 
3140         return (WALK_NEXT);
3141 }
3142 
3143 /*
3144  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3145  */
3146 #define VMEM_NONE       0
3147 
3148 int
3149 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3150 {
3151         return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3152 }
3153 
3154 int
3155 vmem_free_walk_init(mdb_walk_state_t *wsp)
3156 {
3157         return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3158 }
3159 
3160 int
3161 vmem_span_walk_init(mdb_walk_state_t *wsp)
3162 {
3163         return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3164 }
3165 
3166 int
3167 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3168 {
3169         return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3170 }
3171 
3172 int
3173 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3174 {
3175         vmem_seg_t seg;
3176         vmem_seg_walk_t *vsw = wsp->walk_data;
3177         uintptr_t addr = vsw->vsw_current;
3178         static size_t seg_size = 0;
3179         int rval;
3180 
3181         if (!seg_size) {
3182                 if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
3183                         mdb_warn("failed to read 'vmem_seg_size'");
3184                         seg_size = sizeof (vmem_seg_t);
3185                 }
3186         }
3187 
3188         if (seg_size < sizeof (seg))
3189                 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3190 
3191         if (mdb_vread(&seg, seg_size, addr) == -1) {
3192                 mdb_warn("couldn't read vmem_seg at %p", addr);
3193                 return (WALK_ERR);
3194         }
3195 
3196         vsw->vsw_current = (uintptr_t)seg.vs_anext;
3197         if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3198                 rval = WALK_NEXT;
3199         } else {
3200                 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3201         }
3202 
3203         if (vsw->vsw_current == vsw->vsw_start)
3204                 return (WALK_DONE);
3205 
3206         return (rval);
3207 }
3208 
3209 void
3210 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3211 {
3212         vmem_seg_walk_t *vsw = wsp->walk_data;
3213 
3214         mdb_free(vsw, sizeof (vmem_seg_walk_t));
3215 }
3216 
3217 #define VMEM_NAMEWIDTH  22
3218 
3219 int
3220 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3221 {
3222         vmem_t v, parent;
3223         uintptr_t paddr;
3224         int ident = 0;
3225         char c[VMEM_NAMEWIDTH];
3226 
3227         if (!(flags & DCMD_ADDRSPEC)) {
3228                 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3229                         mdb_warn("can't walk vmem");
3230                         return (DCMD_ERR);
3231                 }
3232                 return (DCMD_OK);
3233         }
3234 
3235         if (DCMD_HDRSPEC(flags))
3236                 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3237                     "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3238                     "TOTAL", "SUCCEED", "FAIL");
3239 
3240         if (mdb_vread(&v, sizeof (v), addr) == -1) {
3241                 mdb_warn("couldn't read vmem at %p", addr);
3242                 return (DCMD_ERR);
3243         }
3244 
3245         for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3246                 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3247                         mdb_warn("couldn't trace %p's ancestry", addr);
3248                         ident = 0;
3249                         break;
3250                 }
3251                 paddr = (uintptr_t)parent.vm_source;
3252         }
3253 
3254         (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3255 
3256         mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3257             addr, VMEM_NAMEWIDTH, c,
3258             v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3259             v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3260 
3261         return (DCMD_OK);
3262 }
3263 
/*
 * vmem_seg_help()
 *      Print the help text (description and option summary) for the
 *      vmem_seg dcmd.
 */
void
vmem_seg_help(void)
{
        mdb_printf("%s\n",
"Display the contents of vmem_seg_ts, with optional filtering.\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
        /* Temporarily outdent so the OPTIONS heading stands out. */
        mdb_dec_indent(2);
        mdb_printf("%<b>OPTIONS%</b>\n");
        mdb_inc_indent(2);
        mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3295 
3296 
3297 /*ARGSUSED*/
3298 int
3299 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3300 {
3301         vmem_seg_t vs;
3302         uintptr_t *stk = vs.vs_stack;
3303         uintptr_t sz;
3304         uint8_t t;
3305         const char *type = NULL;
3306         GElf_Sym sym;
3307         char c[MDB_SYM_NAMLEN];
3308         int no_debug;
3309         int i;
3310         int depth;
3311         uintptr_t laddr, haddr;
3312 
3313         uintptr_t caller = NULL, thread = NULL;
3314         uintptr_t minsize = 0, maxsize = 0;
3315 
3316         hrtime_t earliest = 0, latest = 0;
3317 
3318         uint_t size = 0;
3319         uint_t verbose = 0;
3320 
3321         if (!(flags & DCMD_ADDRSPEC))
3322                 return (DCMD_USAGE);
3323 
3324         if (mdb_getopts(argc, argv,
3325             'c', MDB_OPT_UINTPTR, &caller,
3326             'e', MDB_OPT_UINT64, &earliest,
3327             'l', MDB_OPT_UINT64, &latest,
3328             's', MDB_OPT_SETBITS, TRUE, &size,
3329             'm', MDB_OPT_UINTPTR, &minsize,
3330             'M', MDB_OPT_UINTPTR, &maxsize,
3331             't', MDB_OPT_UINTPTR, &thread,
3332             'T', MDB_OPT_STR, &type,
3333             'v', MDB_OPT_SETBITS, TRUE, &verbose,
3334             NULL) != argc)
3335                 return (DCMD_USAGE);
3336 
3337         if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3338                 if (verbose) {
3339                         mdb_printf("%16s %4s %16s %16s %16s\n"
3340                             "%<u>%16s %4s %16s %16s %16s%</u>\n",
3341                             "ADDR", "TYPE", "START", "END", "SIZE",
3342                             "", "", "THREAD", "TIMESTAMP", "");
3343                 } else {
3344                         mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3345                             "START", size? "SIZE" : "END", "WHO");
3346                 }
3347         }
3348 
3349         if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3350                 mdb_warn("couldn't read vmem_seg at %p", addr);
3351                 return (DCMD_ERR);
3352         }
3353 
3354         if (type != NULL) {
3355                 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3356                         t = VMEM_ALLOC;
3357                 else if (strcmp(type, "FREE") == 0)
3358                         t = VMEM_FREE;
3359                 else if (strcmp(type, "SPAN") == 0)
3360                         t = VMEM_SPAN;
3361                 else if (strcmp(type, "ROTR") == 0 ||
3362                     strcmp(type, "ROTOR") == 0)
3363                         t = VMEM_ROTOR;
3364                 else if (strcmp(type, "WLKR") == 0 ||
3365                     strcmp(type, "WALKER") == 0)
3366                         t = VMEM_WALKER;
3367                 else {
3368                         mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3369                             type);
3370                         return (DCMD_ERR);
3371                 }
3372 
3373                 if (vs.vs_type != t)
3374                         return (DCMD_OK);
3375         }
3376 
3377         sz = vs.vs_end - vs.vs_start;
3378 
3379         if (minsize != 0 && sz < minsize)
3380                 return (DCMD_OK);
3381 
3382         if (maxsize != 0 && sz > maxsize)
3383                 return (DCMD_OK);
3384 
3385         t = vs.vs_type;
3386         depth = vs.vs_depth;
3387 
3388         /*
3389          * debug info, when present, is only accurate for VMEM_ALLOC segments
3390          */
3391         no_debug = (t != VMEM_ALLOC) ||
3392             (depth == 0 || depth > VMEM_STACK_DEPTH);
3393 
3394         if (no_debug) {
3395                 if (caller != NULL || thread != NULL || earliest != 0 ||
3396                     latest != 0)
3397                         return (DCMD_OK);               /* not enough info */
3398         } else {
3399                 if (caller != NULL) {
3400                         laddr = caller;
3401                         haddr = caller + sizeof (caller);
3402 
3403                         if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3404                             sizeof (c), &sym) != -1 &&
3405                             caller == (uintptr_t)sym.st_value) {
3406                                 /*
3407                                  * We were provided an exact symbol value; any
3408                                  * address in the function is valid.
3409                                  */
3410                                 laddr = (uintptr_t)sym.st_value;
3411                                 haddr = (uintptr_t)sym.st_value + sym.st_size;
3412                         }
3413 
3414                         for (i = 0; i < depth; i++)
3415                                 if (vs.vs_stack[i] >= laddr &&
3416                                     vs.vs_stack[i] < haddr)
3417                                         break;
3418 
3419                         if (i == depth)
3420                                 return (DCMD_OK);
3421                 }
3422 
3423                 if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3424                         return (DCMD_OK);
3425 
3426                 if (earliest != 0 && vs.vs_timestamp < earliest)
3427                         return (DCMD_OK);
3428 
3429                 if (latest != 0 && vs.vs_timestamp > latest)
3430                         return (DCMD_OK);
3431         }
3432 
3433         type = (t == VMEM_ALLOC ? "ALLC" :
3434             t == VMEM_FREE ? "FREE" :
3435             t == VMEM_SPAN ? "SPAN" :
3436             t == VMEM_ROTOR ? "ROTR" :
3437             t == VMEM_WALKER ? "WLKR" :
3438             "????");
3439 
3440         if (flags & DCMD_PIPE_OUT) {
3441                 mdb_printf("%#r\n", addr);
3442                 return (DCMD_OK);
3443         }
3444 
3445         if (verbose) {
3446                 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3447                     addr, type, vs.vs_start, vs.vs_end, sz);
3448 
3449                 if (no_debug)
3450                         return (DCMD_OK);
3451 
3452                 mdb_printf("%16s %4s %16d %16llx\n",
3453                     "", "", vs.vs_thread, vs.vs_timestamp);
3454 
3455                 mdb_inc_indent(17);
3456                 for (i = 0; i < depth; i++) {
3457                         mdb_printf("%a\n", stk[i]);
3458                 }
3459                 mdb_dec_indent(17);
3460                 mdb_printf("\n");
3461         } else {
3462                 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3463                     vs.vs_start, size? sz : vs.vs_end);
3464 
3465                 if (no_debug) {
3466                         mdb_printf("\n");
3467                         return (DCMD_OK);
3468                 }
3469 
3470                 for (i = 0; i < depth; i++) {
3471                         if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3472                             c, sizeof (c), &sym) == -1)
3473                                 continue;
3474                         if (is_umem_sym(c, "vmem_"))
3475                                 continue;
3476                         break;
3477                 }
3478                 mdb_printf(" %a\n", stk[i]);
3479         }
3480         return (DCMD_OK);
3481 }
3482 
3483 /*ARGSUSED*/
3484 static int
3485 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3486 {
3487         char name[UMEM_CACHE_NAMELEN + 1];
3488         hrtime_t delta;
3489         int i, depth;
3490 
3491         if (bcp->bc_timestamp == 0)
3492                 return (WALK_DONE);
3493 
3494         if (*newest == 0)
3495                 *newest = bcp->bc_timestamp;
3496 
3497         delta = *newest - bcp->bc_timestamp;
3498         depth = MIN(bcp->bc_depth, umem_stack_depth);
3499 
3500         if (mdb_readstr(name, sizeof (name), (uintptr_t)
3501             &bcp->bc_cache->cache_name) <= 0)
3502                 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3503 
3504         mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3505             delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3506 
3507         for (i = 0; i < depth; i++)
3508                 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3509 
3510         return (WALK_NEXT);
3511 }
3512 
3513 int
3514 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3515 {
3516         const char *logname = "umem_transaction_log";
3517         hrtime_t newest = 0;
3518 
3519         if ((flags & DCMD_ADDRSPEC) || argc > 1)
3520                 return (DCMD_USAGE);
3521 
3522         if (argc > 0) {
3523                 if (argv->a_type != MDB_TYPE_STRING)
3524                         return (DCMD_USAGE);
3525                 if (strcmp(argv->a_un.a_str, "fail") == 0)
3526                         logname = "umem_failure_log";
3527                 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3528                         logname = "umem_slab_log";
3529                 else
3530                         return (DCMD_USAGE);
3531         }
3532 
3533         if (umem_readvar(&addr, logname) == -1) {
3534                 mdb_warn("failed to read %s log header pointer");
3535                 return (DCMD_ERR);
3536         }
3537 
3538         if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3539                 mdb_warn("failed to walk umem log");
3540                 return (DCMD_ERR);
3541         }
3542 
3543         return (DCMD_OK);
3544 }
3545 
3546 /*
3547  * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
3548  * The first piece is a structure which we use to accumulate umem_cache_t
3549  * addresses of interest.  The umc_add is used as a callback for the umem_cache
3550  * walker; we either add all caches, or ones named explicitly as arguments.
3551  */
3552 
typedef struct umclist {
        /* Cache name to match, or NULL to accept every cache */
        const char *umc_name;
        /* Array of umem_cache_t addresses collected so far */
        uintptr_t *umc_caches;
        /* Number of entries of umc_caches currently in use */
        int umc_nelems;
        /* Number of entries umc_caches has room for */
        int umc_size;
} umclist_t;
3559 
3560 static int
3561 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3562 {
3563         void *p;
3564         int s;
3565 
3566         if (umc->umc_name == NULL ||
3567             strcmp(cp->cache_name, umc->umc_name) == 0) {
3568                 /*
3569                  * If we have a match, grow our array (if necessary), and then
3570                  * add the virtual address of the matching cache to our list.
3571                  */
3572                 if (umc->umc_nelems >= umc->umc_size) {
3573                         s = umc->umc_size ? umc->umc_size * 2 : 256;
3574                         p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3575 
3576                         bcopy(umc->umc_caches, p,
3577                             sizeof (uintptr_t) * umc->umc_size);
3578 
3579                         umc->umc_caches = p;
3580                         umc->umc_size = s;
3581                 }
3582 
3583                 umc->umc_caches[umc->umc_nelems++] = addr;
3584                 return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3585         }
3586 
3587         return (WALK_NEXT);
3588 }
3589 
3590 /*
3591  * The second piece of ::umausers is a hash table of allocations.  Each
3592  * allocation owner is identified by its stack trace and data_size.  We then
3593  * track the total bytes of all such allocations, and the number of allocations
3594  * to report at the end.  Once we have a list of caches, we walk through the
3595  * allocated bufctls of each, and update our hash table accordingly.
3596  */
3597 
3598 typedef struct umowner {
3599         struct umowner *umo_head;               /* First hash elt in bucket */
3600         struct umowner *umo_next;               /* Next hash elt in chain */
3601         size_t umo_signature;                   /* Hash table signature */
3602         uint_t umo_num;                         /* Number of allocations */
3603         size_t umo_data_size;                   /* Size of each allocation */
3604         size_t umo_total_size;                  /* Total bytes of allocation */
3605         int umo_depth;                          /* Depth of stack trace */
3606         uintptr_t *umo_stack;                   /* Stack trace */
3607 } umowner_t;
3608 
3609 typedef struct umusers {
3610         const umem_cache_t *umu_cache;          /* Current umem cache */
3611         umowner_t *umu_hash;                    /* Hash table of owners */
3612         uintptr_t *umu_stacks;                  /* stacks for owners */
3613         int umu_nelems;                         /* Number of entries in use */
3614         int umu_size;                           /* Total number of entries */
3615 } umusers_t;
3616 
3617 static void
3618 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3619     size_t size, size_t data_size)
3620 {
3621         int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3622         size_t bucket, signature = data_size;
3623         umowner_t *umo, *umoend;
3624 
3625         /*
3626          * If the hash table is full, double its size and rehash everything.
3627          */
3628         if (umu->umu_nelems >= umu->umu_size) {
3629                 int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3630                 size_t umowner_size = sizeof (umowner_t);
3631                 size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3632                 uintptr_t *new_stacks;
3633 
3634                 umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3635                 new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3636 
3637                 bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3638                 bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3639                 umu->umu_hash = umo;
3640                 umu->umu_stacks = new_stacks;
3641                 umu->umu_size = s;
3642 
3643                 umoend = umu->umu_hash + umu->umu_size;
3644                 for (umo = umu->umu_hash; umo < umoend; umo++) {
3645                         umo->umo_head = NULL;
3646                         umo->umo_stack = &umu->umu_stacks[
3647                             umem_stack_depth * (umo - umu->umu_hash)];
3648                 }
3649 
3650                 umoend = umu->umu_hash + umu->umu_nelems;
3651                 for (umo = umu->umu_hash; umo < umoend; umo++) {
3652                         bucket = umo->umo_signature & (umu->umu_size - 1);
3653                         umo->umo_next = umu->umu_hash[bucket].umo_head;
3654                         umu->umu_hash[bucket].umo_head = umo;
3655                 }
3656         }
3657 
3658         /*
3659          * Finish computing the hash signature from the stack trace, and then
3660          * see if the owner is in the hash table.  If so, update our stats.
3661          */
3662         for (i = 0; i < depth; i++)
3663                 signature += bcp->bc_stack[i];
3664 
3665         bucket = signature & (umu->umu_size - 1);
3666 
3667         for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3668                 if (umo->umo_signature == signature) {
3669                         size_t difference = 0;
3670 
3671                         difference |= umo->umo_data_size - data_size;
3672                         difference |= umo->umo_depth - depth;
3673 
3674                         for (i = 0; i < depth; i++) {
3675                                 difference |= umo->umo_stack[i] -
3676                                     bcp->bc_stack[i];
3677                         }
3678 
3679                         if (difference == 0) {
3680                                 umo->umo_total_size += size;
3681                                 umo->umo_num++;
3682                                 return;
3683                         }
3684                 }
3685         }
3686 
3687         /*
3688          * If the owner is not yet hashed, grab the next element and fill it
3689          * in based on the allocation information.
3690          */
3691         umo = &umu->umu_hash[umu->umu_nelems++];
3692         umo->umo_next = umu->umu_hash[bucket].umo_head;
3693         umu->umu_hash[bucket].umo_head = umo;
3694 
3695         umo->umo_signature = signature;
3696         umo->umo_num = 1;
3697         umo->umo_data_size = data_size;
3698         umo->umo_total_size = size;
3699         umo->umo_depth = depth;
3700 
3701         for (i = 0; i < depth; i++)
3702                 umo->umo_stack[i] = bcp->bc_stack[i];
3703 }
3704 
3705 /*
3706  * When ::umausers is invoked without the -f flag, we simply update our hash
3707  * table with the information from each allocated bufctl.
3708  */
3709 /*ARGSUSED*/
3710 static int
3711 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3712 {
3713         const umem_cache_t *cp = umu->umu_cache;
3714 
3715         umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3716         return (WALK_NEXT);
3717 }
3718 
3719 /*
3720  * When ::umausers is invoked with the -f flag, we print out the information
3721  * for each bufctl as well as updating the hash table.
3722  */
3723 static int
3724 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3725 {
3726         int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3727         const umem_cache_t *cp = umu->umu_cache;
3728 
3729         mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3730             cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3731 
3732         for (i = 0; i < depth; i++)
3733                 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3734 
3735         umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3736         return (WALK_NEXT);
3737 }
3738 
3739 /*
3740  * We sort our results by allocation size before printing them.
3741  */
3742 static int
3743 umownercmp(const void *lp, const void *rp)
3744 {
3745         const umowner_t *lhs = lp;
3746         const umowner_t *rhs = rp;
3747 
3748         return (rhs->umo_total_size - lhs->umo_total_size);
3749 }
3750 
3751 /*
3752  * The main engine of ::umausers is relatively straightforward: First we
3753  * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3754  * iterate over the allocated bufctls of each cache in the list.  Finally,
3755  * we sort and print our results.
3756  */
3757 /*ARGSUSED*/
3758 int
3759 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3760 {
3761         int mem_threshold = 8192;       /* Minimum # bytes for printing */
3762         int cnt_threshold = 100;        /* Minimum # blocks for printing */
3763         int audited_caches = 0;         /* Number of UMF_AUDIT caches found */
3764         int do_all_caches = 1;          /* Do all caches (no arguments) */
3765         int opt_e = FALSE;              /* Include "small" users */
3766         int opt_f = FALSE;              /* Print stack traces */
3767 
3768         mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3769         umowner_t *umo, *umoend;
3770         int i, oelems;
3771 
3772         umclist_t umc;
3773         umusers_t umu;
3774 
3775         if (flags & DCMD_ADDRSPEC)
3776                 return (DCMD_USAGE);
3777 
3778         bzero(&umc, sizeof (umc));
3779         bzero(&umu, sizeof (umu));
3780 
3781         while ((i = mdb_getopts(argc, argv,
3782             'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3783             'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3784 
3785                 argv += i;      /* skip past options we just processed */
3786                 argc -= i;      /* adjust argc */
3787 
3788                 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3789                         return (DCMD_USAGE);
3790 
3791                 oelems = umc.umc_nelems;
3792                 umc.umc_name = argv->a_un.a_str;
3793                 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3794 
3795                 if (umc.umc_nelems == oelems) {
3796                         mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3797                         return (DCMD_ERR);
3798                 }
3799 
3800                 do_all_caches = 0;
3801                 argv++;
3802                 argc--;
3803         }
3804 
3805         if (opt_e)
3806                 mem_threshold = cnt_threshold = 0;
3807 
3808         if (opt_f)
3809                 callback = (mdb_walk_cb_t)umause2;
3810 
3811         if (do_all_caches) {
3812                 umc.umc_name = NULL; /* match all cache names */
3813                 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3814         }
3815 
3816         for (i = 0; i < umc.umc_nelems; i++) {
3817                 uintptr_t cp = umc.umc_caches[i];
3818                 umem_cache_t c;
3819 
3820                 if (mdb_vread(&c, sizeof (c), cp) == -1) {
3821                         mdb_warn("failed to read cache at %p", cp);
3822                         continue;
3823                 }
3824 
3825                 if (!(c.cache_flags & UMF_AUDIT)) {
3826                         if (!do_all_caches) {
3827                                 mdb_warn("UMF_AUDIT is not enabled for %s\n",
3828                                     c.cache_name);
3829                         }
3830                         continue;
3831                 }
3832 
3833                 umu.umu_cache = &c;
3834                 (void) mdb_pwalk("bufctl", callback, &umu, cp);
3835                 audited_caches++;
3836         }
3837 
3838         if (audited_caches == 0 && do_all_caches) {
3839                 mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3840                 return (DCMD_ERR);
3841         }
3842 
3843         qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3844         umoend = umu.umu_hash + umu.umu_nelems;
3845 
3846         for (umo = umu.umu_hash; umo < umoend; umo++) {
3847                 if (umo->umo_total_size < mem_threshold &&
3848                     umo->umo_num < cnt_threshold)
3849                         continue;
3850                 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3851                     umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3852                 for (i = 0; i < umo->umo_depth; i++)
3853                         mdb_printf("\t %a\n", umo->umo_stack[i]);
3854         }
3855 
3856         return (DCMD_OK);
3857 }
3858 
/*
 * Tag found in front of the pointer handed out for a malloc()ed buffer;
 * um_umem_buffer_cb() reads one of these to recognize malloc() buffers.
 */
struct malloc_data {
        /* size recorded for the buffer, including the tag overhead */
        uint32_t malloc_size;
        /* == UMEM_MALLOC_ENCODE(state, malloc_size) */
        uint32_t malloc_stat;
};

/*
 * Largest malloc size that is tracked in the um_bucket histogram: the
 * biggest buffer less the tag overhead, which is two malloc_data-sized
 * units under _LP64 and one otherwise.
 */
#ifdef _LP64
#define UMI_MAX_BUCKET          (UMEM_MAXBUF - 2*sizeof (struct malloc_data))
#else
#define UMI_MAX_BUCKET          (UMEM_MAXBUF - sizeof (struct malloc_data))
#endif
3869 
3870 typedef struct umem_malloc_info {
3871         size_t um_total;        /* total allocated buffers */
3872         size_t um_malloc;       /* malloc buffers */
3873         size_t um_malloc_size;  /* sum of malloc buffer sizes */
3874         size_t um_malloc_overhead; /* sum of in-chunk overheads */
3875 
3876         umem_cache_t *um_cp;
3877 
3878         uint_t *um_bucket;
3879 } umem_malloc_info_t;
3880 
3881 static void
3882 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3883     size_t maxbuckets, size_t minbucketsize, int geometric)
3884 {
3885         uint64_t um_malloc;
3886         int minb = -1;
3887         int maxb = -1;
3888         int buckets;
3889         int nbucks;
3890         int i;
3891         int b;
3892         const int *distarray;
3893 
3894         minb = (int)minmalloc;
3895         maxb = (int)maxmalloc;
3896 
3897         nbucks = buckets = maxb - minb + 1;
3898 
3899         um_malloc = 0;
3900         for (b = minb; b <= maxb; b++)
3901                 um_malloc += um_bucket[b];
3902 
3903         if (maxbuckets != 0)
3904                 buckets = MIN(buckets, maxbuckets);
3905 
3906         if (minbucketsize > 1) {
3907                 buckets = MIN(buckets, nbucks/minbucketsize);
3908                 if (buckets == 0) {
3909                         buckets = 1;
3910                         minbucketsize = nbucks;
3911                 }
3912         }
3913 
3914         if (geometric)
3915                 distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3916         else
3917                 distarray = dist_linear(buckets, minb, maxb);
3918 
3919         dist_print_header("malloc size", 11, "count");
3920         for (i = 0; i < buckets; i++) {
3921                 dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3922         }
3923         mdb_printf("\n");
3924 }
3925 
3926 /*
3927  * A malloc()ed buffer looks like:
3928  *
3929  *      <----------- mi.malloc_size --->
3930  *      <----------- cp.cache_bufsize ------------------>
3931  *      <----------- cp.cache_chunksize -------------------------------->
3932  *      +-------+-----------------------+---------------+---------------+
3933  *      |/tag///| mallocsz              |/round-off/////|/debug info////|
3934  *      +-------+---------------------------------------+---------------+
3935  *              <-- usable space ------>
3936  *
3937  * mallocsz is the argument to malloc(3C).
3938  * mi.malloc_size is the actual size passed to umem_alloc(), which
3939  * is rounded up to the smallest available cache size, which is
3940  * cache_bufsize.  If there is debugging or alignment overhead in
3941  * the cache, that is reflected in a larger cache_chunksize.
3942  *
3943  * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3944  * depending upon the ISA's alignment requirements.  For 32-bit allocations,
 * it is always an 8-byte tag.  For 64-bit allocations larger than 8 bytes,
3946  * the tag has 8 bytes of padding before it.
3947  *
 * 32-bit buffers, and 64-bit buffers <= 8 bytes:
3949  *      +-------+-------+--------- ...
3950  *      |/size//|/stat//| mallocsz ...
3951  *      +-------+-------+--------- ...
3952  *                      ^
3953  *                      pointer returned from malloc(3C)
3954  *
 * 64-bit buffers > 8 bytes:
3956  *      +---------------+-------+-------+--------- ...
3957  *      |/padding///////|/size//|/stat//| mallocsz ...
3958  *      +---------------+-------+-------+--------- ...
3959  *                                      ^
3960  *                                      pointer returned from malloc(3C)
3961  *
3962  * The "size" field is "malloc_size", which is mallocsz + the padding.
3963  * The "stat" field is derived from malloc_size, and functions as a
3964  * validation that this buffer is actually from malloc(3C).
3965  */
3966 /*ARGSUSED*/
3967 static int
3968 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
3969 {
3970         struct malloc_data md;
3971         size_t m_addr = addr;
3972         size_t overhead = sizeof (md);
3973         size_t mallocsz;
3974 
3975         ump->um_total++;
3976 
3977 #ifdef _LP64
3978         if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
3979                 m_addr += overhead;
3980                 overhead += sizeof (md);
3981         }
3982 #endif
3983 
3984         if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
3985                 mdb_warn("unable to read malloc header at %p", m_addr);
3986                 return (WALK_NEXT);
3987         }
3988 
3989         switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
3990         case MALLOC_MAGIC:
3991 #ifdef _LP64
3992         case MALLOC_SECOND_MAGIC:
3993 #endif
3994                 mallocsz = md.malloc_size - overhead;
3995 
3996                 ump->um_malloc++;
3997                 ump->um_malloc_size += mallocsz;
3998                 ump->um_malloc_overhead += overhead;
3999 
4000                 /* include round-off and debug overhead */
4001                 ump->um_malloc_overhead +=
4002                     ump->um_cp->cache_chunksize - md.malloc_size;
4003 
4004                 if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
4005                         ump->um_bucket[mallocsz]++;
4006 
4007                 break;
4008         default:
4009                 break;
4010         }
4011 
4012         return (WALK_NEXT);
4013 }
4014 
4015 int
4016 get_umem_alloc_sizes(int **out, size_t *out_num)
4017 {
4018         GElf_Sym sym;
4019 
4020         if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
4021                 mdb_warn("unable to look up umem_alloc_sizes");
4022                 return (-1);
4023         }
4024 
4025         *out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
4026         *out_num = sym.st_size / sizeof (int);
4027 
4028         if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
4029                 mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
4030                 *out = NULL;
4031                 return (-1);
4032         }
4033 
4034         return (0);
4035 }
4036 
4037 
4038 static int
4039 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
4040 {
4041         if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
4042                 return (WALK_NEXT);
4043 
4044         ump->um_cp = cp;
4045 
4046         if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
4047             -1) {
4048                 mdb_warn("can't walk 'umem' for cache %p", addr);
4049                 return (WALK_ERR);
4050         }
4051 
4052         return (WALK_NEXT);
4053 }
4054 
/*
 * Print the help text for the malloc distribution dcmd: a one-line
 * summary followed by the list of options.
 */
void
umem_malloc_dist_help(void)
{
        static const char summary[] =
            "report distribution of outstanding malloc()s";
        static const char options[] =
            "  -b maxbins\n"
            "        Use at most maxbins bins for the data\n"
            "  -B minbinsize\n"
            "        Make the bins at least minbinsize bytes apart\n"
            "  -d    dump the raw data out, without binning\n"
            "  -g    use geometric binning instead of linear binning\n";

        mdb_printf("%s\n", summary);

        /* The OPTIONS heading is printed with the indent reduced by 2. */
        mdb_dec_indent(2);
        mdb_printf("%<b>OPTIONS%</b>\n");
        mdb_inc_indent(2);

        mdb_printf("%s", options);
}
4071 
4072 /*ARGSUSED*/
4073 int
4074 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4075 {
4076         umem_malloc_info_t mi;
4077         uint_t geometric = 0;
4078         uint_t dump = 0;
4079         size_t maxbuckets = 0;
4080         size_t minbucketsize = 0;
4081 
4082         size_t minalloc = 0;
4083         size_t maxalloc = UMI_MAX_BUCKET;
4084 
4085         if (flags & DCMD_ADDRSPEC)
4086                 return (DCMD_USAGE);
4087 
4088         if (mdb_getopts(argc, argv,
4089             'd', MDB_OPT_SETBITS, TRUE, &dump,
4090             'g', MDB_OPT_SETBITS, TRUE, &geometric,
4091             'b', MDB_OPT_UINTPTR, &maxbuckets,
4092             'B', MDB_OPT_UINTPTR, &minbucketsize,
4093             0) != argc)
4094                 return (DCMD_USAGE);
4095 
4096         bzero(&mi, sizeof (mi));
4097         mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4098             UM_SLEEP | UM_GC);
4099 
4100         if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
4101             &mi) == -1) {
4102                 mdb_warn("unable to walk 'umem_cache'");
4103                 return (DCMD_ERR);
4104         }
4105 
4106         if (dump) {
4107                 int i;
4108                 for (i = minalloc; i <= maxalloc; i++)
4109                         mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
4110 
4111                 return (DCMD_OK);
4112         }
4113 
4114         umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
4115             maxbuckets, minbucketsize, geometric);
4116 
4117         return (DCMD_OK);
4118 }
4119 
/*
 * Print the help text for the umem_malloc_info dcmd: a one-line summary
 * followed by the list of options.  The -g line is left out when _KMDB
 * is defined.
 */
void
umem_malloc_info_help(void)
{
        static const char summary[] =
            "report information about malloc()s by cache.  ";
        static const char options[] =
            "  -b maxbins\n"
            "        Use at most maxbins bins for the data\n"
            "  -B minbinsize\n"
            "        Make the bins at least minbinsize bytes apart\n"
            "  -d    dump the raw distribution data without binning\n"
#ifndef _KMDB
            "  -g    use geometric binning instead of linear binning\n"
#endif
            "";

        mdb_printf("%s\n", summary);

        /* The OPTIONS heading is printed with the indent reduced by 2. */
        mdb_dec_indent(2);
        mdb_printf("%<b>OPTIONS%</b>\n");
        mdb_inc_indent(2);

        mdb_printf("%s", options);
}
4139 int
4140 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4141 {
4142         umem_cache_t c;
4143         umem_malloc_info_t mi;
4144 
4145         int skip = 0;
4146 
4147         size_t maxmalloc;
4148         size_t overhead;
4149         size_t allocated;
4150         size_t avg_malloc;
4151         size_t overhead_pct;    /* 1000 * overhead_percent */
4152 
4153         uint_t verbose = 0;
4154         uint_t dump = 0;
4155         uint_t geometric = 0;
4156         size_t maxbuckets = 0;
4157         size_t minbucketsize = 0;
4158 
4159         int *alloc_sizes;
4160         int idx;
4161         size_t num;
4162         size_t minmalloc;
4163 
4164         if (mdb_getopts(argc, argv,
4165             'd', MDB_OPT_SETBITS, TRUE, &dump,
4166             'g', MDB_OPT_SETBITS, TRUE, &geometric,
4167             'b', MDB_OPT_UINTPTR, &maxbuckets,
4168             'B', MDB_OPT_UINTPTR, &minbucketsize,
4169             0) != argc)
4170                 return (DCMD_USAGE);
4171 
4172         if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
4173                 verbose = 1;
4174 
4175         if (!(flags & DCMD_ADDRSPEC)) {
4176                 if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
4177                     argc, argv) == -1) {
4178                         mdb_warn("can't walk umem_cache");
4179                         return (DCMD_ERR);
4180                 }
4181                 return (DCMD_OK);
4182         }
4183 
4184         if (!mdb_vread(&c, sizeof (c), addr)) {
4185                 mdb_warn("unable to read cache at %p", addr);
4186                 return (DCMD_ERR);
4187         }
4188 
4189         if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
4190                 if (!(flags & DCMD_LOOP))
4191                         mdb_warn("umem_malloc_info: cache \"%s\" is not used "
4192                             "by malloc()\n", c.cache_name);
4193                 skip = 1;
4194         }
4195 
4196         /*
4197          * normally, print the header only the first time.  In verbose mode,
4198          * print the header on every non-skipped buffer
4199          */
4200         if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
4201                 mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
4202                     "CACHE", "BUFSZ", "MAXMAL",
4203                     "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");
4204 
4205         if (skip)
4206                 return (DCMD_OK);
4207 
4208         maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
4209 #ifdef _LP64
4210         if (c.cache_bufsize > UMEM_SECOND_ALIGN)
4211                 maxmalloc -= sizeof (struct malloc_data);
4212 #endif
4213 
4214         bzero(&mi, sizeof (mi));
4215         mi.um_cp = &c;
4216         if (verbose)
4217                 mi.um_bucket =
4218                     mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4219                     UM_SLEEP | UM_GC);
4220 
4221         if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
4222             -1) {
4223                 mdb_warn("can't walk 'umem'");
4224                 return (DCMD_ERR);
4225         }
4226 
4227         overhead = mi.um_malloc_overhead;
4228         allocated = mi.um_malloc_size;
4229 
4230         /* do integer round off for the average */
4231         if (mi.um_malloc != 0)
4232                 avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
4233         else
4234                 avg_malloc = 0;
4235 
4236         /*
4237          * include per-slab overhead
4238          *
4239          * Each slab in a given cache is the same size, and has the same
4240          * number of chunks in it;  we read in the first slab on the
4241          * slab list to get the number of chunks for all slabs.  To
4242          * compute the per-slab overhead, we just subtract the chunk usage
4243          * from the slabsize:
4244          *
4245          * +------------+-------+-------+ ... --+-------+-------+-------+
4246          * |////////////|       |       | ...   |       |///////|///////|
4247          * |////color///| chunk | chunk | ...   | chunk |/color/|/slab//|
4248          * |////////////|       |       | ...   |       |///////|///////|
4249          * +------------+-------+-------+ ... --+-------+-------+-------+
4250          * |            \_______chunksize * chunks_____/                |
4251          * \__________________________slabsize__________________________/
4252          *
4253          * For UMF_HASH caches, there is an additional source of overhead;
4254          * the external umem_slab_t and per-chunk bufctl structures.  We
4255          * include those in our per-slab overhead.
4256          *
4257          * Once we have a number for the per-slab overhead, we estimate
4258          * the actual overhead by treating the malloc()ed buffers as if
4259          * they were densely packed:
4260          *
4261          *      additional overhead = (# mallocs) * (per-slab) / (chunks);
4262          *
4263          * carefully ordering the multiply before the divide, to avoid
4264          * round-off error.
4265          */
4266         if (mi.um_malloc != 0) {
4267                 umem_slab_t slab;
4268                 uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;
4269 
4270                 if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
4271                         mdb_warn("unable to read slab at %p\n", saddr);
4272                 } else {
4273                         long chunks = slab.slab_chunks;
4274                         if (chunks != 0 && c.cache_chunksize != 0 &&
4275                             chunks <= c.cache_slabsize / c.cache_chunksize) {
4276                                 uintmax_t perslab =
4277                                     c.cache_slabsize -
4278                                     (c.cache_chunksize * chunks);
4279 
4280                                 if (c.cache_flags & UMF_HASH) {
4281                                         perslab += sizeof (umem_slab_t) +
4282                                             chunks *
4283                                             ((c.cache_flags & UMF_AUDIT) ?
4284                                             sizeof (umem_bufctl_audit_t) :
4285                                             sizeof (umem_bufctl_t));
4286                                 }
4287                                 overhead +=
4288                                     (perslab * (uintmax_t)mi.um_malloc)/chunks;
4289                         } else {
4290                                 mdb_warn("invalid #chunks (%d) in slab %p\n",
4291                                     chunks, saddr);
4292                         }
4293                 }
4294         }
4295 
4296         if (allocated != 0)
4297                 overhead_pct = (1000ULL * overhead) / allocated;
4298         else
4299                 overhead_pct = 0;
4300 
4301         mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
4302             addr, c.cache_bufsize, maxmalloc,
4303             mi.um_malloc, avg_malloc, allocated, overhead,
4304             overhead_pct / 10, overhead_pct % 10);
4305 
4306         if (!verbose)
4307                 return (DCMD_OK);
4308 
4309         if (!dump)
4310                 mdb_printf("\n");
4311 
4312         if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
4313                 return (DCMD_ERR);
4314 
4315         for (idx = 0; idx < num; idx++) {
4316                 if (alloc_sizes[idx] == c.cache_bufsize)
4317                         break;
4318                 if (alloc_sizes[idx] == 0) {
4319                         idx = num;      /* 0-terminated array */
4320                         break;
4321                 }
4322         }
4323         if (idx == num) {
4324                 mdb_warn(
4325                     "cache %p's size (%d) not in umem_alloc_sizes\n",
4326                     addr, c.cache_bufsize);
4327                 return (DCMD_ERR);
4328         }
4329 
4330         minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
4331         if (minmalloc > 0) {
4332 #ifdef _LP64
4333                 if (minmalloc > UMEM_SECOND_ALIGN)
4334                         minmalloc -= sizeof (struct malloc_data);
4335 #endif
4336                 minmalloc -= sizeof (struct malloc_data);
4337                 minmalloc += 1;
4338         }
4339 
4340         if (dump) {
4341                 for (idx = minmalloc; idx <= maxmalloc; idx++)
4342                         mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
4343                 mdb_printf("\n");
4344         } else {
4345                 umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
4346                     maxbuckets, minbucketsize, geometric);
4347         }
4348 
4349         return (DCMD_OK);
4350 }