10132 smatch fixes for MDB
Reviewed by: Andy Fiddaman <andy@omniosce.org>
--- old/usr/src/cmd/mdb/common/modules/libumem/umem.c
+++ new/usr/src/cmd/mdb/common/modules/libumem/umem.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 - * Copyright 2012 Joyent, Inc. All rights reserved.
27 + * Copyright (c) 2018, Joyent, Inc.
28 28 * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
29 29 */
30 30
31 31 #include "umem.h"
32 32
33 33 #include <sys/vmem_impl_user.h>
34 34 #include <umem_impl.h>
35 35
36 36 #include <alloca.h>
37 37 #include <limits.h>
38 38 #include <mdb/mdb_whatis.h>
39 39 #include <thr_uberdata.h>
40 40
41 41 #include "misc.h"
42 42 #include "leaky.h"
43 43 #include "dist.h"
44 44
45 45 #include "umem_pagesize.h"
46 46
47 47 #define UM_ALLOCATED 0x1
48 48 #define UM_FREE 0x2
49 49 #define UM_BUFCTL 0x4
50 50 #define UM_HASH 0x8
51 51
52 52 int umem_ready;
53 53
54 54 static int umem_stack_depth_warned;
55 55 static uint32_t umem_max_ncpus;
56 56 uint32_t umem_stack_depth;
57 57
58 58 size_t umem_pagesize;
59 59
60 60 #define UMEM_READVAR(var) \
61 61 (umem_readvar(&(var), #var) == -1 && \
62 62 (mdb_warn("failed to read "#var), 1))
63 63
64 64 int
65 65 umem_update_variables(void)
66 66 {
67 67 size_t pagesize;
68 68
69 69 /*
70 70 * Figure out which type of umem is being used; if it's not there
71 71 * yet, succeed quietly.
72 72 */
73 73 if (umem_set_standalone() == -1) {
74 74 umem_ready = 0;
75 75 return (0); /* umem not there yet */
76 76 }
77 77
78 78 /*
79 79 * Solaris 9 used a different name for umem_max_ncpus. It's
80 80 * cheap backwards compatibility to check for both names.
81 81 */
82 82 if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
83 83 umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
84 84 mdb_warn("unable to read umem_max_ncpus or max_ncpus");
85 85 return (-1);
86 86 }
87 87 if (UMEM_READVAR(umem_ready))
88 88 return (-1);
89 89 if (UMEM_READVAR(umem_stack_depth))
90 90 return (-1);
91 91 if (UMEM_READVAR(pagesize))
92 92 return (-1);
93 93
94 94 if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
95 95 if (umem_stack_depth_warned == 0) {
96 96 mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
97 97 umem_stack_depth, UMEM_MAX_STACK_DEPTH);
98 98 umem_stack_depth_warned = 1;
99 99 }
100 100 umem_stack_depth = 0;
101 101 }
102 102
103 103 umem_pagesize = pagesize;
104 104
105 105 return (0);
106 106 }
107 107
108 108 static int
109 109 umem_ptc_walk_init(mdb_walk_state_t *wsp)
110 110 {
111 111 if (wsp->walk_addr == NULL) {
112 112 if (mdb_layered_walk("ulwp", wsp) == -1) {
113 113 mdb_warn("couldn't walk 'ulwp'");
114 114 return (WALK_ERR);
115 115 }
116 116 }
117 117
118 118 return (WALK_NEXT);
119 119 }
120 120
121 121 static int
122 122 umem_ptc_walk_step(mdb_walk_state_t *wsp)
123 123 {
124 124 uintptr_t this;
125 125 int rval;
126 126
127 127 if (wsp->walk_layer != NULL) {
128 128 this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self +
129 129 (uintptr_t)wsp->walk_arg;
130 130 } else {
131 131 this = wsp->walk_addr + (uintptr_t)wsp->walk_arg;
132 132 }
133 133
134 134 for (;;) {
135 135 if (mdb_vread(&this, sizeof (void *), this) == -1) {
136 136 mdb_warn("couldn't read ptc buffer at %p", this);
137 137 return (WALK_ERR);
138 138 }
139 139
140 140 if (this == NULL)
141 141 break;
142 142
143 143 rval = wsp->walk_callback(this, &this, wsp->walk_cbdata);
144 144
145 145 if (rval != WALK_NEXT)
146 146 return (rval);
147 147 }
148 148
149 149 return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE);
150 150 }
151 151
152 152 /*ARGSUSED*/
153 153 static int
154 154 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes)
155 155 {
156 156 mdb_walker_t w;
157 157 char descr[64];
158 158 char name[64];
159 159 int i;
160 160
161 161 (void) mdb_snprintf(descr, sizeof (descr),
162 162 "walk the %s cache", c->cache_name);
163 163
164 164 w.walk_name = c->cache_name;
165 165 w.walk_descr = descr;
166 166 w.walk_init = umem_walk_init;
167 167 w.walk_step = umem_walk_step;
168 168 w.walk_fini = umem_walk_fini;
169 169 w.walk_init_arg = (void *)addr;
170 170
171 171 if (mdb_add_walker(&w) == -1)
172 172 mdb_warn("failed to add %s walker", c->cache_name);
173 173
174 174 if (!(c->cache_flags & UMF_PTC))
175 175 return (WALK_NEXT);
176 176
177 177 /*
178 178 * For the per-thread cache walker, the address is the offset in the
179 179 * tm_roots[] array of the ulwp_t.
180 180 */
181 181 for (i = 0; sizes[i] != 0; i++) {
182 182 if (sizes[i] == c->cache_bufsize)
183 183 break;
184 184 }
185 185
186 186 if (sizes[i] == 0) {
187 187 mdb_warn("cache %s is cached per-thread, but could not find "
188 188 "size in umem_alloc_sizes\n", c->cache_name);
189 189 return (WALK_NEXT);
190 190 }
191 191
192 192 if (i >= NTMEMBASE) {
193 193 mdb_warn("index for %s (%d) exceeds root slots (%d)\n",
194 194 c->cache_name, i, NTMEMBASE);
195 195 return (WALK_NEXT);
196 196 }
197 197
198 198 (void) mdb_snprintf(name, sizeof (name),
199 199 "umem_ptc_%d", c->cache_bufsize);
200 200 (void) mdb_snprintf(descr, sizeof (descr),
201 201 "walk the per-thread cache for %s", c->cache_name);
202 202
203 203 w.walk_name = name;
204 204 w.walk_descr = descr;
205 205 w.walk_init = umem_ptc_walk_init;
206 206 w.walk_step = umem_ptc_walk_step;
207 207 w.walk_fini = NULL;
208 208 w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]);
209 209
210 210 if (mdb_add_walker(&w) == -1)
211 211 mdb_warn("failed to add %s walker", w.walk_name);
212 212
213 213 return (WALK_NEXT);
214 214 }
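Each cache thus gets a walker named after the cache itself, and PTC-enabled caches also get a umem_ptc_<bufsize> walker. A minimal sketch of consuming one of the per-cache walkers from elsewhere in the module, using only MDB module calls already present in this file (the cache name "umem_alloc_32" and the print_buf callback are illustrative assumptions):

	/*ARGSUSED*/
	/* hypothetical callback: print each buffer address handed back */
	static int
	print_buf(uintptr_t addr, const void *data, void *arg)
	{
		mdb_printf("%p\n", addr);
		return (WALK_NEXT);
	}

	/*
	 * The per-cache walker carries the cache address in walk_init_arg,
	 * so a global walk needs no seed address.
	 */
	if (mdb_walk("umem_alloc_32", (mdb_walk_cb_t)print_buf, NULL) == -1)
		mdb_warn("couldn't walk 'umem_alloc_32'");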
215 215
216 216 /*ARGSUSED*/
217 217 static void
218 218 umem_statechange_cb(void *arg)
219 219 {
220 220 static int been_ready = 0;
221 221 GElf_Sym sym;
222 222 int *sizes;
223 223
224 224 #ifndef _KMDB
225 225 leaky_cleanup(1); /* state changes invalidate leaky state */
226 226 #endif
227 227
228 228 if (umem_update_variables() == -1)
229 229 return;
230 230
231 231 if (been_ready)
232 232 return;
233 233
234 234 if (umem_ready != UMEM_READY)
235 235 return;
236 236
237 237 been_ready = 1;
238 238
239 239 /*
240 240 * In order to determine the tm_roots offset of any cache that is
241 241 * cached per-thread, we need to have the umem_alloc_sizes array.
242 242 * Read this, assuring that it is zero-terminated.
243 243 */
244 244 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
245 245 mdb_warn("unable to lookup 'umem_alloc_sizes'");
246 246 return;
247 247 }
248 248
249 249 sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC);
250 250
251 251 if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) {
252 252 mdb_warn("couldn't read 'umem_alloc_sizes'");
253 253 return;
254 254 }
255 255
256 256 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes);
257 257 }
258 258
259 259 int
260 260 umem_abort_messages(void)
261 261 {
262 262 char *umem_error_buffer;
263 263 uint_t umem_error_begin;
264 264 GElf_Sym sym;
265 265 size_t bufsize;
266 266
267 267 if (UMEM_READVAR(umem_error_begin))
268 268 return (DCMD_ERR);
269 269
270 270 if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
271 271 mdb_warn("unable to look up umem_error_buffer");
272 272 return (DCMD_ERR);
273 273 }
274 274
275 275 bufsize = (size_t)sym.st_size;
276 276
277 277 umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
278 278
279 279 if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
280 280 != bufsize) {
281 281 mdb_warn("unable to read umem_error_buffer");
282 282 return (DCMD_ERR);
283 283 }
284 284 /* put a zero after the end of the buffer to simplify printing */
285 285 umem_error_buffer[bufsize] = 0;
286 286
287 287 if ((umem_error_begin % bufsize) == 0)
288 288 mdb_printf("%s\n", umem_error_buffer);
289 289 else {
290 290 umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
291 291 mdb_printf("%s%s\n",
292 292 &umem_error_buffer[umem_error_begin % bufsize],
293 293 umem_error_buffer);
294 294 }
295 295
296 296 return (DCMD_OK);
297 297 }
298 298
299 299 static void
300 300 umem_log_status(const char *name, umem_log_header_t *val)
301 301 {
302 302 umem_log_header_t my_lh;
303 303 uintptr_t pos = (uintptr_t)val;
304 304 size_t size;
305 305
306 306 if (pos == NULL)
307 307 return;
308 308
309 309 if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
310 310 mdb_warn("\nunable to read umem_%s_log pointer %p",
311 311 name, pos);
312 312 return;
313 313 }
314 314
315 315 size = my_lh.lh_chunksize * my_lh.lh_nchunks;
316 316
317 317 if (size % (1024 * 1024) == 0)
318 318 mdb_printf("%s=%dm ", name, size / (1024 * 1024));
319 319 else if (size % 1024 == 0)
320 320 mdb_printf("%s=%dk ", name, size / 1024);
321 321 else
322 322 mdb_printf("%s=%d ", name, size);
323 323 }
324 324
325 325 typedef struct umem_debug_flags {
326 326 const char *udf_name;
327 327 uint_t udf_flags;
328 328 uint_t udf_clear; /* if 0, uses udf_flags */
329 329 } umem_debug_flags_t;
330 330
331 331 umem_debug_flags_t umem_status_flags[] = {
332 332 { "random", UMF_RANDOMIZE, UMF_RANDOM },
333 333 { "default", UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
334 334 { "audit", UMF_AUDIT },
335 335 { "guards", UMF_DEADBEEF | UMF_REDZONE },
336 336 { "nosignal", UMF_CHECKSIGNAL },
337 337 { "firewall", UMF_FIREWALL },
338 338 { "lite", UMF_LITE },
339 339 { "checknull", UMF_CHECKNULL },
340 340 { NULL }
341 341 };
342 342
343 343 /*ARGSUSED*/
344 344 int
345 345 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
346 346 {
347 347 int umem_logging;
348 348
349 349 umem_log_header_t *umem_transaction_log;
350 350 umem_log_header_t *umem_content_log;
351 351 umem_log_header_t *umem_failure_log;
352 352 umem_log_header_t *umem_slab_log;
353 353
354 354 mdb_printf("Status:\t\t%s\n",
355 355 umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
356 356 umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
357 357 umem_ready == UMEM_READY_INITING ? "initialization in process" :
358 358 umem_ready == UMEM_READY ? "ready and active" :
359 359 umem_ready == 0 ? "not loaded into address space" :
360 360 "unknown (umem_ready invalid)");
361 361
362 362 if (umem_ready == 0)
363 363 return (DCMD_OK);
364 364
365 365 mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
366 366
367 367 if (UMEM_READVAR(umem_logging))
368 368 goto err;
369 369 if (UMEM_READVAR(umem_transaction_log))
370 370 goto err;
371 371 if (UMEM_READVAR(umem_content_log))
372 372 goto err;
373 373 if (UMEM_READVAR(umem_failure_log))
374 374 goto err;
375 375 if (UMEM_READVAR(umem_slab_log))
376 376 goto err;
377 377
378 378 mdb_printf("Logs:\t\t");
379 379 umem_log_status("transaction", umem_transaction_log);
380 380 umem_log_status("content", umem_content_log);
381 381 umem_log_status("fail", umem_failure_log);
382 382 umem_log_status("slab", umem_slab_log);
383 383 if (!umem_logging)
384 384 mdb_printf("(inactive)");
385 385 mdb_printf("\n");
386 386
387 387 mdb_printf("Message buffer:\n");
388 388 return (umem_abort_messages());
389 389
390 390 err:
391 391 mdb_printf("Message buffer:\n");
392 392 (void) umem_abort_messages();
393 393 return (DCMD_ERR);
394 394 }
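When the module registers its dcmds (the dcmd table appears later in this file, outside this hunk), this function is the handler invoked as ::umem_status (assuming the conventional registration name) against a libumem-linked process or core. The Status, Concurrency, and Logs lines come straight from the variables read above via UMEM_READVAR, and the message buffer dump is delegated to umem_abort_messages().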
395 395
396 396 typedef struct {
397 397 uintptr_t ucw_first;
398 398 uintptr_t ucw_current;
399 399 } umem_cache_walk_t;
400 400
401 401 int
402 402 umem_cache_walk_init(mdb_walk_state_t *wsp)
403 403 {
404 404 umem_cache_walk_t *ucw;
405 405 umem_cache_t c;
406 406 uintptr_t cp;
407 407 GElf_Sym sym;
408 408
409 409 if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
410 410 mdb_warn("couldn't find umem_null_cache");
411 411 return (WALK_ERR);
412 412 }
413 413
414 414 cp = (uintptr_t)sym.st_value;
415 415
416 416 if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
417 417 mdb_warn("couldn't read cache at %p", cp);
418 418 return (WALK_ERR);
419 419 }
420 420
421 421 ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
422 422
423 423 ucw->ucw_first = cp;
424 424 ucw->ucw_current = (uintptr_t)c.cache_next;
425 425 wsp->walk_data = ucw;
426 426
427 427 return (WALK_NEXT);
428 428 }
429 429
430 430 int
431 431 umem_cache_walk_step(mdb_walk_state_t *wsp)
432 432 {
433 433 umem_cache_walk_t *ucw = wsp->walk_data;
434 434 umem_cache_t c;
435 435 int status;
436 436
437 437 if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
438 438 mdb_warn("couldn't read cache at %p", ucw->ucw_current);
439 439 return (WALK_DONE);
440 440 }
441 441
442 442 status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
443 443
444 444 if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
445 445 return (WALK_DONE);
446 446
447 447 return (status);
448 448 }
449 449
450 450 void
451 451 umem_cache_walk_fini(mdb_walk_state_t *wsp)
452 452 {
453 453 umem_cache_walk_t *ucw = wsp->walk_data;
454 454 mdb_free(ucw, sizeof (umem_cache_walk_t));
455 455 }
456 456
457 457 typedef struct {
458 458 umem_cpu_t *ucw_cpus;
459 459 uint32_t ucw_current;
460 460 uint32_t ucw_max;
461 461 } umem_cpu_walk_state_t;
462 462
463 463 int
464 464 umem_cpu_walk_init(mdb_walk_state_t *wsp)
465 465 {
466 466 umem_cpu_t *umem_cpus;
467 467
468 468 umem_cpu_walk_state_t *ucw;
469 469
470 470 if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
471 471 mdb_warn("failed to read 'umem_cpus'");
472 472 return (WALK_ERR);
473 473 }
474 474
475 475 ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
476 476
477 477 ucw->ucw_cpus = umem_cpus;
478 478 ucw->ucw_current = 0;
479 479 ucw->ucw_max = umem_max_ncpus;
480 480
481 481 wsp->walk_data = ucw;
482 482 return (WALK_NEXT);
483 483 }
484 484
485 485 int
486 486 umem_cpu_walk_step(mdb_walk_state_t *wsp)
487 487 {
488 488 umem_cpu_t cpu;
489 489 umem_cpu_walk_state_t *ucw = wsp->walk_data;
490 490
491 491 uintptr_t caddr;
492 492
493 493 if (ucw->ucw_current >= ucw->ucw_max)
494 494 return (WALK_DONE);
495 495
496 496 caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
497 497
498 498 if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
499 499 mdb_warn("failed to read cpu %d", ucw->ucw_current);
500 500 return (WALK_ERR);
501 501 }
502 502
503 503 ucw->ucw_current++;
504 504
505 505 return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
506 506 }
507 507
508 508 void
509 509 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
510 510 {
511 511 umem_cpu_walk_state_t *ucw = wsp->walk_data;
512 512
513 513 mdb_free(ucw, sizeof (*ucw));
514 514 }
515 515
516 516 int
517 517 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
518 518 {
519 519 if (wsp->walk_addr == NULL) {
520 520 mdb_warn("umem_cpu_cache doesn't support global walks");
521 521 return (WALK_ERR);
522 522 }
523 523
524 524 if (mdb_layered_walk("umem_cpu", wsp) == -1) {
525 525 mdb_warn("couldn't walk 'umem_cpu'");
526 526 return (WALK_ERR);
527 527 }
528 528
529 529 wsp->walk_data = (void *)wsp->walk_addr;
530 530
531 531 return (WALK_NEXT);
532 532 }
533 533
534 534 int
535 535 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
536 536 {
537 537 uintptr_t caddr = (uintptr_t)wsp->walk_data;
538 538 const umem_cpu_t *cpu = wsp->walk_layer;
539 539 umem_cpu_cache_t cc;
540 540
541 541 caddr += cpu->cpu_cache_offset;
542 542
543 543 if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
544 544 mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
545 545 return (WALK_ERR);
546 546 }
547 547
548 548 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
549 549 }
550 550
551 551 int
552 552 umem_slab_walk_init(mdb_walk_state_t *wsp)
553 553 {
554 554 uintptr_t caddr = wsp->walk_addr;
555 555 umem_cache_t c;
556 556
557 557 if (caddr == NULL) {
558 558 mdb_warn("umem_slab doesn't support global walks\n");
559 559 return (WALK_ERR);
560 560 }
561 561
562 562 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
563 563 mdb_warn("couldn't read umem_cache at %p", caddr);
564 564 return (WALK_ERR);
565 565 }
566 566
567 567 wsp->walk_data =
568 568 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
569 569 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
570 570
571 571 return (WALK_NEXT);
572 572 }
573 573
574 574 int
575 575 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
576 576 {
577 577 uintptr_t caddr = wsp->walk_addr;
578 578 umem_cache_t c;
579 579
580 580 if (caddr == NULL) {
581 581 mdb_warn("umem_slab_partial doesn't support global walks\n");
582 582 return (WALK_ERR);
583 583 }
584 584
585 585 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
586 586 mdb_warn("couldn't read umem_cache at %p", caddr);
587 587 return (WALK_ERR);
588 588 }
589 589
590 590 wsp->walk_data =
591 591 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
592 592 wsp->walk_addr = (uintptr_t)c.cache_freelist;
593 593
594 594 /*
595 595 * Some consumers (umem_walk_step(), in particular) require at
596 596 * least one callback if there are any buffers in the cache. So
597 597 * if there are *no* partial slabs, report the last full slab, if
598 598 * any.
599 599 *
600 600 * Yes, this is ugly, but it's cleaner than the other possibilities.
601 601 */
602 602 if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
603 603 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
604 604
605 605 return (WALK_NEXT);
606 606 }
607 607
608 608 int
609 609 umem_slab_walk_step(mdb_walk_state_t *wsp)
610 610 {
611 611 umem_slab_t s;
612 612 uintptr_t addr = wsp->walk_addr;
613 613 uintptr_t saddr = (uintptr_t)wsp->walk_data;
614 614 uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
615 615
616 616 if (addr == saddr)
617 617 return (WALK_DONE);
618 618
619 619 if (mdb_vread(&s, sizeof (s), addr) == -1) {
620 620 mdb_warn("failed to read slab at %p", wsp->walk_addr);
621 621 return (WALK_ERR);
622 622 }
623 623
624 624 if ((uintptr_t)s.slab_cache != caddr) {
625 625 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
626 626 addr, caddr, s.slab_cache);
627 627 return (WALK_ERR);
628 628 }
629 629
630 630 wsp->walk_addr = (uintptr_t)s.slab_next;
631 631
632 632 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
633 633 }
634 634
635 635 int
636 636 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
637 637 {
638 638 umem_cache_t c;
639 639
640 640 if (!(flags & DCMD_ADDRSPEC)) {
641 641 if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
642 642 mdb_warn("can't walk umem_cache");
643 643 return (DCMD_ERR);
644 644 }
645 645 return (DCMD_OK);
646 646 }
647 647
648 648 if (DCMD_HDRSPEC(flags))
649 649 mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
650 650 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
651 651
652 652 if (mdb_vread(&c, sizeof (c), addr) == -1) {
653 653 mdb_warn("couldn't read umem_cache at %p", addr);
654 654 return (DCMD_ERR);
655 655 }
656 656
657 657 mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
658 658 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
659 659
660 660 return (DCMD_OK);
661 661 }
662 662
663 663 static int
664 664 addrcmp(const void *lhs, const void *rhs)
665 665 {
666 666 uintptr_t p1 = *((uintptr_t *)lhs);
667 667 uintptr_t p2 = *((uintptr_t *)rhs);
668 668
669 669 if (p1 < p2)
670 670 return (-1);
671 671 if (p1 > p2)
672 672 return (1);
673 673 return (0);
674 674 }
675 675
676 676 static int
677 677 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
678 678 {
679 679 const umem_bufctl_audit_t *bcp1 = *lhs;
680 680 const umem_bufctl_audit_t *bcp2 = *rhs;
681 681
682 682 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
683 683 return (-1);
684 684
685 685 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
686 686 return (1);
687 687
688 688 return (0);
689 689 }
690 690
691 691 typedef struct umem_hash_walk {
692 692 uintptr_t *umhw_table;
693 693 size_t umhw_nelems;
694 694 size_t umhw_pos;
695 695 umem_bufctl_t umhw_cur;
696 696 } umem_hash_walk_t;
697 697
698 698 int
699 699 umem_hash_walk_init(mdb_walk_state_t *wsp)
700 700 {
701 701 umem_hash_walk_t *umhw;
702 702 uintptr_t *hash;
703 703 umem_cache_t c;
704 704 uintptr_t haddr, addr = wsp->walk_addr;
705 705 size_t nelems;
706 706 size_t hsize;
707 707
708 708 if (addr == NULL) {
709 709 mdb_warn("umem_hash doesn't support global walks\n");
710 710 return (WALK_ERR);
711 711 }
712 712
713 713 if (mdb_vread(&c, sizeof (c), addr) == -1) {
714 714 mdb_warn("couldn't read cache at addr %p", addr);
715 715 return (WALK_ERR);
716 716 }
717 717
718 718 if (!(c.cache_flags & UMF_HASH)) {
719 719 mdb_warn("cache %p doesn't have a hash table\n", addr);
720 720 return (WALK_DONE); /* nothing to do */
721 721 }
722 722
723 723 umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
724 724 umhw->umhw_cur.bc_next = NULL;
725 725 umhw->umhw_pos = 0;
726 726
727 727 umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
728 728 hsize = nelems * sizeof (uintptr_t);
729 729 haddr = (uintptr_t)c.cache_hash_table;
730 730
731 731 umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
732 732 if (mdb_vread(hash, hsize, haddr) == -1) {
733 733 mdb_warn("failed to read hash table at %p", haddr);
734 734 mdb_free(hash, hsize);
735 735 mdb_free(umhw, sizeof (umem_hash_walk_t));
736 736 return (WALK_ERR);
737 737 }
738 738
739 739 wsp->walk_data = umhw;
740 740
741 741 return (WALK_NEXT);
742 742 }
743 743
744 744 int
745 745 umem_hash_walk_step(mdb_walk_state_t *wsp)
746 746 {
747 747 umem_hash_walk_t *umhw = wsp->walk_data;
748 748 uintptr_t addr = NULL;
749 749
750 750 if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
751 751 while (umhw->umhw_pos < umhw->umhw_nelems) {
752 752 if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
753 753 break;
754 754 }
755 755 }
756 756 if (addr == NULL)
757 757 return (WALK_DONE);
758 758
759 759 if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
760 760 mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
761 761 return (WALK_ERR);
762 762 }
763 763
764 764 return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
765 765 }
766 766
767 767 void
768 768 umem_hash_walk_fini(mdb_walk_state_t *wsp)
769 769 {
770 770 umem_hash_walk_t *umhw = wsp->walk_data;
771 771
772 772 if (umhw == NULL)
773 773 return;
774 774
775 775 mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
776 776 mdb_free(umhw, sizeof (umem_hash_walk_t));
777 777 }
778 778
779 779 /*
780 780 * Find the address of the bufctl structure for the address 'buf' in cache
781 781 * 'cp', which is at address caddr, and place it in *out.
782 782 */
783 783 static int
784 784 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
785 785 {
786 786 uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
787 787 umem_bufctl_t *bcp;
788 788 umem_bufctl_t bc;
789 789
790 790 if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
791 791 mdb_warn("unable to read hash bucket for %p in cache %p",
792 792 buf, caddr);
793 793 return (-1);
794 794 }
795 795
796 796 while (bcp != NULL) {
797 797 if (mdb_vread(&bc, sizeof (umem_bufctl_t),
798 798 (uintptr_t)bcp) == -1) {
799 799 mdb_warn("unable to read bufctl at %p", bcp);
800 800 return (-1);
801 801 }
802 802 if (bc.bc_addr == buf) {
803 803 *out = (uintptr_t)bcp;
804 804 return (0);
805 805 }
806 806 bcp = bc.bc_next;
807 807 }
808 808
809 809 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
810 810 return (-1);
811 811 }
812 812
813 813 int
814 814 umem_get_magsize(const umem_cache_t *cp)
815 815 {
816 816 uintptr_t addr = (uintptr_t)cp->cache_magtype;
817 817 GElf_Sym mt_sym;
818 818 umem_magtype_t mt;
819 819 int res;
820 820
821 821 /*
822 822 * if cpu 0 has a non-zero magsize, it must be correct. caches
823 823 * with UMF_NOMAGAZINE have disabled their magazine layers, so
824 824 * it is okay to return 0 for them.
825 825 */
826 826 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
827 827 (cp->cache_flags & UMF_NOMAGAZINE))
828 828 return (res);
829 829
830 830 if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
831 831 mdb_warn("unable to read 'umem_magtype'");
832 832 } else if (addr < mt_sym.st_value ||
833 833 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
834 834 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
835 835 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
836 836 cp->cache_name, addr);
837 837 return (0);
838 838 }
839 839 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
840 840 mdb_warn("unable to read magtype at %a", addr);
841 841 return (0);
842 842 }
843 843 return (mt.mt_magsize);
844 844 }
845 845
846 846 /*ARGSUSED*/
847 847 static int
848 848 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
849 849 {
850 850 *est -= (sp->slab_chunks - sp->slab_refcnt);
851 851
852 852 return (WALK_NEXT);
853 853 }
854 854
855 855 /*
856 856 * Returns an upper bound on the number of allocated buffers in a given
857 857 * cache.
858 858 */
859 859 size_t
860 860 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
861 861 {
862 862 int magsize;
863 863 size_t cache_est;
864 864
865 865 cache_est = cp->cache_buftotal;
866 866
867 867 (void) mdb_pwalk("umem_slab_partial",
868 868 (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
869 869
870 870 if ((magsize = umem_get_magsize(cp)) != 0) {
871 871 size_t mag_est = cp->cache_full.ml_total * magsize;
872 872
873 873 if (cache_est >= mag_est) {
874 874 cache_est -= mag_est;
875 875 } else {
876 876 mdb_warn("cache %p's magazine layer holds more buffers "
877 877 "than the slab layer.\n", addr);
878 878 }
879 879 }
880 880 return (cache_est);
881 881 }
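To make the estimate concrete with hypothetical numbers: a cache with cache_buftotal == 1000, partial slabs holding 120 free chunks between them, and 4 full depot magazines of 15 rounds each would be reported as at most 1000 - 120 - 4*15 = 820 allocated buffers.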
882 882
883 883 #define READMAG_ROUNDS(rounds) { \
884 884 if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
885 885 mdb_warn("couldn't read magazine at %p", ump); \
886 886 goto fail; \
887 887 } \
888 888 for (i = 0; i < rounds; i++) { \
889 889 maglist[magcnt++] = mp->mag_round[i]; \
890 890 if (magcnt == magmax) { \
891 891 mdb_warn("%d magazines exceeds fudge factor\n", \
892 892 magcnt); \
893 893 goto fail; \
894 894 } \
895 895 } \
896 896 }
897 897
898 898 static int
899 899 umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
900 900 void ***maglistp, size_t *magcntp, size_t *magmaxp)
901 901 {
902 902 umem_magazine_t *ump, *mp;
903 903 void **maglist = NULL;
904 904 int i, cpu;
905 905 size_t magsize, magmax, magbsize;
906 906 size_t magcnt = 0;
907 907
908 908 /*
909 909 * Read the magtype out of the cache, after verifying the pointer's
910 910 * correctness.
911 911 */
912 912 magsize = umem_get_magsize(cp);
913 913 if (magsize == 0) {
914 914 *maglistp = NULL;
915 915 *magcntp = 0;
916 916 *magmaxp = 0;
917 917 return (0);
918 918 }
919 919
920 920 /*
921 921 * There are several places where we need to go buffer hunting:
922 922 * the per-CPU loaded magazine, the per-CPU spare full magazine,
923 923 * and the full magazine list in the depot.
924 924 *
925 925 * For an upper bound on the number of buffers in the magazine
926 926 * layer, we have the number of magazines on the cache_full
927 927 * list plus at most two magazines per CPU (the loaded and the
928 928 * spare). Toss in 100 magazines as a fudge factor in case this
929 929 * is live (the number "100" comes from the same fudge factor in
930 930 * crash(1M)).
931 931 */
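	/*
	 * Worked example (hypothetical numbers): with ml_total == 10 full
	 * magazines in the depot, umem_max_ncpus == 4 and magsize == 15
	 * rounds per magazine, magmax below comes to
	 * (10 + 2*4 + 100) * 15 = 1770 buffer pointers.
	 */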
932 932 magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
933 933 magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
934 934
935 935 if (magbsize >= PAGESIZE / 2) {
936 936 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
937 937 addr, magbsize);
938 938 return (-1);
939 939 }
940 940
941 941 maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP);
942 942 mp = mdb_alloc(magbsize, UM_SLEEP);
943 943 if (mp == NULL || maglist == NULL)
944 944 goto fail;
945 945
946 946 /*
947 947 * First up: the magazines in the depot (i.e. on the cache_full list).
948 948 */
949 949 for (ump = cp->cache_full.ml_list; ump != NULL; ) {
950 950 READMAG_ROUNDS(magsize);
951 951 ump = mp->mag_next;
952 952
953 953 if (ump == cp->cache_full.ml_list)
954 954 break; /* cache_full list loop detected */
955 955 }
956 956
957 957 dprintf(("cache_full list done\n"));
958 958
959 959 /*
960 960 * Now whip through the CPUs, snagging the loaded magazines
961 961 * and full spares.
962 962 */
963 963 for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
964 964 umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
965 965
966 966 dprintf(("reading cpu cache %p\n",
967 967 (uintptr_t)ccp - (uintptr_t)cp + addr));
968 968
969 969 if (ccp->cc_rounds > 0 &&
970 970 (ump = ccp->cc_loaded) != NULL) {
971 971 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
972 972 READMAG_ROUNDS(ccp->cc_rounds);
973 973 }
974 974
975 975 if (ccp->cc_prounds > 0 &&
976 976 (ump = ccp->cc_ploaded) != NULL) {
977 977 dprintf(("reading %d previously loaded rounds\n",
978 978 ccp->cc_prounds));
979 979 READMAG_ROUNDS(ccp->cc_prounds);
980 980 }
981 981 }
982 982
983 983 dprintf(("magazine layer: %d buffers\n", magcnt));
984 984
985 985 mdb_free(mp, magbsize);
986 986
987 987 *maglistp = maglist;
988 988 *magcntp = magcnt;
989 989 *magmaxp = magmax;
990 990
991 991 return (0);
992 992
993 993 fail:
994 994 if (mp)
995 995 mdb_free(mp, magbsize);
996 996 if (maglist)
997 997 mdb_free(maglist, magmax * sizeof (void *));
998 998
999 999 return (-1);
1000 1000 }
1001 1001
1002 1002 typedef struct umem_read_ptc_walk {
1003 1003 void **urpw_buf;
1004 1004 size_t urpw_cnt;
1005 1005 size_t urpw_max;
1006 1006 } umem_read_ptc_walk_t;
1007 1007
1008 1008 /*ARGSUSED*/
1009 1009 static int
1010 1010 umem_read_ptc_walk_buf(uintptr_t addr,
1011 1011 const void *ignored, umem_read_ptc_walk_t *urpw)
1012 1012 {
1013 1013 if (urpw->urpw_cnt == urpw->urpw_max) {
1014 1014 size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1;
1015 1015 void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP);
1016 1016
1017 1017 if (nmax > 1) {
1018 1018 size_t osize = urpw->urpw_max * sizeof (void *);
1019 1019 bcopy(urpw->urpw_buf, new, osize);
1020 1020 mdb_free(urpw->urpw_buf, osize);
1021 1021 }
1022 1022
1023 1023 urpw->urpw_buf = new;
1024 1024 urpw->urpw_max = nmax;
1025 1025 }
1026 1026
1027 1027 urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr;
1028 1028
1029 1029 return (WALK_NEXT);
1030 1030 }
1031 1031
1032 1032 static int
1033 1033 umem_read_ptc(umem_cache_t *cp,
1034 1034 void ***buflistp, size_t *bufcntp, size_t *bufmaxp)
1035 1035 {
1036 1036 umem_read_ptc_walk_t urpw;
1037 1037 char walk[60];
1038 1038 int rval;
1039 1039
1040 1040 if (!(cp->cache_flags & UMF_PTC))
1041 1041 return (0);
1042 1042
1043 1043 (void) mdb_snprintf(walk, sizeof (walk), "umem_ptc_%d",
1044 1044 cp->cache_bufsize);
1045 1045
1046 1046 urpw.urpw_buf = *buflistp;
1047 1047 urpw.urpw_cnt = *bufcntp;
1048 1048 urpw.urpw_max = *bufmaxp;
1049 1049
1050 1050 if ((rval = mdb_walk(walk,
1051 1051 (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) {
1052 1052 mdb_warn("couldn't walk %s", walk);
1053 1053 }
1054 1054
1055 1055 *buflistp = urpw.urpw_buf;
1056 1056 *bufcntp = urpw.urpw_cnt;
1057 1057 *bufmaxp = urpw.urpw_max;
1058 1058
1059 1059 return (rval);
1060 1060 }
1061 1061
1062 1062 static int
1063 1063 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1064 1064 {
1065 1065 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1066 1066 }
1067 1067
1068 1068 static int
1069 1069 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1070 1070 {
1071 1071 umem_bufctl_audit_t *b;
1072 1072 UMEM_LOCAL_BUFCTL_AUDIT(&b);
1073 1073
1074 1074 /*
1075 1075 * if UMF_AUDIT is not set, we know that we're looking at a
1076 1076 * umem_bufctl_t.
1077 1077 */
1078 1078 if (!(cp->cache_flags & UMF_AUDIT) ||
1079 1079 mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
1080 1080 (void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
1081 1081 if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
1082 1082 mdb_warn("unable to read bufctl at %p", buf);
1083 1083 return (WALK_ERR);
1084 1084 }
1085 1085 }
1086 1086
1087 1087 return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
1088 1088 }
1089 1089
1090 1090 typedef struct umem_walk {
1091 1091 int umw_type;
1092 1092
1093 1093 uintptr_t umw_addr; /* cache address */
1094 1094 umem_cache_t *umw_cp;
1095 1095 size_t umw_csize;
1096 1096
1097 1097 /*
1098 1098 * magazine layer
1099 1099 */
1100 1100 void **umw_maglist;
1101 1101 size_t umw_max;
1102 1102 size_t umw_count;
1103 1103 size_t umw_pos;
1104 1104
1105 1105 /*
1106 1106 * slab layer
1107 1107 */
1108 1108 char *umw_valid; /* to keep track of freed buffers */
1109 1109 char *umw_ubase; /* buffer for slab data */
1110 1110 } umem_walk_t;
1111 1111
1112 1112 static int
1113 1113 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
1114 1114 {
1115 1115 umem_walk_t *umw;
1116 1116 int csize;
1117 1117 umem_cache_t *cp;
1118 1118 size_t vm_quantum;
1119 1119
1120 1120 size_t magmax, magcnt;
1121 1121 void **maglist = NULL;
1122 1122 uint_t chunksize, slabsize;
1123 1123 int status = WALK_ERR;
1124 1124 uintptr_t addr = wsp->walk_addr;
1125 1125 const char *layered;
1126 1126
1127 1127 type &= ~UM_HASH;
1128 1128
1129 1129 if (addr == NULL) {
1130 1130 mdb_warn("umem walk doesn't support global walks\n");
1131 1131 return (WALK_ERR);
1132 1132 }
1133 1133
1134 1134 dprintf(("walking %p\n", addr));
1135 1135
1136 1136 /*
1137 1137 * The number of "cpus" determines how large the cache is.
1138 1138 */
1139 1139 csize = UMEM_CACHE_SIZE(umem_max_ncpus);
1140 1140 cp = mdb_alloc(csize, UM_SLEEP);
1141 1141
1142 1142 if (mdb_vread(cp, csize, addr) == -1) {
1143 1143 mdb_warn("couldn't read cache at addr %p", addr);
1144 1144 goto out2;
1145 1145 }
1146 1146
1147 1147 /*
1148 1148 * It's easy for someone to hand us an invalid cache address.
1149 1149 * Unfortunately, it is hard for this walker to survive an
1150 1150 * invalid cache cleanly. So we make sure that:
1151 1151 *
1152 1152 * 1. the vmem arena for the cache is readable,
1153 1153 * 2. the vmem arena's quantum is a power of 2,
1154 1154 * 3. our slabsize is a multiple of the quantum, and
1155 1155 * 4. our chunksize is >0 and less than our slabsize.
1156 1156 */
1157 1157 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1158 1158 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1159 1159 vm_quantum == 0 ||
1160 1160 (vm_quantum & (vm_quantum - 1)) != 0 ||
1161 1161 cp->cache_slabsize < vm_quantum ||
1162 1162 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1163 1163 cp->cache_chunksize == 0 ||
1164 1164 cp->cache_chunksize > cp->cache_slabsize) {
1165 1165 mdb_warn("%p is not a valid umem_cache_t\n", addr);
1166 1166 goto out2;
1167 1167 }
1168 1168
1169 1169 dprintf(("buf total is %d\n", cp->cache_buftotal));
1170 1170
1171 1171 if (cp->cache_buftotal == 0) {
1172 1172 mdb_free(cp, csize);
1173 1173 return (WALK_DONE);
1174 1174 }
1175 1175
1176 1176 /*
1177 1177 * If they ask for bufctls, but it's a small-slab cache,
1178 1178 * there is nothing to report.
1179 1179 */
1180 1180 if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1181 1181 dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1182 1182 cp->cache_flags));
1183 1183 mdb_free(cp, csize);
1184 1184 return (WALK_DONE);
1185 1185 }
1186 1186
1187 1187 /*
1188 1188 * Read in the contents of the magazine layer
1189 1189 */
1190 1190 if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0)
1191 1191 goto out2;
1192 1192
1193 1193 /*
1194 1194 * Read in the contents of the per-thread caches, if any
1195 1195 */
1196 1196 if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0)
1197 1197 goto out2;
1198 1198
1199 1199 /*
1200 1200 * We have all of the buffers from the magazines and from the
1201 1201 * per-thread cache (if any); if we are walking allocated buffers,
1202 1202 * sort them so we can bsearch them later.
1203 1203 */
1204 1204 if (type & UM_ALLOCATED)
1205 1205 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1206 1206
1207 1207 wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1208 1208
1209 1209 umw->umw_type = type;
1210 1210 umw->umw_addr = addr;
1211 1211 umw->umw_cp = cp;
1212 1212 umw->umw_csize = csize;
1213 1213 umw->umw_maglist = maglist;
1214 1214 umw->umw_max = magmax;
1215 1215 umw->umw_count = magcnt;
1216 1216 umw->umw_pos = 0;
1217 1217
1218 1218 /*
1219 1219 * When walking allocated buffers in a UMF_HASH cache, we walk the
1220 1220 * hash table instead of the slab layer.
1221 1221 */
1222 1222 if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1223 1223 layered = "umem_hash";
1224 1224
1225 1225 umw->umw_type |= UM_HASH;
1226 1226 } else {
1227 1227 /*
1228 1228 * If we are walking freed buffers, we only need the
1229 1229 * magazine layer plus the partially allocated slabs.
1230 1230 * To walk allocated buffers, we need all of the slabs.
1231 1231 */
1232 1232 if (type & UM_ALLOCATED)
1233 1233 layered = "umem_slab";
1234 1234 else
1235 1235 layered = "umem_slab_partial";
1236 1236
1237 1237 /*
1238 1238 * for small-slab caches, we read in the entire slab. For
1239 1239 * freed buffers, we can just walk the freelist. For
1240 1240 * allocated buffers, we use a 'valid' array to track
1241 1241 * the freed buffers.
1242 1242 */
1243 1243 if (!(cp->cache_flags & UMF_HASH)) {
1244 1244 chunksize = cp->cache_chunksize;
1245 1245 slabsize = cp->cache_slabsize;
1246 1246
1247 1247 umw->umw_ubase = mdb_alloc(slabsize +
1248 1248 sizeof (umem_bufctl_t), UM_SLEEP);
1249 1249
1250 1250 if (type & UM_ALLOCATED)
1251 1251 umw->umw_valid =
1252 1252 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1253 1253 }
1254 1254 }
1255 1255
1256 1256 status = WALK_NEXT;
1257 1257
1258 1258 if (mdb_layered_walk(layered, wsp) == -1) {
1259 1259 mdb_warn("unable to start layered '%s' walk", layered);
1260 1260 status = WALK_ERR;
1261 1261 }
1262 1262
1263 1263 out1:
1264 1264 if (status == WALK_ERR) {
1265 1265 if (umw->umw_valid)
1266 1266 mdb_free(umw->umw_valid, slabsize / chunksize);
1267 1267
1268 1268 if (umw->umw_ubase)
1269 1269 mdb_free(umw->umw_ubase, slabsize +
1270 1270 sizeof (umem_bufctl_t));
1271 1271
1272 1272 if (umw->umw_maglist)
1273 1273 mdb_free(umw->umw_maglist, umw->umw_max *
1274 1274 sizeof (uintptr_t));
1275 1275
1276 1276 mdb_free(umw, sizeof (umem_walk_t));
1277 1277 wsp->walk_data = NULL;
1278 1278 }
1279 1279
1280 1280 out2:
1281 1281 if (status == WALK_ERR)
1282 1282 mdb_free(cp, csize);
1283 1283
1284 1284 return (status);
1285 1285 }
1286 1286
1287 1287 int
1288 1288 umem_walk_step(mdb_walk_state_t *wsp)
1289 1289 {
1290 1290 umem_walk_t *umw = wsp->walk_data;
1291 1291 int type = umw->umw_type;
1292 1292 umem_cache_t *cp = umw->umw_cp;
1293 1293
1294 1294 void **maglist = umw->umw_maglist;
1295 1295 int magcnt = umw->umw_count;
1296 1296
1297 1297 uintptr_t chunksize, slabsize;
1298 1298 uintptr_t addr;
1299 1299 const umem_slab_t *sp;
1300 1300 const umem_bufctl_t *bcp;
1301 1301 umem_bufctl_t bc;
1302 1302
1303 1303 int chunks;
1304 1304 char *kbase;
1305 1305 void *buf;
1306 1306 int i, ret;
1307 1307
1308 1308 char *valid, *ubase;
1309 1309
1310 1310 /*
1311 1311 * first, handle the 'umem_hash' layered walk case
1312 1312 */
1313 1313 if (type & UM_HASH) {
1314 1314 /*
1315 1315 * We have a buffer which has been allocated out of the
1316 1316 * global layer. We need to make sure that it's not
1317 1317 * actually sitting in a magazine before we report it as
1318 1318 * an allocated buffer.
1319 1319 */
1320 1320 buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1321 1321
1322 1322 if (magcnt > 0 &&
1323 1323 bsearch(&buf, maglist, magcnt, sizeof (void *),
1324 1324 addrcmp) != NULL)
1325 1325 return (WALK_NEXT);
1326 1326
1327 1327 if (type & UM_BUFCTL)
1328 1328 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1329 1329
1330 1330 return (umem_walk_callback(wsp, (uintptr_t)buf));
1331 1331 }
1332 1332
1333 1333 ret = WALK_NEXT;
1334 1334
1335 1335 addr = umw->umw_addr;
1336 1336
1337 1337 /*
1338 1338 * If we're walking freed buffers, report everything in the
1339 1339 * magazine layer before processing the first slab.
1340 1340 */
1341 1341 if ((type & UM_FREE) && magcnt != 0) {
1342 1342 umw->umw_count = 0; /* only do this once */
1343 1343 for (i = 0; i < magcnt; i++) {
1344 1344 buf = maglist[i];
1345 1345
1346 1346 if (type & UM_BUFCTL) {
1347 1347 uintptr_t out;
1348 1348
1349 1349 if (cp->cache_flags & UMF_BUFTAG) {
1350 1350 umem_buftag_t *btp;
1351 1351 umem_buftag_t tag;
1352 1352
1353 1353 /* LINTED - alignment */
1354 1354 btp = UMEM_BUFTAG(cp, buf);
1355 1355 if (mdb_vread(&tag, sizeof (tag),
1356 1356 (uintptr_t)btp) == -1) {
1357 1357 mdb_warn("reading buftag for "
1358 1358 "%p at %p", buf, btp);
1359 1359 continue;
1360 1360 }
1361 1361 out = (uintptr_t)tag.bt_bufctl;
1362 1362 } else {
1363 1363 if (umem_hash_lookup(cp, addr, buf,
1364 1364 &out) == -1)
1365 1365 continue;
1366 1366 }
1367 1367 ret = bufctl_walk_callback(cp, wsp, out);
1368 1368 } else {
1369 1369 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1370 1370 }
1371 1371
1372 1372 if (ret != WALK_NEXT)
1373 1373 return (ret);
1374 1374 }
1375 1375 }
1376 1376
1377 1377 /*
1378 1378 * Handle the buffers in the current slab
1379 1379 */
1380 1380 chunksize = cp->cache_chunksize;
1381 1381 slabsize = cp->cache_slabsize;
1382 1382
1383 1383 sp = wsp->walk_layer;
1384 1384 chunks = sp->slab_chunks;
1385 1385 kbase = sp->slab_base;
1386 1386
1387 1387 dprintf(("kbase is %p\n", kbase));
1388 1388
1389 1389 if (!(cp->cache_flags & UMF_HASH)) {
1390 1390 valid = umw->umw_valid;
1391 1391 ubase = umw->umw_ubase;
1392 1392
1393 1393 if (mdb_vread(ubase, chunks * chunksize,
1394 1394 (uintptr_t)kbase) == -1) {
1395 1395 mdb_warn("failed to read slab contents at %p", kbase);
1396 1396 return (WALK_ERR);
1397 1397 }
1398 1398
1399 1399 /*
1400 1400 * Set up the valid map as fully allocated -- we'll punch
1401 1401 * out the freelist.
1402 1402 */
1403 1403 if (type & UM_ALLOCATED)
1404 1404 (void) memset(valid, 1, chunks);
1405 1405 } else {
1406 1406 valid = NULL;
1407 1407 ubase = NULL;
1408 1408 }
1409 1409
1410 1410 /*
1411 1411 * walk the slab's freelist
1412 1412 */
1413 1413 bcp = sp->slab_head;
1414 1414
1415 1415 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1416 1416
1417 1417 /*
1418 1418 * since we could be in the middle of allocating a buffer,
1419 1419 	 * our refcnt could be one higher than it ought to be. So we
1420 1420 * check one further on the freelist than the count allows.
1421 1421 */
1422 1422 for (i = sp->slab_refcnt; i <= chunks; i++) {
1423 1423 uint_t ndx;
1424 1424
1425 1425 dprintf(("bcp is %p\n", bcp));
1426 1426
1427 1427 if (bcp == NULL) {
1428 1428 if (i == chunks)
1429 1429 break;
1430 1430 mdb_warn(
1431 1431 "slab %p in cache %p freelist too short by %d\n",
1432 1432 sp, addr, chunks - i);
1433 1433 break;
1434 1434 }
1435 1435
1436 1436 if (cp->cache_flags & UMF_HASH) {
1437 1437 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1438 1438 mdb_warn("failed to read bufctl ptr at %p",
1439 1439 bcp);
1440 1440 break;
1441 1441 }
1442 1442 buf = bc.bc_addr;
1443 1443 } else {
1444 1444 /*
1445 1445 * Otherwise the buffer is (or should be) in the slab
1446 1446 * that we've read in; determine its offset in the
1447 1447 * slab, validate that it's not corrupt, and add to
1448 1448 * our base address to find the umem_bufctl_t. (Note
1449 1449 * that we don't need to add the size of the bufctl
1450 1450 * to our offset calculation because of the slop that's
1451 1451 * allocated for the buffer at ubase.)
1452 1452 */
1453 1453 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1454 1454
1455 1455 if (offs > chunks * chunksize) {
1456 1456 mdb_warn("found corrupt bufctl ptr %p"
1457 1457 " in slab %p in cache %p\n", bcp,
1458 1458 wsp->walk_addr, addr);
1459 1459 break;
1460 1460 }
1461 1461
1462 1462 bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs));
1463 1463 buf = UMEM_BUF(cp, bcp);
1464 1464 }
1465 1465
1466 1466 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1467 1467
1468 1468 if (ndx > slabsize / cp->cache_bufsize) {
1469 1469 /*
1470 1470 * This is very wrong; we have managed to find
1471 1471 * a buffer in the slab which shouldn't
1472 1472 * actually be here. Emit a warning, and
1473 1473 * try to continue.
1474 1474 */
1475 1475 mdb_warn("buf %p is out of range for "
1476 1476 "slab %p, cache %p\n", buf, sp, addr);
1477 1477 } else if (type & UM_ALLOCATED) {
1478 1478 /*
1479 1479 * we have found a buffer on the slab's freelist;
1480 1480 * clear its entry
1481 1481 */
1482 1482 valid[ndx] = 0;
1483 1483 } else {
1484 1484 /*
1485 1485 * Report this freed buffer
1486 1486 */
1487 1487 if (type & UM_BUFCTL) {
1488 1488 ret = bufctl_walk_callback(cp, wsp,
1489 1489 (uintptr_t)bcp);
1490 1490 } else {
1491 1491 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1492 1492 }
1493 1493 if (ret != WALK_NEXT)
1494 1494 return (ret);
1495 1495 }
1496 1496
1497 1497 bcp = bc.bc_next;
1498 1498 }
1499 1499
1500 1500 if (bcp != NULL) {
1501 1501 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1502 1502 sp, addr, bcp));
1503 1503 }
1504 1504
1505 1505 /*
1506 1506 * If we are walking freed buffers, the loop above handled reporting
1507 1507 * them.
1508 1508 */
1509 1509 if (type & UM_FREE)
1510 1510 return (WALK_NEXT);
1511 1511
1512 1512 if (type & UM_BUFCTL) {
1513 1513 mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1514 1514 "cache %p\n", addr);
1515 1515 return (WALK_ERR);
1516 1516 }
1517 1517
1518 1518 /*
1519 1519 * Report allocated buffers, skipping buffers in the magazine layer.
1520 1520 * We only get this far for small-slab caches.
1521 1521 */
1522 1522 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1523 1523 buf = (char *)kbase + i * chunksize;
1524 1524
1525 1525 if (!valid[i])
1526 1526 continue; /* on slab freelist */
1527 1527
1528 1528 if (magcnt > 0 &&
1529 1529 bsearch(&buf, maglist, magcnt, sizeof (void *),
1530 1530 addrcmp) != NULL)
1531 1531 continue; /* in magazine layer */
1532 1532
1533 1533 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1534 1534 }
1535 1535 return (ret);
1536 1536 }
1537 1537
1538 1538 void
1539 1539 umem_walk_fini(mdb_walk_state_t *wsp)
1540 1540 {
1541 1541 umem_walk_t *umw = wsp->walk_data;
1542 1542 uintptr_t chunksize;
1543 1543 uintptr_t slabsize;
1544 1544
1545 1545 if (umw == NULL)
1546 1546 return;
1547 1547
1548 1548 if (umw->umw_maglist != NULL)
1549 1549 mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1550 1550
1551 1551 chunksize = umw->umw_cp->cache_chunksize;
1552 1552 slabsize = umw->umw_cp->cache_slabsize;
1553 1553
1554 1554 if (umw->umw_valid != NULL)
1555 1555 mdb_free(umw->umw_valid, slabsize / chunksize);
1556 1556 if (umw->umw_ubase != NULL)
1557 1557 mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1558 1558
1559 1559 mdb_free(umw->umw_cp, umw->umw_csize);
1560 1560 mdb_free(umw, sizeof (umem_walk_t));
1561 1561 }
1562 1562
1563 1563 /*ARGSUSED*/
1564 1564 static int
1565 1565 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1566 1566 {
1567 1567 /*
1568 1568 * Buffers allocated from NOTOUCH caches can also show up as freed
1569 1569 * memory in other caches. This can be a little confusing, so we
1570 1570 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1571 1571 * that "::walk umem" and "::walk freemem" yield disjoint output).
1572 1572 */
1573 1573 if (c->cache_cflags & UMC_NOTOUCH)
1574 1574 return (WALK_NEXT);
1575 1575
1576 1576 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1577 1577 wsp->walk_cbdata, addr) == -1)
1578 1578 return (WALK_DONE);
1579 1579
1580 1580 return (WALK_NEXT);
1581 1581 }
1582 1582
1583 1583 #define UMEM_WALK_ALL(name, wsp) { \
1584 1584 wsp->walk_data = (name); \
1585 1585 if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \
1586 1586 return (WALK_ERR); \
1587 1587 return (WALK_DONE); \
1588 1588 }
1589 1589
1590 1590 int
1591 1591 umem_walk_init(mdb_walk_state_t *wsp)
1592 1592 {
1593 1593 if (wsp->walk_arg != NULL)
1594 1594 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1595 1595
1596 1596 if (wsp->walk_addr == NULL)
1597 1597 UMEM_WALK_ALL("umem", wsp);
1598 1598 return (umem_walk_init_common(wsp, UM_ALLOCATED));
1599 1599 }
1600 1600
1601 1601 int
1602 1602 bufctl_walk_init(mdb_walk_state_t *wsp)
1603 1603 {
1604 1604 if (wsp->walk_addr == NULL)
1605 1605 UMEM_WALK_ALL("bufctl", wsp);
1606 1606 return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1607 1607 }
1608 1608
1609 1609 int
1610 1610 freemem_walk_init(mdb_walk_state_t *wsp)
1611 1611 {
1612 1612 if (wsp->walk_addr == NULL)
1613 1613 UMEM_WALK_ALL("freemem", wsp);
1614 1614 return (umem_walk_init_common(wsp, UM_FREE));
1615 1615 }
1616 1616
1617 1617 int
1618 1618 freectl_walk_init(mdb_walk_state_t *wsp)
1619 1619 {
1620 1620 if (wsp->walk_addr == NULL)
1621 1621 UMEM_WALK_ALL("freectl", wsp);
1622 1622 return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1623 1623 }
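These four entry points back the umem, bufctl, freemem, and freectl walkers referred to in the UMC_NOTOUCH comment above ("::walk umem" and "::walk freemem"). A minimal sketch of a consumer, assuming a valid cache address cp_addr and a callback like the print_buf() sketched earlier (both hypothetical):

	/* per-cache walk of allocated buffers */
	if (mdb_pwalk("umem", (mdb_walk_cb_t)print_buf, NULL, cp_addr) == -1)
		mdb_warn("couldn't walk 'umem' for cache %p", cp_addr);

	/* global walk: every allocated buffer in every non-NOTOUCH cache */
	if (mdb_walk("umem", (mdb_walk_cb_t)print_buf, NULL) == -1)
		mdb_warn("couldn't walk 'umem'");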
1624 1624
1625 1625 typedef struct bufctl_history_walk {
1626 1626 void *bhw_next;
1627 1627 umem_cache_t *bhw_cache;
1628 1628 umem_slab_t *bhw_slab;
1629 1629 hrtime_t bhw_timestamp;
1630 1630 } bufctl_history_walk_t;
1631 1631
1632 1632 int
1633 1633 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1634 1634 {
1635 1635 bufctl_history_walk_t *bhw;
1636 1636 umem_bufctl_audit_t bc;
1637 1637 umem_bufctl_audit_t bcn;
1638 1638
1639 1639 if (wsp->walk_addr == NULL) {
1640 1640 mdb_warn("bufctl_history walk doesn't support global walks\n");
1641 1641 return (WALK_ERR);
1642 1642 }
1643 1643
1644 1644 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1645 1645 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1646 1646 return (WALK_ERR);
1647 1647 }
1648 1648
1649 1649 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1650 1650 bhw->bhw_timestamp = 0;
1651 1651 bhw->bhw_cache = bc.bc_cache;
1652 1652 bhw->bhw_slab = bc.bc_slab;
1653 1653
1654 1654 /*
1655 1655 * sometimes the first log entry matches the base bufctl; in that
1656 1656 * case, skip the base bufctl.
1657 1657 */
1658 1658 if (bc.bc_lastlog != NULL &&
1659 1659 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1660 1660 bc.bc_addr == bcn.bc_addr &&
1661 1661 bc.bc_cache == bcn.bc_cache &&
1662 1662 bc.bc_slab == bcn.bc_slab &&
1663 1663 bc.bc_timestamp == bcn.bc_timestamp &&
1664 1664 bc.bc_thread == bcn.bc_thread)
1665 1665 bhw->bhw_next = bc.bc_lastlog;
1666 1666 else
1667 1667 bhw->bhw_next = (void *)wsp->walk_addr;
1668 1668
1669 1669 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1670 1670 wsp->walk_data = bhw;
1671 1671
1672 1672 return (WALK_NEXT);
1673 1673 }
1674 1674
1675 1675 int
1676 1676 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1677 1677 {
1678 1678 bufctl_history_walk_t *bhw = wsp->walk_data;
1679 1679 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1680 1680 uintptr_t baseaddr = wsp->walk_addr;
1681 1681 umem_bufctl_audit_t *b;
1682 1682 UMEM_LOCAL_BUFCTL_AUDIT(&b);
1683 1683
1684 1684 if (addr == NULL)
1685 1685 return (WALK_DONE);
1686 1686
1687 1687 if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1688 1688 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1689 1689 return (WALK_ERR);
1690 1690 }
1691 1691
1692 1692 /*
1693 1693 * The bufctl is only valid if the address, cache, and slab are
1694 1694 * correct. We also check that the timestamp is decreasing, to
1695 1695 * prevent infinite loops.
1696 1696 */
1697 1697 if ((uintptr_t)b->bc_addr != baseaddr ||
1698 1698 b->bc_cache != bhw->bhw_cache ||
1699 1699 b->bc_slab != bhw->bhw_slab ||
1700 1700 (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1701 1701 return (WALK_DONE);
1702 1702
1703 1703 bhw->bhw_next = b->bc_lastlog;
1704 1704 bhw->bhw_timestamp = b->bc_timestamp;
1705 1705
1706 1706 return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1707 1707 }
1708 1708
1709 1709 void
1710 1710 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1711 1711 {
1712 1712 bufctl_history_walk_t *bhw = wsp->walk_data;
1713 1713
1714 1714 mdb_free(bhw, sizeof (*bhw));
1715 1715 }
1716 1716
1717 1717 typedef struct umem_log_walk {
1718 1718 umem_bufctl_audit_t *ulw_base;
1719 1719 umem_bufctl_audit_t **ulw_sorted;
1720 1720 umem_log_header_t ulw_lh;
1721 1721 size_t ulw_size;
1722 1722 size_t ulw_maxndx;
1723 1723 size_t ulw_ndx;
1724 1724 } umem_log_walk_t;
1725 1725
1726 1726 int
1727 1727 umem_log_walk_init(mdb_walk_state_t *wsp)
1728 1728 {
1729 1729 uintptr_t lp = wsp->walk_addr;
1730 1730 umem_log_walk_t *ulw;
1731 1731 umem_log_header_t *lhp;
1732 1732 int maxndx, i, j, k;
1733 1733
1734 1734 /*
1735 1735 * By default (global walk), walk the umem_transaction_log. Otherwise
1736 1736 * read the log whose umem_log_header_t is stored at walk_addr.
1737 1737 */
1738 1738 if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
1739 1739 mdb_warn("failed to read 'umem_transaction_log'");
1740 1740 return (WALK_ERR);
1741 1741 }
1742 1742
1743 1743 if (lp == NULL) {
1744 1744 mdb_warn("log is disabled\n");
1745 1745 return (WALK_ERR);
1746 1746 }
1747 1747
1748 1748 ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1749 1749 lhp = &ulw->ulw_lh;
1750 1750
1751 1751 if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1752 1752 mdb_warn("failed to read log header at %p", lp);
1753 1753 mdb_free(ulw, sizeof (umem_log_walk_t));
1754 1754 return (WALK_ERR);
1755 1755 }
1756 1756
1757 1757 ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1758 1758 ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1759 1759 maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1760 1760
1761 1761 if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1762 1762 (uintptr_t)lhp->lh_base) == -1) {
1763 1763 mdb_warn("failed to read log at base %p", lhp->lh_base);
1764 1764 mdb_free(ulw->ulw_base, ulw->ulw_size);
1765 1765 mdb_free(ulw, sizeof (umem_log_walk_t));
1766 1766 return (WALK_ERR);
1767 1767 }
1768 1768
1769 1769 ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1770 1770 sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1771 1771
1772 1772 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1773 1773 caddr_t chunk = (caddr_t)
1774 1774 ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1775 1775
1776 1776 for (j = 0; j < maxndx; j++) {
1777 1777 /* LINTED align */
1778 1778 ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1779 1779 chunk += UMEM_BUFCTL_AUDIT_SIZE;
1780 1780 }
1781 1781 }
1782 1782
1783 1783 qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1784 1784 (int(*)(const void *, const void *))bufctlcmp);
1785 1785
1786 1786 ulw->ulw_maxndx = k;
1787 1787 wsp->walk_data = ulw;
1788 1788
1789 1789 return (WALK_NEXT);
1790 1790 }
1791 1791
1792 1792 int
1793 1793 umem_log_walk_step(mdb_walk_state_t *wsp)
1794 1794 {
1795 1795 umem_log_walk_t *ulw = wsp->walk_data;
1796 1796 umem_bufctl_audit_t *bcp;
1797 1797
1798 1798 if (ulw->ulw_ndx == ulw->ulw_maxndx)
1799 1799 return (WALK_DONE);
1800 1800
1801 1801 bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1802 1802
1803 1803 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1804 1804 (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1805 1805 }
1806 1806
1807 1807 void
1808 1808 umem_log_walk_fini(mdb_walk_state_t *wsp)
1809 1809 {
1810 1810 umem_log_walk_t *ulw = wsp->walk_data;
1811 1811
1812 1812 mdb_free(ulw->ulw_base, ulw->ulw_size);
1813 1813 mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1814 1814 sizeof (umem_bufctl_audit_t *));
1815 1815 mdb_free(ulw, sizeof (umem_log_walk_t));
1816 1816 }
1817 1817
1818 1818 typedef struct allocdby_bufctl {
1819 1819 uintptr_t abb_addr;
1820 1820 hrtime_t abb_ts;
1821 1821 } allocdby_bufctl_t;
1822 1822
1823 1823 typedef struct allocdby_walk {
1824 1824 const char *abw_walk;
1825 1825 uintptr_t abw_thread;
1826 1826 size_t abw_nbufs;
1827 1827 size_t abw_size;
1828 1828 allocdby_bufctl_t *abw_buf;
1829 1829 size_t abw_ndx;
1830 1830 } allocdby_walk_t;
1831 1831
1832 1832 int
1833 1833 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1834 1834 allocdby_walk_t *abw)
1835 1835 {
1836 1836 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1837 1837 return (WALK_NEXT);
1838 1838
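	/* The record array starts small and is doubled each time it fills. */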
1839 1839 if (abw->abw_nbufs == abw->abw_size) {
1840 1840 allocdby_bufctl_t *buf;
1841 1841 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1842 1842
1843 1843 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1844 1844
1845 1845 bcopy(abw->abw_buf, buf, oldsize);
1846 1846 mdb_free(abw->abw_buf, oldsize);
1847 1847
1848 1848 abw->abw_size <<= 1;
1849 1849 abw->abw_buf = buf;
1850 1850 }
1851 1851
1852 1852 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1853 1853 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1854 1854 abw->abw_nbufs++;
1855 1855
1856 1856 return (WALK_NEXT);
1857 1857 }
1858 1858
1859 1859 /*ARGSUSED*/
1860 1860 int
1861 1861 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1862 1862 {
1863 1863 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1864 1864 abw, addr) == -1) {
1865 1865 mdb_warn("couldn't walk bufctl for cache %p", addr);
1866 1866 return (WALK_DONE);
1867 1867 }
1868 1868
1869 1869 return (WALK_NEXT);
1870 1870 }
1871 1871
1872 1872 static int
1873 1873 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1874 1874 {
1875 1875 if (lhs->abb_ts < rhs->abb_ts)
1876 1876 return (1);
1877 1877 if (lhs->abb_ts > rhs->abb_ts)
1878 1878 return (-1);
1879 1879 return (0);
1880 1880 }
1881 1881
1882 1882 static int
1883 1883 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1884 1884 {
1885 1885 allocdby_walk_t *abw;
1886 1886
1887 1887 if (wsp->walk_addr == NULL) {
1888 1888 mdb_warn("allocdby walk doesn't support global walks\n");
1889 1889 return (WALK_ERR);
1890 1890 }
1891 1891
1892 1892 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1893 1893
1894 1894 abw->abw_thread = wsp->walk_addr;
1895 1895 abw->abw_walk = walk;
1896 1896 abw->abw_size = 128; /* something reasonable */
1897 1897 abw->abw_buf =
1898 1898 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1899 1899
1900 1900 wsp->walk_data = abw;
1901 1901
1902 1902 if (mdb_walk("umem_cache",
1903 1903 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1904 1904 mdb_warn("couldn't walk umem_cache");
1905 1905 allocdby_walk_fini(wsp);
1906 1906 return (WALK_ERR);
1907 1907 }
1908 1908
1909 1909 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1910 1910 (int(*)(const void *, const void *))allocdby_cmp);
1911 1911
1912 1912 return (WALK_NEXT);
1913 1913 }
1914 1914
1915 1915 int
1916 1916 allocdby_walk_init(mdb_walk_state_t *wsp)
1917 1917 {
1918 1918 return (allocdby_walk_init_common(wsp, "bufctl"));
1919 1919 }
1920 1920
1921 1921 int
1922 1922 freedby_walk_init(mdb_walk_state_t *wsp)
1923 1923 {
1924 1924 return (allocdby_walk_init_common(wsp, "freectl"));
1925 1925 }
1926 1926
1927 1927 int
1928 1928 allocdby_walk_step(mdb_walk_state_t *wsp)
1929 1929 {
1930 1930 allocdby_walk_t *abw = wsp->walk_data;
1931 1931 uintptr_t addr;
1932 1932 umem_bufctl_audit_t *bcp;
1933 1933 UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1934 1934
1935 1935 if (abw->abw_ndx == abw->abw_nbufs)
1936 1936 return (WALK_DONE);
1937 1937
1938 1938 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1939 1939
1940 1940 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1941 1941 mdb_warn("couldn't read bufctl at %p", addr);
1942 1942 return (WALK_DONE);
1943 1943 }
1944 1944
1945 1945 return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1946 1946 }
1947 1947
1948 1948 void
1949 1949 allocdby_walk_fini(mdb_walk_state_t *wsp)
1950 1950 {
1951 1951 allocdby_walk_t *abw = wsp->walk_data;
1952 1952
1953 1953 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1954 1954 mdb_free(abw, sizeof (allocdby_walk_t));
1955 1955 }
1956 1956
1957 1957 /*ARGSUSED*/
1958 1958 int
1959 1959 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1960 1960 {
1961 1961 char c[MDB_SYM_NAMLEN];
1962 1962 GElf_Sym sym;
1963 1963 int i;
1964 1964
1965 1965 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1966 1966 for (i = 0; i < bcp->bc_depth; i++) {
1967 1967 if (mdb_lookup_by_addr(bcp->bc_stack[i],
1968 1968 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1969 1969 continue;
1970 1970 if (is_umem_sym(c, "umem_"))
1971 1971 continue;
1972 1972 mdb_printf("%s+0x%lx",
1973 1973 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1974 1974 break;
1975 1975 }
1976 1976 mdb_printf("\n");
1977 1977
1978 1978 return (WALK_NEXT);
1979 1979 }
1980 1980
1981 1981 static int
1982 1982 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1983 1983 {
1984 1984 if (!(flags & DCMD_ADDRSPEC))
1985 1985 return (DCMD_USAGE);
1986 1986
1987 1987 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1988 1988
1989 1989 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1990 1990 mdb_warn("can't walk '%s' for %p", w, addr);
1991 1991 return (DCMD_ERR);
1992 1992 }
1993 1993
1994 1994 return (DCMD_OK);
1995 1995 }
1996 1996
1997 1997 /*ARGSUSED*/
1998 1998 int
1999 1999 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2000 2000 {
2001 2001 return (allocdby_common(addr, flags, "allocdby"));
2002 2002 }
2003 2003
2004 2004 /*ARGSUSED*/
2005 2005 int
2006 2006 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2007 2007 {
2008 2008 return (allocdby_common(addr, flags, "freedby"));
2009 2009 }
2010 2010
2011 2011 typedef struct whatis_info {
2012 2012 mdb_whatis_t *wi_w;
2013 2013 const umem_cache_t *wi_cache;
2014 2014 const vmem_t *wi_vmem;
2015 2015 vmem_t *wi_msb_arena;
2016 2016 size_t wi_slab_size;
2017 2017 int wi_slab_found;
2018 2018 uint_t wi_freemem;
2019 2019 } whatis_info_t;
2020 2020
2021 2021 /* call one of our dcmd functions with "-v" and the provided address */
2022 2022 static void
2023 2023 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2024 2024 {
2025 2025 mdb_arg_t a;
2026 2026 a.a_type = MDB_TYPE_STRING;
2027 2027 a.a_un.a_str = "-v";
2028 2028
2029 2029 mdb_printf(":\n");
2030 2030 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2031 2031 }
2032 2032
2033 2033 static void
2034 2034 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2035 2035 uintptr_t baddr)
2036 2036 {
2037 2037 mdb_whatis_t *w = wi->wi_w;
2038 2038 const umem_cache_t *cp = wi->wi_cache;
2039 2039 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2040 2040
2041 2041 int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT));
2042 2042
2043 2043 mdb_whatis_report_object(w, maddr, addr, "");
2044 2044
2045 2045 if (baddr != 0 && !call_printer)
2046 2046 mdb_printf("bufctl %p ", baddr);
2047 2047
2048 2048 mdb_printf("%s from %s",
2049 2049 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2050 2050
2051 2051 if (call_printer && baddr != 0) {
2052 2052 whatis_call_printer(bufctl, baddr);
2053 2053 return;
2054 2054 }
2055 2055 mdb_printf("\n");
2056 2056 }
2057 2057
2058 2058 /*ARGSUSED*/
2059 2059 static int
2060 2060 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2061 2061 {
2062 2062 mdb_whatis_t *w = wi->wi_w;
2063 2063
2064 2064 uintptr_t cur;
2065 2065 size_t size = wi->wi_cache->cache_bufsize;
2066 2066
2067 2067 while (mdb_whatis_match(w, addr, size, &cur))
2068 2068 whatis_print_umem(wi, cur, addr, NULL);
2069 2069
2070 2070 return (WHATIS_WALKRET(w));
2071 2071 }
2072 2072
2073 2073 /*ARGSUSED*/
2074 2074 static int
2075 2075 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi)
2076 2076 {
2077 2077 mdb_whatis_t *w = wi->wi_w;
2078 2078
2079 2079 uintptr_t cur;
2080 2080 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2081 2081 size_t size = wi->wi_cache->cache_bufsize;
2082 2082
2083 2083 while (mdb_whatis_match(w, addr, size, &cur))
2084 2084 whatis_print_umem(wi, cur, addr, baddr);
2085 2085
2086 2086 return (WHATIS_WALKRET(w));
2087 2087 }
2088 2088
2089 2089
2090 2090 static int
2091 2091 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2092 2092 {
2093 2093 mdb_whatis_t *w = wi->wi_w;
2094 2094
2095 2095 size_t size = vs->vs_end - vs->vs_start;
2096 2096 uintptr_t cur;
2097 2097
2098 2098 /* We're not interested in anything but alloc and free segments */
2099 2099 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2100 2100 return (WALK_NEXT);
2101 2101
2102 2102 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2103 2103 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2104 2104
2105 2105 /*
2106 2106 		 * If we're not printing it separately, provide the vmem_seg
2107 2107 * pointer if it has a stack trace.
2108 2108 */
2109 2109 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2110 2110 ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 ||
2111 2111 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2112 2112 mdb_printf("vmem_seg %p ", addr);
2113 2113 }
2114 2114
2115 2115 mdb_printf("%s from %s vmem arena",
2116 2116 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2117 2117 wi->wi_vmem->vm_name);
2118 2118
2119 - if (!mdb_whatis_flags(w) & WHATIS_QUIET)
2119 + if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2120 2120 whatis_call_printer(vmem_seg, addr);
2121 2121 else
2122 2122 mdb_printf("\n");
2123 2123 }
2124 2124
2125 2125 return (WHATIS_WALKRET(w));
2126 2126 }
2127 2127
2128 2128 static int
2129 2129 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2130 2130 {
2131 2131 mdb_whatis_t *w = wi->wi_w;
2132 2132 const char *nm = vmem->vm_name;
2133 2133 wi->wi_vmem = vmem;
2134 2134
2135 2135 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2136 2136 mdb_printf("Searching vmem arena %s...\n", nm);
2137 2137
2138 2138 if (mdb_pwalk("vmem_seg",
2139 2139 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2140 2140 mdb_warn("can't walk vmem seg for %p", addr);
2141 2141 return (WALK_NEXT);
2142 2142 }
2143 2143
2144 2144 return (WHATIS_WALKRET(w));
2145 2145 }
2146 2146
2147 2147 /*ARGSUSED*/
2148 2148 static int
2149 2149 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi)
2150 2150 {
2151 2151 mdb_whatis_t *w = wi->wi_w;
2152 2152
2153 2153 /* It must overlap with the slab data, or it's not interesting */
2154 2154 if (mdb_whatis_overlaps(w,
2155 2155 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2156 2156 wi->wi_slab_found++;
2157 2157 return (WALK_DONE);
2158 2158 }
2159 2159 return (WALK_NEXT);
2160 2160 }
2161 2161
2162 2162 static int
2163 2163 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2164 2164 {
2165 2165 mdb_whatis_t *w = wi->wi_w;
2166 2166 char *walk, *freewalk;
2167 2167 mdb_walk_cb_t func;
2168 2168 int do_bufctl;
2169 2169
2170 2170 /* Override the '-b' flag as necessary */
2171 2171 if (!(c->cache_flags & UMF_HASH))
2172 2172 do_bufctl = FALSE; /* no bufctls to walk */
2173 2173 else if (c->cache_flags & UMF_AUDIT)
2174 2174 do_bufctl = TRUE; /* we always want debugging info */
2175 2175 else
2176 2176 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2177 2177
2178 2178 if (do_bufctl) {
2179 2179 walk = "bufctl";
2180 2180 freewalk = "freectl";
2181 2181 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2182 2182 } else {
2183 2183 walk = "umem";
2184 2184 freewalk = "freemem";
2185 2185 func = (mdb_walk_cb_t)whatis_walk_umem;
2186 2186 }
2187 2187
2188 2188 wi->wi_cache = c;
2189 2189
2190 2190 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2191 2191 mdb_printf("Searching %s...\n", c->cache_name);
2192 2192
2193 2193 /*
2194 2194 	 * If more than two buffers live on each slab, figure out if we're
2195 2195 * interested in anything in any slab before doing the more expensive
2196 2196 * umem/freemem (bufctl/freectl) walkers.
2197 2197 */
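	/*
	 * For example, a cache of very small buffers may pack hundreds of
	 * chunks into a single slab; checking slab overlap first lets us
	 * skip the cache entirely when none of its slabs contain an
	 * address being looked up.
	 */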
2198 2198 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2199 2199 if (!(c->cache_flags & UMF_HASH))
2200 2200 wi->wi_slab_size -= sizeof (umem_slab_t);
2201 2201
2202 2202 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2203 2203 wi->wi_slab_found = 0;
2204 2204 if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2205 2205 addr) == -1) {
2206 2206 mdb_warn("can't find umem_slab walker");
2207 2207 return (WALK_DONE);
2208 2208 }
2209 2209 if (wi->wi_slab_found == 0)
2210 2210 return (WALK_NEXT);
2211 2211 }
2212 2212
2213 2213 wi->wi_freemem = FALSE;
2214 2214 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2215 2215 mdb_warn("can't find %s walker", walk);
2216 2216 return (WALK_DONE);
2217 2217 }
2218 2218
2219 2219 if (mdb_whatis_done(w))
2220 2220 return (WALK_DONE);
2221 2221
2222 2222 /*
2223 2223 * We have searched for allocated memory; now search for freed memory.
2224 2224 */
2225 2225 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2226 2226 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2227 2227
2228 2228 wi->wi_freemem = TRUE;
2229 2229
2230 2230 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2231 2231 mdb_warn("can't find %s walker", freewalk);
2232 2232 return (WALK_DONE);
2233 2233 }
2234 2234
2235 2235 return (WHATIS_WALKRET(w));
2236 2236 }
2237 2237
2238 2238 static int
2239 2239 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2240 2240 {
2241 2241 if (c->cache_arena == wi->wi_msb_arena ||
2242 2242 (c->cache_cflags & UMC_NOTOUCH))
2243 2243 return (WALK_NEXT);
2244 2244
2245 2245 return (whatis_walk_cache(addr, c, wi));
2246 2246 }
2247 2247
2248 2248 static int
2249 2249 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2250 2250 {
2251 2251 if (c->cache_arena != wi->wi_msb_arena)
2252 2252 return (WALK_NEXT);
2253 2253
2254 2254 return (whatis_walk_cache(addr, c, wi));
2255 2255 }
2256 2256
2257 2257 static int
2258 2258 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2259 2259 {
2260 2260 if (c->cache_arena == wi->wi_msb_arena ||
2261 2261 !(c->cache_cflags & UMC_NOTOUCH))
2262 2262 return (WALK_NEXT);
2263 2263
2264 2264 return (whatis_walk_cache(addr, c, wi));
2265 2265 }
2266 2266
2267 2267 /*ARGSUSED*/
2268 2268 static int
2269 2269 whatis_run_umem(mdb_whatis_t *w, void *ignored)
2270 2270 {
2271 2271 whatis_info_t wi;
2272 2272
2273 2273 bzero(&wi, sizeof (wi));
2274 2274 wi.wi_w = w;
2275 2275
2276 2276 /* umem's metadata is allocated from the umem_internal_arena */
2277 2277 if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1)
2278 2278 mdb_warn("unable to readvar \"umem_internal_arena\"");
2279 2279
2280 2280 /*
2281 2281 * We process umem caches in the following order:
2282 2282 *
2283 2283 * non-UMC_NOTOUCH, non-metadata (typically the most interesting)
2284 2284 * metadata (can be huge with UMF_AUDIT)
2285 2285 * UMC_NOTOUCH, non-metadata (see umem_walk_all())
2286 2286 */
2287 2287 if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2288 2288 &wi) == -1 ||
2289 2289 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2290 2290 &wi) == -1 ||
2291 2291 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2292 2292 &wi) == -1) {
2293 2293 mdb_warn("couldn't find umem_cache walker");
2294 2294 return (1);
2295 2295 }
2296 2296 return (0);
2297 2297 }
2298 2298
2299 2299 /*ARGSUSED*/
2300 2300 static int
2301 2301 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2302 2302 {
2303 2303 whatis_info_t wi;
2304 2304
2305 2305 bzero(&wi, sizeof (wi));
2306 2306 wi.wi_w = w;
2307 2307
2308 2308 if (mdb_walk("vmem_postfix",
2309 2309 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2310 2310 mdb_warn("couldn't find vmem_postfix walker");
2311 2311 return (1);
2312 2312 }
2313 2313 return (0);
2314 2314 }
2315 2315
2316 2316 int
2317 2317 umem_init(void)
2318 2318 {
2319 2319 mdb_walker_t w = {
2320 2320 "umem_cache", "walk list of umem caches", umem_cache_walk_init,
2321 2321 umem_cache_walk_step, umem_cache_walk_fini
2322 2322 };
2323 2323
2324 2324 if (mdb_add_walker(&w) == -1) {
2325 2325 mdb_warn("failed to add umem_cache walker");
2326 2326 return (-1);
2327 2327 }
2328 2328
2329 2329 if (umem_update_variables() == -1)
2330 2330 return (-1);
2331 2331
2332 2332 /* install a callback so that our variables are always up-to-date */
2333 2333 (void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
2334 2334 umem_statechange_cb(NULL);
2335 2335
2336 2336 /*
2337 2337 * Register our ::whatis callbacks.
2338 2338 */
2339 2339 mdb_whatis_register("umem", whatis_run_umem, NULL,
2340 2340 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2341 2341 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
2342 2342 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2343 2343
2344 2344 return (0);
2345 2345 }
2346 2346
2347 2347 typedef struct umem_log_cpu {
2348 2348 uintptr_t umc_low;
2349 2349 uintptr_t umc_high;
2350 2350 } umem_log_cpu_t;
2351 2351
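/*
 * Print a single transaction log record, tagged with the CPU whose log
 * chunk contains the record's address (blank when no CPU's chunk matches).
 */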
2352 2352 int
2353 2353 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2354 2354 {
2355 2355 int i;
2356 2356
2357 2357 for (i = 0; i < umem_max_ncpus; i++) {
2358 2358 if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2359 2359 break;
2360 2360 }
2361 2361
2362 2362 if (i == umem_max_ncpus)
2363 2363 mdb_printf(" ");
2364 2364 else
2365 2365 mdb_printf("%3d", i);
2366 2366
2367 2367 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2368 2368 b->bc_timestamp, b->bc_thread);
2369 2369
2370 2370 return (WALK_NEXT);
2371 2371 }
2372 2372
2373 2373 /*ARGSUSED*/
2374 2374 int
2375 2375 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2376 2376 {
2377 2377 umem_log_header_t lh;
2378 2378 umem_cpu_log_header_t clh;
2379 2379 uintptr_t lhp, clhp;
2380 2380 umem_log_cpu_t *umc;
2381 2381 int i;
2382 2382
2383 2383 if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2384 2384 mdb_warn("failed to read 'umem_transaction_log'");
2385 2385 return (DCMD_ERR);
2386 2386 }
2387 2387
2388 2388 if (lhp == NULL) {
2389 2389 mdb_warn("no umem transaction log\n");
2390 2390 return (DCMD_ERR);
2391 2391 }
2392 2392
2393 2393 if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2394 2394 mdb_warn("failed to read log header at %p", lhp);
2395 2395 return (DCMD_ERR);
2396 2396 }
2397 2397
2398 2398 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2399 2399
2400 2400 umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2401 2401 UM_SLEEP | UM_GC);
2402 2402
2403 2403 for (i = 0; i < umem_max_ncpus; i++) {
2404 2404 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2405 2405 mdb_warn("cannot read cpu %d's log header at %p",
2406 2406 i, clhp);
2407 2407 return (DCMD_ERR);
2408 2408 }
2409 2409
2410 2410 umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2411 2411 (uintptr_t)lh.lh_base;
2412 2412 umc[i].umc_high = (uintptr_t)clh.clh_current;
2413 2413
2414 2414 clhp += sizeof (umem_cpu_log_header_t);
2415 2415 }
2416 2416
2417 2417 if (DCMD_HDRSPEC(flags)) {
2418 2418 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2419 2419 "BUFADDR", "TIMESTAMP", "THREAD");
2420 2420 }
2421 2421
2422 2422 /*
2423 2423 * If we have been passed an address, we'll just print out that
2424 2424 * log entry.
2425 2425 */
2426 2426 if (flags & DCMD_ADDRSPEC) {
2427 2427 umem_bufctl_audit_t *bp;
2428 2428 UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2429 2429
2430 2430 if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2431 2431 mdb_warn("failed to read bufctl at %p", addr);
2432 2432 return (DCMD_ERR);
2433 2433 }
2434 2434
2435 2435 (void) umem_log_walk(addr, bp, umc);
2436 2436
2437 2437 return (DCMD_OK);
2438 2438 }
2439 2439
2440 2440 if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2441 2441 mdb_warn("can't find umem log walker");
2442 2442 return (DCMD_ERR);
2443 2443 }
2444 2444
2445 2445 return (DCMD_OK);
2446 2446 }
2447 2447
2448 2448 typedef struct bufctl_history_cb {
2449 2449 int bhc_flags;
2450 2450 int bhc_argc;
2451 2451 const mdb_arg_t *bhc_argv;
2452 2452 int bhc_ret;
2453 2453 } bufctl_history_cb_t;
2454 2454
2455 2455 /*ARGSUSED*/
2456 2456 static int
2457 2457 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2458 2458 {
2459 2459 bufctl_history_cb_t *bhc = arg;
2460 2460
2461 2461 bhc->bhc_ret =
2462 2462 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2463 2463
2464 2464 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2465 2465
2466 2466 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2467 2467 }
2468 2468
2469 2469 void
2470 2470 bufctl_help(void)
2471 2471 {
2472 2472 mdb_printf("%s\n",
2473 2473 "Display the contents of umem_bufctl_audit_ts, with optional filtering.\n");
2474 2474 mdb_dec_indent(2);
2475 2475 mdb_printf("%<b>OPTIONS%</b>\n");
2476 2476 mdb_inc_indent(2);
2477 2477 mdb_printf("%s",
2478 2478 " -v Display the full content of the bufctl, including its stack trace\n"
2479 2479 " -h retrieve the bufctl's transaction history, if available\n"
2480 2480 " -a addr\n"
2481 2481 " filter out bufctls not involving the buffer at addr\n"
2482 2482 " -c caller\n"
2483 2483 " filter out bufctls without the function/PC in their stack trace\n"
2484 2484 " -e earliest\n"
2485 2485 " filter out bufctls timestamped before earliest\n"
2486 2486 " -l latest\n"
2487 2487 " filter out bufctls timestamped after latest\n"
2488 2488 " -t thread\n"
2489 2489 " filter out bufctls not involving thread\n");
2490 2490 }
2491 2491
2492 2492 int
2493 2493 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2494 2494 {
2495 2495 uint_t verbose = FALSE;
2496 2496 uint_t history = FALSE;
2497 2497 uint_t in_history = FALSE;
2498 2498 uintptr_t caller = NULL, thread = NULL;
2499 2499 uintptr_t laddr, haddr, baddr = NULL;
2500 2500 hrtime_t earliest = 0, latest = 0;
2501 2501 int i, depth;
2502 2502 char c[MDB_SYM_NAMLEN];
2503 2503 GElf_Sym sym;
2504 2504 umem_bufctl_audit_t *bcp;
2505 2505 UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2506 2506
2507 2507 if (mdb_getopts(argc, argv,
2508 2508 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2509 2509 'h', MDB_OPT_SETBITS, TRUE, &history,
2510 2510 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2511 2511 'c', MDB_OPT_UINTPTR, &caller,
2512 2512 't', MDB_OPT_UINTPTR, &thread,
2513 2513 'e', MDB_OPT_UINT64, &earliest,
2514 2514 'l', MDB_OPT_UINT64, &latest,
2515 2515 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2516 2516 return (DCMD_USAGE);
2517 2517
2518 2518 if (!(flags & DCMD_ADDRSPEC))
2519 2519 return (DCMD_USAGE);
2520 2520
2521 2521 if (in_history && !history)
2522 2522 return (DCMD_USAGE);
2523 2523
2524 2524 if (history && !in_history) {
2525 2525 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2526 2526 UM_SLEEP | UM_GC);
2527 2527 bufctl_history_cb_t bhc;
2528 2528
2529 2529 nargv[0].a_type = MDB_TYPE_STRING;
2530 2530 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2531 2531
2532 2532 for (i = 0; i < argc; i++)
2533 2533 nargv[i + 1] = argv[i];
2534 2534
2535 2535 /*
2536 2536 * When in history mode, we treat each element as if it
2537 2537 		 * were in a separate loop, so that the headers group
2538 2538 * bufctls with similar histories.
2539 2539 */
2540 2540 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2541 2541 bhc.bhc_argc = argc + 1;
2542 2542 bhc.bhc_argv = nargv;
2543 2543 bhc.bhc_ret = DCMD_OK;
2544 2544
2545 2545 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2546 2546 addr) == -1) {
2547 2547 mdb_warn("unable to walk bufctl_history");
2548 2548 return (DCMD_ERR);
2549 2549 }
2550 2550
2551 2551 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2552 2552 mdb_printf("\n");
2553 2553
2554 2554 return (bhc.bhc_ret);
2555 2555 }
2556 2556
2557 2557 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2558 2558 if (verbose) {
2559 2559 mdb_printf("%16s %16s %16s %16s\n"
2560 2560 "%<u>%16s %16s %16s %16s%</u>\n",
2561 2561 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2562 2562 "", "CACHE", "LASTLOG", "CONTENTS");
2563 2563 } else {
2564 2564 mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2565 2565 "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2566 2566 }
2567 2567 }
2568 2568
2569 2569 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2570 2570 mdb_warn("couldn't read bufctl at %p", addr);
2571 2571 return (DCMD_ERR);
2572 2572 }
2573 2573
2574 2574 /*
2575 2575 * Guard against bogus bc_depth in case the bufctl is corrupt or
2576 2576 * the address does not really refer to a bufctl.
2577 2577 */
2578 2578 depth = MIN(bcp->bc_depth, umem_stack_depth);
2579 2579
2580 2580 if (caller != NULL) {
2581 2581 laddr = caller;
2582 2582 haddr = caller + sizeof (caller);
2583 2583
2584 2584 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2585 2585 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2586 2586 /*
2587 2587 * We were provided an exact symbol value; any
2588 2588 * address in the function is valid.
2589 2589 */
2590 2590 laddr = (uintptr_t)sym.st_value;
2591 2591 haddr = (uintptr_t)sym.st_value + sym.st_size;
2592 2592 }
2593 2593
2594 2594 for (i = 0; i < depth; i++)
2595 2595 if (bcp->bc_stack[i] >= laddr &&
2596 2596 bcp->bc_stack[i] < haddr)
2597 2597 break;
2598 2598
2599 2599 if (i == depth)
2600 2600 return (DCMD_OK);
2601 2601 }
2602 2602
2603 2603 if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
2604 2604 return (DCMD_OK);
2605 2605
2606 2606 if (earliest != 0 && bcp->bc_timestamp < earliest)
2607 2607 return (DCMD_OK);
2608 2608
2609 2609 if (latest != 0 && bcp->bc_timestamp > latest)
2610 2610 return (DCMD_OK);
2611 2611
2612 2612 if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2613 2613 return (DCMD_OK);
2614 2614
2615 2615 if (flags & DCMD_PIPE_OUT) {
2616 2616 mdb_printf("%#r\n", addr);
2617 2617 return (DCMD_OK);
2618 2618 }
2619 2619
2620 2620 if (verbose) {
2621 2621 mdb_printf(
2622 2622 "%<b>%16p%</b> %16p %16llx %16d\n"
2623 2623 "%16s %16p %16p %16p\n",
2624 2624 addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2625 2625 "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2626 2626
2627 2627 mdb_inc_indent(17);
2628 2628 for (i = 0; i < depth; i++)
2629 2629 mdb_printf("%a\n", bcp->bc_stack[i]);
2630 2630 mdb_dec_indent(17);
2631 2631 mdb_printf("\n");
2632 2632 } else {
2633 2633 mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2634 2634 bcp->bc_timestamp, bcp->bc_thread);
2635 2635
2636 2636 for (i = 0; i < depth; i++) {
2637 2637 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2638 2638 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2639 2639 continue;
2640 2640 if (is_umem_sym(c, "umem_"))
2641 2641 continue;
2642 2642 mdb_printf(" %a\n", bcp->bc_stack[i]);
2643 2643 break;
2644 2644 }
2645 2645
2646 2646 if (i >= depth)
2647 2647 mdb_printf("\n");
2648 2648 }
2649 2649
2650 2650 return (DCMD_OK);
2651 2651 }
2652 2652
2653 2653 /*ARGSUSED*/
2654 2654 int
2655 2655 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2656 2656 {
2657 2657 mdb_arg_t a;
2658 2658
2659 2659 if (!(flags & DCMD_ADDRSPEC))
2660 2660 return (DCMD_USAGE);
2661 2661
2662 2662 if (argc != 0)
2663 2663 return (DCMD_USAGE);
2664 2664
2665 2665 a.a_type = MDB_TYPE_STRING;
2666 2666 a.a_un.a_str = "-v";
2667 2667
2668 2668 return (bufctl(addr, flags, 1, &a));
2669 2669 }
2670 2670
2671 2671 typedef struct umem_verify {
2672 2672 uint64_t *umv_buf; /* buffer to read cache contents into */
2673 2673 size_t umv_size; /* number of bytes in umv_buf */
2674 2674 int umv_corruption; /* > 0 if corruption found. */
2675 2675 int umv_besilent; /* report actual corruption sites */
2676 2676 struct umem_cache umv_cache; /* the cache we're operating on */
2677 2677 } umem_verify_t;
2678 2678
2679 2679 /*
2680 2680 * verify_pattern()
2681 2681 * verify that buf is filled with the pattern pat.
2682 2682 */
2683 2683 static int64_t
2684 2684 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
2685 2685 {
2686 2686 /*LINTED*/
2687 2687 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
2688 2688 uint64_t *buf;
2689 2689
2690 2690 for (buf = buf_arg; buf < bufend; buf++)
2691 2691 if (*buf != pat)
2692 2692 return ((uintptr_t)buf - (uintptr_t)buf_arg);
2693 2693 return (-1);
2694 2694 }
2695 2695
2696 2696 /*
2697 2697 * verify_buftag()
2698 2698 * verify that btp->bt_bxstat == (bcp ^ pat)
2699 2699 */
2700 2700 static int
2701 2701 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2702 2702 {
2703 2703 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2704 2704 }
2705 2705
2706 2706 /*
2707 2707 * verify_free()
2708 2708 * verify the integrity of a free block of memory by checking
2709 2709 * that it is filled with 0xdeadbeef and that its buftag is sane.
2710 2710 */
2711 2711 /*ARGSUSED1*/
2712 2712 static int
2713 2713 verify_free(uintptr_t addr, const void *data, void *private)
2714 2714 {
2715 2715 umem_verify_t *umv = (umem_verify_t *)private;
2716 2716 uint64_t *buf = umv->umv_buf; /* buf to validate */
2717 2717 int64_t corrupt; /* corruption offset */
2718 2718 umem_buftag_t *buftagp; /* ptr to buftag */
2719 2719 umem_cache_t *cp = &umv->umv_cache;
2720 2720 int besilent = umv->umv_besilent;
2721 2721
2722 2722 /*LINTED*/
2723 2723 buftagp = UMEM_BUFTAG(cp, buf);
2724 2724
2725 2725 /*
2726 2726 * Read the buffer to check.
2727 2727 */
2728 2728 if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2729 2729 if (!besilent)
2730 2730 mdb_warn("couldn't read %p", addr);
2731 2731 return (WALK_NEXT);
2732 2732 }
2733 2733
2734 2734 if ((corrupt = verify_pattern(buf, cp->cache_verify,
2735 2735 UMEM_FREE_PATTERN)) >= 0) {
2736 2736 if (!besilent)
2737 2737 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2738 2738 addr, (uintptr_t)addr + corrupt);
2739 2739 goto corrupt;
2740 2740 }
2741 2741
2742 2742 if ((cp->cache_flags & UMF_HASH) &&
2743 2743 buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2744 2744 if (!besilent)
2745 2745 mdb_printf("buffer %p (free) seems to "
2746 2746 "have a corrupt redzone pattern\n", addr);
2747 2747 goto corrupt;
2748 2748 }
2749 2749
2750 2750 /*
2751 2751 * confirm bufctl pointer integrity.
2752 2752 */
2753 2753 if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2754 2754 if (!besilent)
2755 2755 mdb_printf("buffer %p (free) has a corrupt "
2756 2756 "buftag\n", addr);
2757 2757 goto corrupt;
2758 2758 }
2759 2759
2760 2760 return (WALK_NEXT);
2761 2761 corrupt:
2762 2762 umv->umv_corruption++;
2763 2763 return (WALK_NEXT);
2764 2764 }
2765 2765
2766 2766 /*
2767 2767 * verify_alloc()
2768 2768 * Verify that the buftag of an allocated buffer makes sense with respect
2769 2769 * to the buffer.
2770 2770 */
2771 2771 /*ARGSUSED1*/
2772 2772 static int
2773 2773 verify_alloc(uintptr_t addr, const void *data, void *private)
2774 2774 {
2775 2775 umem_verify_t *umv = (umem_verify_t *)private;
2776 2776 umem_cache_t *cp = &umv->umv_cache;
2777 2777 uint64_t *buf = umv->umv_buf; /* buf to validate */
2778 2778 /*LINTED*/
2779 2779 umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2780 2780 uint32_t *ip = (uint32_t *)buftagp;
2781 2781 uint8_t *bp = (uint8_t *)buf;
2782 2782 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
2783 2783 int besilent = umv->umv_besilent;
2784 2784
2785 2785 /*
2786 2786 * Read the buffer to check.
2787 2787 */
2788 2788 if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2789 2789 if (!besilent)
2790 2790 mdb_warn("couldn't read %p", addr);
2791 2791 return (WALK_NEXT);
2792 2792 }
2793 2793
2794 2794 /*
2795 2795 * There are two cases to handle:
2796 2796 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2797 2797 * 0xfeedfacefeedface at the end of it
2798 2798 * 2. If the buf was alloc'd using umem_alloc, it will have
2799 2799 * 0xbb just past the end of the region in use. At the buftag,
2800 2800 * it will have 0xfeedface (or, if the whole buffer is in use,
2801 2801 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2802 2802 * endianness), followed by 32 bits containing the offset of the
2803 2803 * 0xbb byte in the buffer.
2804 2804 *
2805 2805 * Finally, the two 32-bit words that comprise the second half of the
2806 2806 * buftag should xor to UMEM_BUFTAG_ALLOC
2807 2807 */
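	/*
	 * Worked example (illustrative): a buffer returned by umem_alloc(10)
	 * should have bp[10] == UMEM_REDZONE_BYTE (0xbb), with ip[1] decoding
	 * back to 10 via UMEM_SIZE_DECODE(); a umem_cache_alloc() buffer
	 * instead carries UMEM_REDZONE_PATTERN in bt_redzone.
	 */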
2808 2808
2809 2809 if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2810 2810 looks_ok = 1;
2811 2811 else if (!UMEM_SIZE_VALID(ip[1]))
2812 2812 size_ok = 0;
2813 2813 else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2814 2814 looks_ok = 1;
2815 2815 else
2816 2816 size_ok = 0;
2817 2817
2818 2818 if (!size_ok) {
2819 2819 if (!besilent)
2820 2820 mdb_printf("buffer %p (allocated) has a corrupt "
2821 2821 "redzone size encoding\n", addr);
2822 2822 goto corrupt;
2823 2823 }
2824 2824
2825 2825 if (!looks_ok) {
2826 2826 if (!besilent)
2827 2827 mdb_printf("buffer %p (allocated) has a corrupt "
2828 2828 "redzone signature\n", addr);
2829 2829 goto corrupt;
2830 2830 }
2831 2831
2832 2832 if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2833 2833 if (!besilent)
2834 2834 mdb_printf("buffer %p (allocated) has a "
2835 2835 "corrupt buftag\n", addr);
2836 2836 goto corrupt;
2837 2837 }
2838 2838
2839 2839 return (WALK_NEXT);
2840 2840 corrupt:
2841 2841 umv->umv_corruption++;
2842 2842 return (WALK_NEXT);
2843 2843 }
2844 2844
2845 2845 /*ARGSUSED2*/
2846 2846 int
2847 2847 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2848 2848 {
2849 2849 if (flags & DCMD_ADDRSPEC) {
2850 2850 int check_alloc = 0, check_free = 0;
2851 2851 umem_verify_t umv;
2852 2852
2853 2853 if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2854 2854 addr) == -1) {
2855 2855 mdb_warn("couldn't read umem_cache %p", addr);
2856 2856 return (DCMD_ERR);
2857 2857 }
2858 2858
2859 2859 umv.umv_size = umv.umv_cache.cache_buftag +
2860 2860 sizeof (umem_buftag_t);
2861 2861 umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2862 2862 umv.umv_corruption = 0;
2863 2863
2864 2864 if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2865 2865 check_alloc = 1;
2866 2866 if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2867 2867 check_free = 1;
2868 2868 } else {
2869 2869 if (!(flags & DCMD_LOOP)) {
2870 2870 mdb_warn("cache %p (%s) does not have "
2871 2871 "redzone checking enabled\n", addr,
2872 2872 umv.umv_cache.cache_name);
2873 2873 }
2874 2874 return (DCMD_ERR);
2875 2875 }
2876 2876
2877 2877 if (flags & DCMD_LOOP) {
2878 2878 /*
2879 2879 * table mode, don't print out every corrupt buffer
2880 2880 */
2881 2881 umv.umv_besilent = 1;
2882 2882 } else {
2883 2883 mdb_printf("Summary for cache '%s'\n",
2884 2884 umv.umv_cache.cache_name);
2885 2885 mdb_inc_indent(2);
2886 2886 umv.umv_besilent = 0;
2887 2887 }
2888 2888
2889 2889 if (check_alloc)
2890 2890 (void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2891 2891 if (check_free)
2892 2892 (void) mdb_pwalk("freemem", verify_free, &umv, addr);
2893 2893
2894 2894 if (flags & DCMD_LOOP) {
2895 2895 if (umv.umv_corruption == 0) {
2896 2896 mdb_printf("%-*s %?p clean\n",
2897 2897 UMEM_CACHE_NAMELEN,
2898 2898 umv.umv_cache.cache_name, addr);
2899 2899 } else {
2900 2900 char *s = ""; /* optional s in "buffer[s]" */
2901 2901 if (umv.umv_corruption > 1)
2902 2902 s = "s";
2903 2903
2904 2904 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2905 2905 UMEM_CACHE_NAMELEN,
2906 2906 umv.umv_cache.cache_name, addr,
2907 2907 umv.umv_corruption, s);
2908 2908 }
2909 2909 } else {
2910 2910 /*
2911 2911 * This is the more verbose mode, when the user has
2912 2912 * type addr::umem_verify. If the cache was clean,
2913 2913 			 * typed addr::umem_verify. If the cache was clean,
2914 2914 */
2915 2915 if (umv.umv_corruption == 0)
2916 2916 mdb_printf("clean\n");
2917 2917
2918 2918 mdb_dec_indent(2);
2919 2919 }
2920 2920 } else {
2921 2921 /*
2922 2922 * If the user didn't specify a cache to verify, we'll walk all
2923 2923 * umem_cache's, specifying ourself as a callback for each...
2924 2924 * this is the equivalent of '::walk umem_cache .::umem_verify'
2925 2925 */
2926 2926 		mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", UMEM_CACHE_NAMELEN,
2927 2927 "Cache Name", "Addr", "Cache Integrity");
2928 2928 (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2929 2929 }
2930 2930
2931 2931 return (DCMD_OK);
2932 2932 }
2933 2933
2934 2934 typedef struct vmem_node {
2935 2935 struct vmem_node *vn_next;
2936 2936 struct vmem_node *vn_parent;
2937 2937 struct vmem_node *vn_sibling;
2938 2938 struct vmem_node *vn_children;
2939 2939 uintptr_t vn_addr;
2940 2940 int vn_marked;
2941 2941 vmem_t vn_vmem;
2942 2942 } vmem_node_t;
2943 2943
2944 2944 typedef struct vmem_walk {
2945 2945 vmem_node_t *vw_root;
2946 2946 vmem_node_t *vw_current;
2947 2947 } vmem_walk_t;
2948 2948
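/*
 * The vmem walker snapshots every arena on vmem_list into a vmem_node_t
 * and links each node to its parent (the arena named by vm_source),
 * building an in-memory tree.  The walk then starts at the root arenas,
 * or at a specific arena if an address was supplied.
 */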
2949 2949 int
2950 2950 vmem_walk_init(mdb_walk_state_t *wsp)
2951 2951 {
2952 2952 uintptr_t vaddr, paddr;
2953 2953 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2954 2954 vmem_walk_t *vw;
2955 2955
2956 2956 if (umem_readvar(&vaddr, "vmem_list") == -1) {
2957 2957 mdb_warn("couldn't read 'vmem_list'");
2958 2958 return (WALK_ERR);
2959 2959 }
2960 2960
2961 2961 while (vaddr != NULL) {
2962 2962 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2963 2963 vp->vn_addr = vaddr;
2964 2964 vp->vn_next = head;
2965 2965 head = vp;
2966 2966
2967 2967 if (vaddr == wsp->walk_addr)
2968 2968 current = vp;
2969 2969
2970 2970 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2971 2971 mdb_warn("couldn't read vmem_t at %p", vaddr);
2972 2972 goto err;
2973 2973 }
2974 2974
2975 2975 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2976 2976 }
2977 2977
2978 2978 for (vp = head; vp != NULL; vp = vp->vn_next) {
2979 2979
2980 2980 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2981 2981 vp->vn_sibling = root;
2982 2982 root = vp;
2983 2983 continue;
2984 2984 }
2985 2985
2986 2986 for (parent = head; parent != NULL; parent = parent->vn_next) {
2987 2987 if (parent->vn_addr != paddr)
2988 2988 continue;
2989 2989 vp->vn_sibling = parent->vn_children;
2990 2990 parent->vn_children = vp;
2991 2991 vp->vn_parent = parent;
2992 2992 break;
2993 2993 }
2994 2994
2995 2995 if (parent == NULL) {
2996 2996 mdb_warn("couldn't find %p's parent (%p)\n",
2997 2997 vp->vn_addr, paddr);
2998 2998 goto err;
2999 2999 }
3000 3000 }
3001 3001
3002 3002 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3003 3003 vw->vw_root = root;
3004 3004
3005 3005 if (current != NULL)
3006 3006 vw->vw_current = current;
3007 3007 else
3008 3008 vw->vw_current = root;
3009 3009
3010 3010 wsp->walk_data = vw;
3011 3011 return (WALK_NEXT);
3012 3012 err:
3013 3013 for (vp = head; head != NULL; vp = head) {
3014 3014 head = vp->vn_next;
3015 3015 mdb_free(vp, sizeof (vmem_node_t));
3016 3016 }
3017 3017
3018 3018 return (WALK_ERR);
3019 3019 }
3020 3020
3021 3021 int
3022 3022 vmem_walk_step(mdb_walk_state_t *wsp)
3023 3023 {
3024 3024 vmem_walk_t *vw = wsp->walk_data;
3025 3025 vmem_node_t *vp;
3026 3026 int rval;
3027 3027
3028 3028 if ((vp = vw->vw_current) == NULL)
3029 3029 return (WALK_DONE);
3030 3030
3031 3031 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3032 3032
3033 3033 if (vp->vn_children != NULL) {
3034 3034 vw->vw_current = vp->vn_children;
3035 3035 return (rval);
3036 3036 }
3037 3037
3038 3038 do {
3039 3039 vw->vw_current = vp->vn_sibling;
3040 3040 vp = vp->vn_parent;
3041 3041 } while (vw->vw_current == NULL && vp != NULL);
3042 3042
3043 3043 return (rval);
3044 3044 }
3045 3045
3046 3046 /*
3047 3047 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3048 3048 * children are visited before their parent. We perform the postfix walk
3049 3049 * iteratively (rather than recursively) to allow mdb to regain control
3050 3050 * after each callback.
3051 3051 */
3052 3052 int
3053 3053 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3054 3054 {
3055 3055 vmem_walk_t *vw = wsp->walk_data;
3056 3056 vmem_node_t *vp = vw->vw_current;
3057 3057 int rval;
3058 3058
3059 3059 /*
3060 3060 * If this node is marked, then we know that we have already visited
3061 3061 * all of its children. If the node has any siblings, they need to
3062 3062 * be visited next; otherwise, we need to visit the parent. Note
3063 3063 * that vp->vn_marked will only be zero on the first invocation of
3064 3064 * the step function.
3065 3065 */
3066 3066 if (vp->vn_marked) {
3067 3067 if (vp->vn_sibling != NULL)
3068 3068 vp = vp->vn_sibling;
3069 3069 else if (vp->vn_parent != NULL)
3070 3070 vp = vp->vn_parent;
3071 3071 else {
3072 3072 /*
3073 3073 * We have neither a parent, nor a sibling, and we
3074 3074 * have already been visited; we're done.
3075 3075 */
3076 3076 return (WALK_DONE);
3077 3077 }
3078 3078 }
3079 3079
3080 3080 /*
3081 3081 * Before we visit this node, visit its children.
3082 3082 */
3083 3083 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3084 3084 vp = vp->vn_children;
3085 3085
3086 3086 vp->vn_marked = 1;
3087 3087 vw->vw_current = vp;
3088 3088 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3089 3089
3090 3090 return (rval);
3091 3091 }
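
/*
 * Illustrative sketch (not part of this module): the same iterative
 * post-order scheme, reduced to a toy node type.  Each call descends to
 * the deepest not-yet-visited child, visits it, and on the next call
 * moves to a sibling or back toward the parent.
 */
typedef struct pnode {
	struct pnode *pn_parent;
	struct pnode *pn_sibling;
	struct pnode *pn_children;
	int pn_marked;
} pnode_t;

static pnode_t *
postfix_next(pnode_t *pp)
{
	if (pp->pn_marked) {
		if (pp->pn_sibling != NULL)
			pp = pp->pn_sibling;
		else if (pp->pn_parent != NULL)
			pp = pp->pn_parent;
		else
			return (NULL);		/* every node visited */
	}

	/* visit children before their parent */
	while (pp->pn_children != NULL && !pp->pn_children->pn_marked)
		pp = pp->pn_children;

	pp->pn_marked = 1;
	return (pp);
}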
3092 3092
3093 3093 void
3094 3094 vmem_walk_fini(mdb_walk_state_t *wsp)
3095 3095 {
3096 3096 vmem_walk_t *vw = wsp->walk_data;
3097 3097 vmem_node_t *root = vw->vw_root;
3098 3098 int done;
3099 3099
3100 3100 if (root == NULL)
3101 3101 return;
3102 3102
3103 3103 if ((vw->vw_root = root->vn_children) != NULL)
3104 3104 vmem_walk_fini(wsp);
3105 3105
3106 3106 vw->vw_root = root->vn_sibling;
3107 3107 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3108 3108 mdb_free(root, sizeof (vmem_node_t));
3109 3109
3110 3110 if (done) {
3111 3111 mdb_free(vw, sizeof (vmem_walk_t));
3112 3112 } else {
3113 3113 vmem_walk_fini(wsp);
3114 3114 }
3115 3115 }
3116 3116
3117 3117 typedef struct vmem_seg_walk {
3118 3118 uint8_t vsw_type;
3119 3119 uintptr_t vsw_start;
3120 3120 uintptr_t vsw_current;
3121 3121 } vmem_seg_walk_t;
3122 3122
3123 3123 /*ARGSUSED*/
3124 3124 int
3125 3125 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3126 3126 {
3127 3127 vmem_seg_walk_t *vsw;
3128 3128
3129 3129 if (wsp->walk_addr == NULL) {
3130 3130 mdb_warn("vmem_%s does not support global walks\n", name);
3131 3131 return (WALK_ERR);
3132 3132 }
3133 3133
3134 3134 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3135 3135
3136 3136 vsw->vsw_type = type;
3137 3137 vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
3138 3138 vsw->vsw_current = vsw->vsw_start;
3139 3139
3140 3140 return (WALK_NEXT);
3141 3141 }
3142 3142
3143 3143 /*
3144 3144 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3145 3145 */
3146 3146 #define VMEM_NONE 0
3147 3147
3148 3148 int
3149 3149 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3150 3150 {
3151 3151 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3152 3152 }
3153 3153
3154 3154 int
3155 3155 vmem_free_walk_init(mdb_walk_state_t *wsp)
3156 3156 {
3157 3157 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3158 3158 }
3159 3159
3160 3160 int
3161 3161 vmem_span_walk_init(mdb_walk_state_t *wsp)
3162 3162 {
3163 3163 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3164 3164 }
3165 3165
3166 3166 int
3167 3167 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3168 3168 {
3169 3169 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3170 3170 }
3171 3171
3172 3172 int
3173 3173 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3174 3174 {
3175 3175 vmem_seg_t seg;
3176 3176 vmem_seg_walk_t *vsw = wsp->walk_data;
3177 3177 uintptr_t addr = vsw->vsw_current;
3178 3178 static size_t seg_size = 0;
3179 3179 int rval;
3180 3180
3181 3181 if (!seg_size) {
3182 3182 if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
3183 3183 mdb_warn("failed to read 'vmem_seg_size'");
3184 3184 seg_size = sizeof (vmem_seg_t);
3185 3185 }
3186 3186 }
3187 3187
3188 3188 if (seg_size < sizeof (seg))
3189 3189 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3190 3190
3191 3191 if (mdb_vread(&seg, seg_size, addr) == -1) {
3192 3192 mdb_warn("couldn't read vmem_seg at %p", addr);
3193 3193 return (WALK_ERR);
3194 3194 }
3195 3195
3196 3196 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3197 3197 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3198 3198 rval = WALK_NEXT;
3199 3199 } else {
3200 3200 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3201 3201 }
3202 3202
3203 3203 if (vsw->vsw_current == vsw->vsw_start)
3204 3204 return (WALK_DONE);
3205 3205
3206 3206 return (rval);
3207 3207 }
3208 3208
3209 3209 void
3210 3210 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3211 3211 {
3212 3212 vmem_seg_walk_t *vsw = wsp->walk_data;
3213 3213
3214 3214 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3215 3215 }
3216 3216
3217 3217 #define VMEM_NAMEWIDTH 22
3218 3218
3219 3219 int
3220 3220 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3221 3221 {
3222 3222 vmem_t v, parent;
3223 3223 uintptr_t paddr;
3224 3224 int ident = 0;
3225 3225 char c[VMEM_NAMEWIDTH];
3226 3226
3227 3227 if (!(flags & DCMD_ADDRSPEC)) {
3228 3228 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3229 3229 mdb_warn("can't walk vmem");
3230 3230 return (DCMD_ERR);
3231 3231 }
3232 3232 return (DCMD_OK);
3233 3233 }
3234 3234
3235 3235 if (DCMD_HDRSPEC(flags))
3236 3236 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3237 3237 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3238 3238 "TOTAL", "SUCCEED", "FAIL");
3239 3239
3240 3240 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3241 3241 mdb_warn("couldn't read vmem at %p", addr);
3242 3242 return (DCMD_ERR);
3243 3243 }
3244 3244
3245 3245 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3246 3246 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3247 3247 mdb_warn("couldn't trace %p's ancestry", addr);
3248 3248 ident = 0;
3249 3249 break;
3250 3250 }
3251 3251 paddr = (uintptr_t)parent.vm_source;
3252 3252 }
3253 3253
3254 3254 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3255 3255
3256 3256 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3257 3257 addr, VMEM_NAMEWIDTH, c,
3258 3258 v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3259 3259 v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3260 3260
3261 3261 return (DCMD_OK);
3262 3262 }
3263 3263
3264 3264 void
3265 3265 vmem_seg_help(void)
3266 3266 {
3267 3267 mdb_printf("%s\n",
3268 3268 "Display the contents of vmem_seg_ts, with optional filtering.\n"
3269 3269 "\n"
3270 3270 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3271 3271 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3272 3272 "information.\n");
3273 3273 mdb_dec_indent(2);
3274 3274 mdb_printf("%<b>OPTIONS%</b>\n");
3275 3275 mdb_inc_indent(2);
3276 3276 mdb_printf("%s",
3277 3277 " -v Display the full content of the vmem_seg, including its stack trace\n"
3278 3278 " -s report the size of the segment, instead of the end address\n"
3279 3279 " -c caller\n"
3280 3280 " filter out segments without the function/PC in their stack trace\n"
3281 3281 " -e earliest\n"
3282 3282 " filter out segments timestamped before earliest\n"
3283 3283 " -l latest\n"
3284 3284 " filter out segments timestamped after latest\n"
3285 3285 " -m minsize\n"
3286 3286 "	filter out segments smaller than minsize\n"
3287 3287 " -M maxsize\n"
3288 3288 "	filter out segments larger than maxsize\n"
3289 3289 " -t thread\n"
3290 3290 " filter out segments not involving thread\n"
3291 3291 " -T type\n"
3292 3292 " filter out segments not of type 'type'\n"
3293 3293 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3294 3294 }
3295 3295
3296 3296
3297 3297 /*ARGSUSED*/
3298 3298 int
3299 3299 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3300 3300 {
3301 3301 vmem_seg_t vs;
3302 3302 uintptr_t *stk = vs.vs_stack;
3303 3303 uintptr_t sz;
3304 3304 uint8_t t;
3305 3305 const char *type = NULL;
3306 3306 GElf_Sym sym;
3307 3307 char c[MDB_SYM_NAMLEN];
3308 3308 int no_debug;
3309 3309 int i;
3310 3310 int depth;
3311 3311 uintptr_t laddr, haddr;
3312 3312
3313 3313 uintptr_t caller = NULL, thread = NULL;
3314 3314 uintptr_t minsize = 0, maxsize = 0;
3315 3315
3316 3316 hrtime_t earliest = 0, latest = 0;
3317 3317
3318 3318 uint_t size = 0;
3319 3319 uint_t verbose = 0;
3320 3320
3321 3321 if (!(flags & DCMD_ADDRSPEC))
3322 3322 return (DCMD_USAGE);
3323 3323
3324 3324 if (mdb_getopts(argc, argv,
3325 3325 'c', MDB_OPT_UINTPTR, &caller,
3326 3326 'e', MDB_OPT_UINT64, &earliest,
3327 3327 'l', MDB_OPT_UINT64, &latest,
3328 3328 's', MDB_OPT_SETBITS, TRUE, &size,
3329 3329 'm', MDB_OPT_UINTPTR, &minsize,
3330 3330 'M', MDB_OPT_UINTPTR, &maxsize,
3331 3331 't', MDB_OPT_UINTPTR, &thread,
3332 3332 'T', MDB_OPT_STR, &type,
3333 3333 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3334 3334 NULL) != argc)
3335 3335 return (DCMD_USAGE);
3336 3336
3337 3337 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3338 3338 if (verbose) {
3339 3339 mdb_printf("%16s %4s %16s %16s %16s\n"
3340 3340 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3341 3341 "ADDR", "TYPE", "START", "END", "SIZE",
3342 3342 "", "", "THREAD", "TIMESTAMP", "");
3343 3343 } else {
3344 3344 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3345 3345 "START", size? "SIZE" : "END", "WHO");
3346 3346 }
3347 3347 }
3348 3348
3349 3349 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3350 3350 mdb_warn("couldn't read vmem_seg at %p", addr);
3351 3351 return (DCMD_ERR);
3352 3352 }
3353 3353
3354 3354 if (type != NULL) {
3355 3355 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3356 3356 t = VMEM_ALLOC;
3357 3357 else if (strcmp(type, "FREE") == 0)
3358 3358 t = VMEM_FREE;
3359 3359 else if (strcmp(type, "SPAN") == 0)
3360 3360 t = VMEM_SPAN;
3361 3361 else if (strcmp(type, "ROTR") == 0 ||
3362 3362 strcmp(type, "ROTOR") == 0)
3363 3363 t = VMEM_ROTOR;
3364 3364 else if (strcmp(type, "WLKR") == 0 ||
3365 3365 strcmp(type, "WALKER") == 0)
3366 3366 t = VMEM_WALKER;
3367 3367 else {
3368 3368 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3369 3369 type);
3370 3370 return (DCMD_ERR);
3371 3371 }
3372 3372
3373 3373 if (vs.vs_type != t)
3374 3374 return (DCMD_OK);
3375 3375 }
3376 3376
3377 3377 sz = vs.vs_end - vs.vs_start;
3378 3378
3379 3379 if (minsize != 0 && sz < minsize)
3380 3380 return (DCMD_OK);
3381 3381
3382 3382 if (maxsize != 0 && sz > maxsize)
3383 3383 return (DCMD_OK);
3384 3384
3385 3385 t = vs.vs_type;
3386 3386 depth = vs.vs_depth;
3387 3387
3388 3388 /*
3389 3389 * debug info, when present, is only accurate for VMEM_ALLOC segments
3390 3390 */
3391 3391 no_debug = (t != VMEM_ALLOC) ||
3392 3392 (depth == 0 || depth > VMEM_STACK_DEPTH);
3393 3393
3394 3394 if (no_debug) {
3395 3395 if (caller != NULL || thread != NULL || earliest != 0 ||
3396 3396 latest != 0)
3397 3397 return (DCMD_OK); /* not enough info */
3398 3398 } else {
3399 3399 if (caller != NULL) {
3400 3400 laddr = caller;
3401 3401 haddr = caller + sizeof (caller);
3402 3402
3403 3403 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3404 3404 sizeof (c), &sym) != -1 &&
3405 3405 caller == (uintptr_t)sym.st_value) {
3406 3406 /*
3407 3407 * We were provided an exact symbol value; any
3408 3408 * address in the function is valid.
3409 3409 */
3410 3410 laddr = (uintptr_t)sym.st_value;
3411 3411 haddr = (uintptr_t)sym.st_value + sym.st_size;
3412 3412 }
3413 3413
3414 3414 for (i = 0; i < depth; i++)
3415 3415 if (vs.vs_stack[i] >= laddr &&
3416 3416 vs.vs_stack[i] < haddr)
3417 3417 break;
3418 3418
3419 3419 if (i == depth)
3420 3420 return (DCMD_OK);
3421 3421 }
3422 3422
3423 3423 if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3424 3424 return (DCMD_OK);
3425 3425
3426 3426 if (earliest != 0 && vs.vs_timestamp < earliest)
3427 3427 return (DCMD_OK);
3428 3428
3429 3429 if (latest != 0 && vs.vs_timestamp > latest)
3430 3430 return (DCMD_OK);
3431 3431 }
3432 3432
3433 3433 type = (t == VMEM_ALLOC ? "ALLC" :
3434 3434 t == VMEM_FREE ? "FREE" :
3435 3435 t == VMEM_SPAN ? "SPAN" :
3436 3436 t == VMEM_ROTOR ? "ROTR" :
3437 3437 t == VMEM_WALKER ? "WLKR" :
3438 3438 "????");
3439 3439
3440 3440 if (flags & DCMD_PIPE_OUT) {
3441 3441 mdb_printf("%#r\n", addr);
3442 3442 return (DCMD_OK);
3443 3443 }
3444 3444
3445 3445 if (verbose) {
3446 3446 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3447 3447 addr, type, vs.vs_start, vs.vs_end, sz);
3448 3448
3449 3449 if (no_debug)
3450 3450 return (DCMD_OK);
3451 3451
3452 3452 mdb_printf("%16s %4s %16d %16llx\n",
3453 3453 "", "", vs.vs_thread, vs.vs_timestamp);
3454 3454
3455 3455 mdb_inc_indent(17);
3456 3456 for (i = 0; i < depth; i++) {
3457 3457 mdb_printf("%a\n", stk[i]);
3458 3458 }
3459 3459 mdb_dec_indent(17);
3460 3460 mdb_printf("\n");
3461 3461 } else {
3462 3462 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3463 3463 vs.vs_start, size? sz : vs.vs_end);
3464 3464
3465 3465 if (no_debug) {
3466 3466 mdb_printf("\n");
3467 3467 return (DCMD_OK);
3468 3468 }
3469 3469
3470 3470 for (i = 0; i < depth; i++) {
3471 3471 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3472 3472 c, sizeof (c), &sym) == -1)
3473 3473 continue;
3474 3474 if (is_umem_sym(c, "vmem_"))
3475 3475 continue;
3476 3476 break;
3477 3477 }
3478 3478 mdb_printf(" %a\n", stk[i]);
3479 3479 }
3480 3480 return (DCMD_OK);
3481 3481 }
3482 3482
3483 3483 /*ARGSUSED*/
3484 3484 static int
3485 3485 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3486 3486 {
3487 3487 char name[UMEM_CACHE_NAMELEN + 1];
3488 3488 hrtime_t delta;
3489 3489 int i, depth;
3490 3490
3491 3491 if (bcp->bc_timestamp == 0)
3492 3492 return (WALK_DONE);
3493 3493
3494 3494 if (*newest == 0)
3495 3495 *newest = bcp->bc_timestamp;
3496 3496
3497 3497 delta = *newest - bcp->bc_timestamp;
3498 3498 depth = MIN(bcp->bc_depth, umem_stack_depth);
3499 3499
3500 3500 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3501 3501 &bcp->bc_cache->cache_name) <= 0)
3502 3502 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3503 3503
3504 3504 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3505 3505 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3506 3506
3507 3507 for (i = 0; i < depth; i++)
3508 3508 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3509 3509
3510 3510 return (WALK_NEXT);
3511 3511 }
3512 3512
3513 3513 int
3514 3514 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3515 3515 {
3516 3516 const char *logname = "umem_transaction_log";
3517 3517 hrtime_t newest = 0;
3518 3518
3519 3519 if ((flags & DCMD_ADDRSPEC) || argc > 1)
3520 3520 return (DCMD_USAGE);
3521 3521
3522 3522 if (argc > 0) {
3523 3523 if (argv->a_type != MDB_TYPE_STRING)
3524 3524 return (DCMD_USAGE);
3525 3525 if (strcmp(argv->a_un.a_str, "fail") == 0)
3526 3526 logname = "umem_failure_log";
3527 3527 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3528 3528 logname = "umem_slab_log";
3529 3529 else
3530 3530 return (DCMD_USAGE);
3531 3531 }
3532 3532
3533 3533 if (umem_readvar(&addr, logname) == -1) {
3534 3534 		mdb_warn("failed to read %s log header pointer", logname);
3535 3535 return (DCMD_ERR);
3536 3536 }
3537 3537
3538 3538 if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3539 3539 mdb_warn("failed to walk umem log");
3540 3540 return (DCMD_ERR);
3541 3541 }
3542 3542
3543 3543 return (DCMD_OK);
3544 3544 }
3545 3545
3546 3546 /*
3547 3547 * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
3548 3548 * The first piece is a structure which we use to accumulate umem_cache_t
3549 3549 * addresses of interest. The umc_add is used as a callback for the umem_cache
3550 3550 * walker; we either add all caches, or ones named explicitly as arguments.
3551 3551 */
3552 3552
3553 3553 typedef struct umclist {
3554 3554 const char *umc_name; /* Name to match (or NULL) */
3555 3555 uintptr_t *umc_caches; /* List of umem_cache_t addrs */
3556 3556 int umc_nelems; /* Num entries in umc_caches */
3557 3557 int umc_size; /* Size of umc_caches array */
3558 3558 } umclist_t;
3559 3559
3560 3560 static int
3561 3561 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3562 3562 {
3563 3563 void *p;
3564 3564 int s;
3565 3565
3566 3566 if (umc->umc_name == NULL ||
3567 3567 strcmp(cp->cache_name, umc->umc_name) == 0) {
3568 3568 /*
3569 3569 * If we have a match, grow our array (if necessary), and then
3570 3570 * add the virtual address of the matching cache to our list.
3571 3571 */
3572 3572 if (umc->umc_nelems >= umc->umc_size) {
3573 3573 s = umc->umc_size ? umc->umc_size * 2 : 256;
3574 3574 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3575 3575
3576 3576 bcopy(umc->umc_caches, p,
3577 3577 sizeof (uintptr_t) * umc->umc_size);
3578 3578
3579 3579 umc->umc_caches = p;
3580 3580 umc->umc_size = s;
3581 3581 }
3582 3582
3583 3583 umc->umc_caches[umc->umc_nelems++] = addr;
3584 3584 return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3585 3585 }
3586 3586
3587 3587 return (WALK_NEXT);
3588 3588 }
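/*
 * An illustrative, self-contained sketch (not part of this module) of the
 * grow-by-doubling pattern umc_add() uses, rewritten against plain libc
 * instead of mdb's garbage-collected mdb_alloc(UM_SLEEP | UM_GC):
 */
#if 0	/* illustrative only */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static int
append_addr(uintptr_t **listp, int *nelemsp, int *sizep, uintptr_t addr)
{
	if (*nelemsp >= *sizep) {
		/* double the array (or start at 256 entries) */
		int s = *sizep ? *sizep * 2 : 256;
		uintptr_t *p = malloc(sizeof (uintptr_t) * s);

		if (p == NULL)
			return (-1);
		if (*sizep > 0)
			memcpy(p, *listp, sizeof (uintptr_t) * *sizep);
		free(*listp);
		*listp = p;
		*sizep = s;
	}
	(*listp)[(*nelemsp)++] = addr;
	return (0);
}
#endif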
3589 3589
3590 3590 /*
3591 3591 * The second piece of ::umausers is a hash table of allocations. Each
3592 3592 * allocation owner is identified by its stack trace and data_size. We then
3593 3593 * track the total bytes of all such allocations, and the number of allocations
3594 3594 * to report at the end. Once we have a list of caches, we walk through the
3595 3595 * allocated bufctls of each, and update our hash table accordingly.
3596 3596 */
3597 3597
3598 3598 typedef struct umowner {
3599 3599 struct umowner *umo_head; /* First hash elt in bucket */
3600 3600 struct umowner *umo_next; /* Next hash elt in chain */
3601 3601 size_t umo_signature; /* Hash table signature */
3602 3602 uint_t umo_num; /* Number of allocations */
3603 3603 size_t umo_data_size; /* Size of each allocation */
3604 3604 size_t umo_total_size; /* Total bytes of allocation */
3605 3605 int umo_depth; /* Depth of stack trace */
3606 3606 uintptr_t *umo_stack; /* Stack trace */
3607 3607 } umowner_t;
3608 3608
3609 3609 typedef struct umusers {
3610 3610 const umem_cache_t *umu_cache; /* Current umem cache */
3611 3611 umowner_t *umu_hash; /* Hash table of owners */
3612 3612 uintptr_t *umu_stacks; /* stacks for owners */
3613 3613 int umu_nelems; /* Number of entries in use */
3614 3614 int umu_size; /* Total number of entries */
3615 3615 } umusers_t;
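/*
 * Note that umu_size is only ever zero, 1024, or a doubling thereof (see
 * umu_add() below), so whenever the table is in use it is a power of two
 * and "signature & (umu_size - 1)" is an inexpensive modulo.  For example,
 * with umu_size = 1024 the mask is 0x3ff, so a signature of 0x12345 lands
 * in bucket 0x345.
 */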
3616 3616
3617 3617 static void
3618 3618 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3619 3619 size_t size, size_t data_size)
3620 3620 {
3621 3621 int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3622 3622 size_t bucket, signature = data_size;
3623 3623 umowner_t *umo, *umoend;
3624 3624
3625 3625 /*
3626 3626 * If the hash table is full, double its size and rehash everything.
3627 3627 */
3628 3628 if (umu->umu_nelems >= umu->umu_size) {
3629 3629 int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3630 3630 size_t umowner_size = sizeof (umowner_t);
3631 3631 size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3632 3632 uintptr_t *new_stacks;
3633 3633
3634 3634 umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3635 3635 new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3636 3636
3637 3637 bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3638 3638 bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3639 3639 umu->umu_hash = umo;
3640 3640 umu->umu_stacks = new_stacks;
3641 3641 umu->umu_size = s;
3642 3642
3643 3643 umoend = umu->umu_hash + umu->umu_size;
3644 3644 for (umo = umu->umu_hash; umo < umoend; umo++) {
3645 3645 umo->umo_head = NULL;
3646 3646 umo->umo_stack = &umu->umu_stacks[
3647 3647 umem_stack_depth * (umo - umu->umu_hash)];
3648 3648 }
3649 3649
3650 3650 umoend = umu->umu_hash + umu->umu_nelems;
3651 3651 for (umo = umu->umu_hash; umo < umoend; umo++) {
3652 3652 bucket = umo->umo_signature & (umu->umu_size - 1);
3653 3653 umo->umo_next = umu->umu_hash[bucket].umo_head;
3654 3654 umu->umu_hash[bucket].umo_head = umo;
3655 3655 }
3656 3656 }
3657 3657
3658 3658 /*
3659 3659 * Finish computing the hash signature from the stack trace, and then
3660 3660 * see if the owner is in the hash table. If so, update our stats.
3661 3661 */
3662 3662 for (i = 0; i < depth; i++)
3663 3663 signature += bcp->bc_stack[i];
3664 3664
3665 3665 bucket = signature & (umu->umu_size - 1);
3666 3666
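	/*
	 * The loop below ORs the pairwise differences between the candidate
	 * owner and this bufctl together; the two match only if every
	 * difference is zero, avoiding a per-field early-exit comparison.
	 */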
3667 3667 for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3668 3668 if (umo->umo_signature == signature) {
3669 3669 size_t difference = 0;
3670 3670
3671 3671 difference |= umo->umo_data_size - data_size;
3672 3672 difference |= umo->umo_depth - depth;
3673 3673
3674 3674 for (i = 0; i < depth; i++) {
3675 3675 difference |= umo->umo_stack[i] -
3676 3676 bcp->bc_stack[i];
3677 3677 }
3678 3678
3679 3679 if (difference == 0) {
3680 3680 umo->umo_total_size += size;
3681 3681 umo->umo_num++;
3682 3682 return;
3683 3683 }
3684 3684 }
3685 3685 }
3686 3686
3687 3687 /*
3688 3688 * If the owner is not yet hashed, grab the next element and fill it
3689 3689 * in based on the allocation information.
3690 3690 */
3691 3691 umo = &umu->umu_hash[umu->umu_nelems++];
3692 3692 umo->umo_next = umu->umu_hash[bucket].umo_head;
3693 3693 umu->umu_hash[bucket].umo_head = umo;
3694 3694
3695 3695 umo->umo_signature = signature;
3696 3696 umo->umo_num = 1;
3697 3697 umo->umo_data_size = data_size;
3698 3698 umo->umo_total_size = size;
3699 3699 umo->umo_depth = depth;
3700 3700
3701 3701 for (i = 0; i < depth; i++)
3702 3702 umo->umo_stack[i] = bcp->bc_stack[i];
3703 3703 }
3704 3704
3705 3705 /*
3706 3706 * When ::umausers is invoked without the -f flag, we simply update our hash
3707 3707 * table with the information from each allocated bufctl.
3708 3708 */
3709 3709 /*ARGSUSED*/
3710 3710 static int
3711 3711 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3712 3712 {
3713 3713 const umem_cache_t *cp = umu->umu_cache;
3714 3714
3715 3715 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3716 3716 return (WALK_NEXT);
3717 3717 }
3718 3718
3719 3719 /*
3720 3720 * When ::umausers is invoked with the -f flag, we print out the information
3721 3721 * for each bufctl as well as updating the hash table.
3722 3722 */
3723 3723 static int
3724 3724 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3725 3725 {
3726 3726 int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3727 3727 const umem_cache_t *cp = umu->umu_cache;
3728 3728
3729 3729 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3730 3730 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3731 3731
3732 3732 for (i = 0; i < depth; i++)
3733 3733 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3734 3734
3735 3735 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3736 3736 return (WALK_NEXT);
3737 3737 }
3738 3738
3739 3739 /*
3740 3740 * We sort our results by allocation size before printing them.
3741 3741 */
3742 3742 static int
3743 3743 umownercmp(const void *lp, const void *rp)
3744 3744 {
3745 3745 const umowner_t *lhs = lp;
3746 3746 const umowner_t *rhs = rp;
3747 3747
3748 3748 return (rhs->umo_total_size - lhs->umo_total_size);
3749 3749 }
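/*
 * Since umo_total_size is a size_t, the subtraction above is narrowed to
 * int before qsort(3C) sees it; for the totals seen in practice this is
 * harmless, but a hedged, overflow-proof alternative would compare
 * explicitly, as in this illustrative sketch (not part of this module):
 */
#if 0	/* illustrative only */
static int
umownercmp_safe(const void *lp, const void *rp)
{
	const umowner_t *lhs = lp;
	const umowner_t *rhs = rp;

	if (rhs->umo_total_size > lhs->umo_total_size)
		return (1);	/* larger totals sort earlier (descending) */
	if (rhs->umo_total_size < lhs->umo_total_size)
		return (-1);
	return (0);
}
#endif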
3750 3750
3751 3751 /*
3752 3752 * The main engine of ::umausers is relatively straightforward: First we
3753 3753 * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3754 3754 * iterate over the allocated bufctls of each cache in the list. Finally,
3755 3755 * we sort and print our results.
3756 3756 */
3757 3757 /*ARGSUSED*/
3758 3758 int
3759 3759 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3760 3760 {
3761 3761 int mem_threshold = 8192; /* Minimum # bytes for printing */
3762 3762 int cnt_threshold = 100; /* Minimum # blocks for printing */
3763 3763 int audited_caches = 0; /* Number of UMF_AUDIT caches found */
3764 3764 int do_all_caches = 1; /* Do all caches (no arguments) */
3765 3765 int opt_e = FALSE; /* Include "small" users */
3766 3766 int opt_f = FALSE; /* Print stack traces */
3767 3767
3768 3768 mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3769 3769 umowner_t *umo, *umoend;
3770 3770 int i, oelems;
3771 3771
3772 3772 umclist_t umc;
3773 3773 umusers_t umu;
3774 3774
3775 3775 if (flags & DCMD_ADDRSPEC)
3776 3776 return (DCMD_USAGE);
3777 3777
3778 3778 bzero(&umc, sizeof (umc));
3779 3779 bzero(&umu, sizeof (umu));
3780 3780
3781 3781 while ((i = mdb_getopts(argc, argv,
3782 3782 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3783 3783 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3784 3784
3785 3785 argv += i; /* skip past options we just processed */
3786 3786 argc -= i; /* adjust argc */
3787 3787
3788 3788 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3789 3789 return (DCMD_USAGE);
3790 3790
3791 3791 oelems = umc.umc_nelems;
3792 3792 umc.umc_name = argv->a_un.a_str;
3793 3793 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3794 3794
3795 3795 if (umc.umc_nelems == oelems) {
3796 3796 mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3797 3797 return (DCMD_ERR);
3798 3798 }
3799 3799
3800 3800 do_all_caches = 0;
3801 3801 argv++;
3802 3802 argc--;
3803 3803 }
3804 3804
3805 3805 if (opt_e)
3806 3806 mem_threshold = cnt_threshold = 0;
3807 3807
3808 3808 if (opt_f)
3809 3809 callback = (mdb_walk_cb_t)umause2;
3810 3810
3811 3811 if (do_all_caches) {
3812 3812 umc.umc_name = NULL; /* match all cache names */
3813 3813 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3814 3814 }
3815 3815
3816 3816 for (i = 0; i < umc.umc_nelems; i++) {
3817 3817 uintptr_t cp = umc.umc_caches[i];
3818 3818 umem_cache_t c;
3819 3819
3820 3820 if (mdb_vread(&c, sizeof (c), cp) == -1) {
3821 3821 mdb_warn("failed to read cache at %p", cp);
3822 3822 continue;
3823 3823 }
3824 3824
3825 3825 if (!(c.cache_flags & UMF_AUDIT)) {
3826 3826 if (!do_all_caches) {
3827 3827 mdb_warn("UMF_AUDIT is not enabled for %s\n",
3828 3828 c.cache_name);
3829 3829 }
3830 3830 continue;
3831 3831 }
3832 3832
3833 3833 umu.umu_cache = &c;
3834 3834 (void) mdb_pwalk("bufctl", callback, &umu, cp);
3835 3835 audited_caches++;
3836 3836 }
3837 3837
3838 3838 if (audited_caches == 0 && do_all_caches) {
3839 3839 mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3840 3840 return (DCMD_ERR);
3841 3841 }
3842 3842
3843 3843 qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3844 3844 umoend = umu.umu_hash + umu.umu_nelems;
3845 3845
3846 3846 for (umo = umu.umu_hash; umo < umoend; umo++) {
3847 3847 if (umo->umo_total_size < mem_threshold &&
3848 3848 umo->umo_num < cnt_threshold)
3849 3849 continue;
3850 3850 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3851 3851 umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3852 3852 for (i = 0; i < umo->umo_depth; i++)
3853 3853 mdb_printf("\t %a\n", umo->umo_stack[i]);
3854 3854 }
3855 3855
3856 3856 return (DCMD_OK);
3857 3857 }
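/*
 * A hedged usage sketch (not output from a real session): with no
 * arguments every UMF_AUDIT cache is examined, -e drops the size/count
 * thresholds, -f prints each bufctl as it is visited, and cache names
 * restrict the walk, e.g.
 *
 *	> ::umausers
 *	> ::umausers -e
 *	> ::umausers -f umem_alloc_32
 *
 * Any cache named as an argument must exist and must have been created
 * with UMF_AUDIT (typically by running the target with UMEM_DEBUG=audit).
 */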
3858 3858
3859 3859 struct malloc_data {
3860 3860 uint32_t malloc_size;
3861 3861 uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */
3862 3862 };
3863 3863
3864 3864 #ifdef _LP64
3865 3865 #define UMI_MAX_BUCKET (UMEM_MAXBUF - 2*sizeof (struct malloc_data))
3866 3866 #else
3867 3867 #define UMI_MAX_BUCKET (UMEM_MAXBUF - sizeof (struct malloc_data))
3868 3868 #endif
3869 3869
3870 3870 typedef struct umem_malloc_info {
3871 3871 size_t um_total; /* total allocated buffers */
3872 3872 size_t um_malloc; /* malloc buffers */
3873 3873 size_t um_malloc_size; /* sum of malloc buffer sizes */
3874 3874 size_t um_malloc_overhead; /* sum of in-chunk overheads */
3875 3875
3876 3876 umem_cache_t *um_cp;
3877 3877
3878 3878 uint_t *um_bucket;
3879 3879 } umem_malloc_info_t;
3880 3880
3881 3881 static void
3882 3882 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3883 3883 size_t maxbuckets, size_t minbucketsize, int geometric)
3884 3884 {
3885 3885 uint64_t um_malloc;
3886 3886 int minb = -1;
3887 3887 int maxb = -1;
3888 3888 int buckets;
3889 3889 int nbucks;
3890 3890 int i;
3891 3891 int b;
3892 3892 const int *distarray;
3893 3893
3894 3894 minb = (int)minmalloc;
3895 3895 maxb = (int)maxmalloc;
3896 3896
3897 3897 nbucks = buckets = maxb - minb + 1;
3898 3898
3899 3899 um_malloc = 0;
3900 3900 for (b = minb; b <= maxb; b++)
3901 3901 um_malloc += um_bucket[b];
3902 3902
3903 3903 if (maxbuckets != 0)
3904 3904 buckets = MIN(buckets, maxbuckets);
3905 3905
3906 3906 if (minbucketsize > 1) {
3907 3907 buckets = MIN(buckets, nbucks/minbucketsize);
3908 3908 if (buckets == 0) {
3909 3909 buckets = 1;
3910 3910 minbucketsize = nbucks;
3911 3911 }
3912 3912 }
3913 3913
3914 3914 if (geometric)
3915 3915 distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3916 3916 else
3917 3917 distarray = dist_linear(buckets, minb, maxb);
3918 3918
3919 3919 dist_print_header("malloc size", 11, "count");
3920 3920 for (i = 0; i < buckets; i++) {
3921 3921 dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3922 3922 }
3923 3923 mdb_printf("\n");
3924 3924 }
3925 3925
3926 3926 /*
3927 3927 * A malloc()ed buffer looks like:
3928 3928 *
3929 3929 * <----------- mi.malloc_size --->
3930 3930 * <----------- cp.cache_bufsize ------------------>
3931 3931 * <----------- cp.cache_chunksize -------------------------------->
3932 3932 * +-------+-----------------------+---------------+---------------+
3933 3933 * |/tag///| mallocsz |/round-off/////|/debug info////|
3934 3934 * +-------+---------------------------------------+---------------+
3935 3935 * <-- usable space ------>
3936 3936 *
3937 3937 * mallocsz is the argument to malloc(3C).
3938 3938 * mi.malloc_size is the actual size passed to umem_alloc(), which
3939 3939 * is rounded up to the smallest available cache size, which is
3940 3940 * cache_bufsize. If there is debugging or alignment overhead in
3941 3941 * the cache, that is reflected in a larger cache_chunksize.
3942 3942 *
3943 3943 * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3944 3944 * depending upon the ISA's alignment requirements. For 32-bit allocations,
3945 3945  * it is always an 8-byte tag. For 64-bit allocations larger than 8 bytes,
3946 3946 * the tag has 8 bytes of padding before it.
3947 3947 *
3948 3948  * 32-bit, 64-bit buffers <= 8 bytes:
3949 3949 * +-------+-------+--------- ...
3950 3950 * |/size//|/stat//| mallocsz ...
3951 3951 * +-------+-------+--------- ...
3952 3952 * ^
3953 3953 * pointer returned from malloc(3C)
3954 3954 *
3955 3955  * 64-bit buffers > 8 bytes:
3956 3956 * +---------------+-------+-------+--------- ...
3957 3957 * |/padding///////|/size//|/stat//| mallocsz ...
3958 3958 * +---------------+-------+-------+--------- ...
3959 3959 * ^
3960 3960 * pointer returned from malloc(3C)
3961 3961 *
3962 3962 * The "size" field is "malloc_size", which is mallocsz + the padding.
3963 3963 * The "stat" field is derived from malloc_size, and functions as a
3964 3964 * validation that this buffer is actually from malloc(3C).
3965 3965 */
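/*
 * A worked example under the layout above (assuming the default
 * umem_alloc_sizes): in a 64-bit process, malloc(100) needs a 16-byte tag
 * (8 bytes of padding plus the 8-byte struct malloc_data), so malloc_size
 * is 116 and the request is satisfied from the 128-byte cache; the
 * remaining 12 bytes are round-off, and any debug metadata appears as a
 * cache_chunksize larger than 128.
 */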
3966 3966 /*ARGSUSED*/
3967 3967 static int
3968 3968 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
3969 3969 {
3970 3970 struct malloc_data md;
3971 3971 size_t m_addr = addr;
3972 3972 size_t overhead = sizeof (md);
3973 3973 size_t mallocsz;
3974 3974
3975 3975 ump->um_total++;
3976 3976
3977 3977 #ifdef _LP64
3978 3978 if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
3979 3979 m_addr += overhead;
3980 3980 overhead += sizeof (md);
3981 3981 }
3982 3982 #endif
3983 3983
3984 3984 if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
3985 3985 mdb_warn("unable to read malloc header at %p", m_addr);
3986 3986 return (WALK_NEXT);
3987 3987 }
3988 3988
3989 3989 switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
3990 3990 case MALLOC_MAGIC:
3991 3991 #ifdef _LP64
3992 3992 case MALLOC_SECOND_MAGIC:
3993 3993 #endif
3994 3994 mallocsz = md.malloc_size - overhead;
3995 3995
3996 3996 ump->um_malloc++;
3997 3997 ump->um_malloc_size += mallocsz;
3998 3998 ump->um_malloc_overhead += overhead;
3999 3999
4000 4000 /* include round-off and debug overhead */
4001 4001 ump->um_malloc_overhead +=
4002 4002 ump->um_cp->cache_chunksize - md.malloc_size;
4003 4003
4004 4004 if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
4005 4005 ump->um_bucket[mallocsz]++;
4006 4006
4007 4007 break;
4008 4008 default:
4009 4009 break;
4010 4010 }
4011 4011
4012 4012 return (WALK_NEXT);
4013 4013 }
4014 4014
4015 4015 int
4016 4016 get_umem_alloc_sizes(int **out, size_t *out_num)
4017 4017 {
4018 4018 GElf_Sym sym;
4019 4019
4020 4020 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
4021 4021 mdb_warn("unable to look up umem_alloc_sizes");
4022 4022 return (-1);
4023 4023 }
4024 4024
4025 4025 *out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
4026 4026 *out_num = sym.st_size / sizeof (int);
4027 4027
4028 4028 if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
4029 4029 mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
4030 4030 *out = NULL;
4031 4031 return (-1);
4032 4032 }
4033 4033
4034 4034 return (0);
4035 4035 }
4036 4036
4037 4037
4038 4038 static int
4039 4039 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
4040 4040 {
4041 4041 if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
4042 4042 return (WALK_NEXT);
4043 4043
4044 4044 ump->um_cp = cp;
4045 4045
4046 4046 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
4047 4047 -1) {
4048 4048 mdb_warn("can't walk 'umem' for cache %p", addr);
4049 4049 return (WALK_ERR);
4050 4050 }
4051 4051
4052 4052 return (WALK_NEXT);
4053 4053 }
4054 4054
4055 4055 void
4056 4056 umem_malloc_dist_help(void)
4057 4057 {
4058 4058 mdb_printf("%s\n",
4059 4059 "report distribution of outstanding malloc()s");
4060 4060 mdb_dec_indent(2);
4061 4061 mdb_printf("%<b>OPTIONS%</b>\n");
4062 4062 mdb_inc_indent(2);
4063 4063 mdb_printf("%s",
4064 4064 " -b maxbins\n"
4065 4065 " Use at most maxbins bins for the data\n"
4066 4066 " -B minbinsize\n"
4067 4067 " Make the bins at least minbinsize bytes apart\n"
4068 4068 " -d dump the raw data out, without binning\n"
4069 4069 " -g use geometric binning instead of linear binning\n");
4070 4070 }
4071 4071
4072 4072 /*ARGSUSED*/
4073 4073 int
4074 4074 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4075 4075 {
4076 4076 umem_malloc_info_t mi;
4077 4077 uint_t geometric = 0;
4078 4078 uint_t dump = 0;
4079 4079 size_t maxbuckets = 0;
4080 4080 size_t minbucketsize = 0;
4081 4081
4082 4082 size_t minalloc = 0;
4083 4083 size_t maxalloc = UMI_MAX_BUCKET;
4084 4084
4085 4085 if (flags & DCMD_ADDRSPEC)
4086 4086 return (DCMD_USAGE);
4087 4087
4088 4088 if (mdb_getopts(argc, argv,
4089 4089 'd', MDB_OPT_SETBITS, TRUE, &dump,
4090 4090 'g', MDB_OPT_SETBITS, TRUE, &geometric,
4091 4091 'b', MDB_OPT_UINTPTR, &maxbuckets,
4092 4092 'B', MDB_OPT_UINTPTR, &minbucketsize,
4093 4093 0) != argc)
4094 4094 return (DCMD_USAGE);
4095 4095
4096 4096 bzero(&mi, sizeof (mi));
4097 4097 mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4098 4098 UM_SLEEP | UM_GC);
4099 4099
4100 4100 if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
4101 4101 &mi) == -1) {
4102 4102 mdb_warn("unable to walk 'umem_cache'");
4103 4103 return (DCMD_ERR);
4104 4104 }
4105 4105
4106 4106 if (dump) {
4107 4107 int i;
4108 4108 for (i = minalloc; i <= maxalloc; i++)
4109 4109 mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
4110 4110
4111 4111 return (DCMD_OK);
4112 4112 }
4113 4113
4114 4114 umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
4115 4115 maxbuckets, minbucketsize, geometric);
4116 4116
4117 4117 return (DCMD_OK);
4118 4118 }
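/*
 * A hedged usage sketch (not output from a real session):
 *
 *	> ::umem_malloc_dist
 *	> ::umem_malloc_dist -g -b 16
 *	> ::umem_malloc_dist -d
 *
 * The first form prints a linear distribution of outstanding malloc()
 * sizes, the second caps the output at 16 geometric bins, and -d dumps the
 * raw per-size counts without binning.
 */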
4119 4119
4120 4120 void
4121 4121 umem_malloc_info_help(void)
4122 4122 {
4123 4123 mdb_printf("%s\n",
4124 4124 "report information about malloc()s by cache. ");
4125 4125 mdb_dec_indent(2);
4126 4126 mdb_printf("%<b>OPTIONS%</b>\n");
4127 4127 mdb_inc_indent(2);
4128 4128 mdb_printf("%s",
4129 4129 " -b maxbins\n"
4130 4130 " Use at most maxbins bins for the data\n"
4131 4131 " -B minbinsize\n"
4132 4132 " Make the bins at least minbinsize bytes apart\n"
4133 4133 " -d dump the raw distribution data without binning\n"
4134 4134 #ifndef _KMDB
4135 4135 " -g use geometric binning instead of linear binning\n"
4136 4136 #endif
4137 4137 "");
4138 4138 }
4139 4139 int
4140 4140 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4141 4141 {
4142 4142 umem_cache_t c;
4143 4143 umem_malloc_info_t mi;
4144 4144
4145 4145 int skip = 0;
4146 4146
4147 4147 size_t maxmalloc;
4148 4148 size_t overhead;
4149 4149 size_t allocated;
4150 4150 size_t avg_malloc;
4151 4151 size_t overhead_pct; /* 1000 * overhead_percent */
4152 4152
4153 4153 uint_t verbose = 0;
4154 4154 uint_t dump = 0;
4155 4155 uint_t geometric = 0;
4156 4156 size_t maxbuckets = 0;
4157 4157 size_t minbucketsize = 0;
4158 4158
4159 4159 int *alloc_sizes;
4160 4160 int idx;
4161 4161 size_t num;
4162 4162 size_t minmalloc;
4163 4163
4164 4164 if (mdb_getopts(argc, argv,
4165 4165 'd', MDB_OPT_SETBITS, TRUE, &dump,
4166 4166 'g', MDB_OPT_SETBITS, TRUE, &geometric,
4167 4167 'b', MDB_OPT_UINTPTR, &maxbuckets,
4168 4168 'B', MDB_OPT_UINTPTR, &minbucketsize,
4169 4169 0) != argc)
4170 4170 return (DCMD_USAGE);
4171 4171
4172 4172 if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
4173 4173 verbose = 1;
4174 4174
4175 4175 if (!(flags & DCMD_ADDRSPEC)) {
4176 4176 if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
4177 4177 argc, argv) == -1) {
4178 4178 mdb_warn("can't walk umem_cache");
4179 4179 return (DCMD_ERR);
4180 4180 }
4181 4181 return (DCMD_OK);
4182 4182 }
4183 4183
4184 4184 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
4185 4185 mdb_warn("unable to read cache at %p", addr);
4186 4186 return (DCMD_ERR);
4187 4187 }
4188 4188
4189 4189 if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
4190 4190 if (!(flags & DCMD_LOOP))
4191 4191 mdb_warn("umem_malloc_info: cache \"%s\" is not used "
4192 4192 "by malloc()\n", c.cache_name);
4193 4193 skip = 1;
4194 4194 }
4195 4195
4196 4196 /*
4197 4197 * normally, print the header only the first time. In verbose mode,
4198 4198 * print the header on every non-skipped buffer
4199 4199 */
4200 4200 if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
4201 4201 mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
4202 4202 "CACHE", "BUFSZ", "MAXMAL",
4203 4203 "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");
4204 4204
4205 4205 if (skip)
4206 4206 return (DCMD_OK);
4207 4207
4208 4208 maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
4209 4209 #ifdef _LP64
4210 4210 if (c.cache_bufsize > UMEM_SECOND_ALIGN)
4211 4211 maxmalloc -= sizeof (struct malloc_data);
4212 4212 #endif
4213 4213
4214 4214 bzero(&mi, sizeof (mi));
4215 4215 mi.um_cp = &c;
4216 4216 if (verbose)
4217 4217 mi.um_bucket =
4218 4218 mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4219 4219 UM_SLEEP | UM_GC);
4220 4220
4221 4221 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
4222 4222 -1) {
4223 4223 mdb_warn("can't walk 'umem'");
4224 4224 return (DCMD_ERR);
4225 4225 }
4226 4226
4227 4227 overhead = mi.um_malloc_overhead;
4228 4228 allocated = mi.um_malloc_size;
4229 4229
4230 4230 /* do integer round off for the average */
4231 4231 if (mi.um_malloc != 0)
4232 4232 avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
4233 4233 else
4234 4234 avg_malloc = 0;
4235 4235
4236 4236 /*
4237 4237 * include per-slab overhead
4238 4238 *
4239 4239 * Each slab in a given cache is the same size, and has the same
4240 4240 * number of chunks in it; we read in the first slab on the
4241 4241 * slab list to get the number of chunks for all slabs. To
4242 4242 * compute the per-slab overhead, we just subtract the chunk usage
4243 4243 * from the slabsize:
4244 4244 *
4245 4245 * +------------+-------+-------+ ... --+-------+-------+-------+
4246 4246 * |////////////| | | ... | |///////|///////|
4247 4247 * |////color///| chunk | chunk | ... | chunk |/color/|/slab//|
4248 4248 * |////////////| | | ... | |///////|///////|
4249 4249 * +------------+-------+-------+ ... --+-------+-------+-------+
4250 4250 * | \_______chunksize * chunks_____/ |
4251 4251 * \__________________________slabsize__________________________/
4252 4252 *
4253 4253 * For UMF_HASH caches, there is an additional source of overhead;
4254 4254 * the external umem_slab_t and per-chunk bufctl structures. We
4255 4255 * include those in our per-slab overhead.
4256 4256 *
4257 4257 * Once we have a number for the per-slab overhead, we estimate
4258 4258 * the actual overhead by treating the malloc()ed buffers as if
4259 4259 * they were densely packed:
4260 4260 *
4261 4261 * additional overhead = (# mallocs) * (per-slab) / (chunks);
4262 4262 *
4263 4263 * carefully ordering the multiply before the divide, to avoid
4264 4264 * round-off error.
4265 4265 */
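	/*
	 * A worked example with assumed numbers: if cache_slabsize is 8192,
	 * cache_chunksize is 160 and slab_chunks is 50, the per-slab waste
	 * is 8192 - (160 * 50) = 192 bytes; with 1000 outstanding mallocs
	 * the estimate below adds (192 * 1000) / 50 = 3840 bytes of
	 * overhead.
	 */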
4266 4266 if (mi.um_malloc != 0) {
4267 4267 umem_slab_t slab;
4268 4268 uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;
4269 4269
4270 4270 if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
4271 4271 mdb_warn("unable to read slab at %p\n", saddr);
4272 4272 } else {
4273 4273 long chunks = slab.slab_chunks;
4274 4274 if (chunks != 0 && c.cache_chunksize != 0 &&
4275 4275 chunks <= c.cache_slabsize / c.cache_chunksize) {
4276 4276 uintmax_t perslab =
4277 4277 c.cache_slabsize -
4278 4278 (c.cache_chunksize * chunks);
4279 4279
4280 4280 if (c.cache_flags & UMF_HASH) {
4281 4281 perslab += sizeof (umem_slab_t) +
4282 4282 chunks *
4283 4283 ((c.cache_flags & UMF_AUDIT) ?
4284 4284 sizeof (umem_bufctl_audit_t) :
4285 4285 sizeof (umem_bufctl_t));
4286 4286 }
4287 4287 overhead +=
4288 4288 (perslab * (uintmax_t)mi.um_malloc)/chunks;
4289 4289 } else {
4290 4290 mdb_warn("invalid #chunks (%d) in slab %p\n",
4291 4291 chunks, saddr);
4292 4292 }
4293 4293 }
4294 4294 }
4295 4295
4296 4296 if (allocated != 0)
4297 4297 overhead_pct = (1000ULL * overhead) / allocated;
4298 4298 else
4299 4299 overhead_pct = 0;
4300 4300
4301 4301 mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
4302 4302 addr, c.cache_bufsize, maxmalloc,
4303 4303 mi.um_malloc, avg_malloc, allocated, overhead,
4304 4304 overhead_pct / 10, overhead_pct % 10);
4305 4305
4306 4306 if (!verbose)
4307 4307 return (DCMD_OK);
4308 4308
4309 4309 if (!dump)
4310 4310 mdb_printf("\n");
4311 4311
4312 4312 if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
4313 4313 return (DCMD_ERR);
4314 4314
4315 4315 for (idx = 0; idx < num; idx++) {
4316 4316 if (alloc_sizes[idx] == c.cache_bufsize)
4317 4317 break;
4318 4318 if (alloc_sizes[idx] == 0) {
4319 4319 idx = num; /* 0-terminated array */
4320 4320 break;
4321 4321 }
4322 4322 }
4323 4323 if (idx == num) {
4324 4324 mdb_warn(
4325 4325 "cache %p's size (%d) not in umem_alloc_sizes\n",
4326 4326 addr, c.cache_bufsize);
4327 4327 return (DCMD_ERR);
4328 4328 }
4329 4329
4330 4330 minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
4331 4331 if (minmalloc > 0) {
4332 4332 #ifdef _LP64
4333 4333 if (minmalloc > UMEM_SECOND_ALIGN)
4334 4334 minmalloc -= sizeof (struct malloc_data);
4335 4335 #endif
4336 4336 minmalloc -= sizeof (struct malloc_data);
4337 4337 minmalloc += 1;
4338 4338 }
4339 4339
4340 4340 if (dump) {
4341 4341 for (idx = minmalloc; idx <= maxmalloc; idx++)
4342 4342 mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
4343 4343 mdb_printf("\n");
4344 4344 } else {
4345 4345 umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
4346 4346 maxbuckets, minbucketsize, geometric);
4347 4347 }
4348 4348
4349 4349 return (DCMD_OK);
4350 4350 }
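/*
 * A hedged usage sketch (not output from a real session):
 *
 *	> ::umem_malloc_info
 *	> ::umem_malloc_info -g -b 16
 *	> <cache address>::umem_malloc_info -d
 *
 * With no address every umem_alloc_* cache is summarized; giving a cache
 * address restricts the report to that cache, and any of -d/-g/-b/-B
 * switches to verbose mode, which also prints a per-cache size
 * distribution.
 */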
↓ open down ↓ |
2221 lines elided |
↑ open up ↑ |