1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2015 Joyent, Inc.
  24  */
  25 
  26 #include <mdb/mdb_modapi.h>
  27 #include <mdb/mdb_ctf.h>
  28 #include <sys/cpuvar.h>
  29 #include <sys/systm.h>
  30 #include <sys/traptrace.h>
  31 #include <sys/x_call.h>
  32 #include <sys/xc_levels.h>
  33 #include <sys/avintr.h>
  34 #include <sys/systm.h>
  35 #include <sys/trap.h>
  36 #include <sys/mutex.h>
  37 #include <sys/mutex_impl.h>
  38 #include "i86mmu.h"
  39 #include "unix_sup.h"
  40 #include <sys/apix.h>
  41 #include <sys/x86_archext.h>
  42 #include <sys/bitmap.h>
  43 #include <sys/controlregs.h>
  44 
/* Field width used for the HANDLER column of ::ttrace output. */
#define TT_HDLR_WIDTH   17


/*
 * apix only: debugger-side copies of the target's "apixs" array and
 * "apix_enable" flag.  Both are filled in by the ::ttrace dcmd before it
 * walks the trace records.
 */
static apix_impl_t *d_apixs[NCPU];
static int use_apix = 0;
  51 
  52 static int
  53 ttrace_ttr_size_check(void)
  54 {
  55         mdb_ctf_id_t ttrtid;
  56         ssize_t ttr_size;
  57 
  58         if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 ||
  59             mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) {
  60                 mdb_warn("failed to determine size of trap_trace_rec_t; "
  61                     "non-TRAPTRACE kernel?\n");
  62                 return (0);
  63         }
  64 
  65         if ((ttr_size = mdb_ctf_type_size(ttrtid)) !=
  66             sizeof (trap_trace_rec_t)) {
  67                 /*
  68                  * On Intel machines, this will happen when TTR_STACK_DEPTH
  69                  * is changed.  This code could be smarter, and could
  70                  * dynamically adapt to different depths, but not until a
  71                  * need for such adaptation is demonstrated.
  72                  */
  73                 mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't "
  74                     "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t));
  75                 return (0);
  76         }
  77 
  78         return (1);
  79 }
  80 
  81 int
  82 ttrace_walk_init(mdb_walk_state_t *wsp)
  83 {
  84         trap_trace_ctl_t *ttcp;
  85         size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
  86         int i;
  87 
  88         if (!ttrace_ttr_size_check())
  89                 return (WALK_ERR);
  90 
  91         ttcp = mdb_zalloc(ttc_size, UM_SLEEP);
  92 
  93         if (wsp->walk_addr != NULL) {
  94                 mdb_warn("ttrace only supports global walks\n");
  95                 return (WALK_ERR);
  96         }
  97 
  98         if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) {
  99                 mdb_warn("symbol 'trap_trace_ctl' not found; "
 100                     "non-TRAPTRACE kernel?\n");
 101                 mdb_free(ttcp, ttc_size);
 102                 return (WALK_ERR);
 103         }
 104 
 105         /*
 106          * We'll poach the ttc_current pointer (which isn't used for
 107          * anything) to store a pointer to our current TRAPTRACE record.
 108          * This allows us to only keep the array of trap_trace_ctl structures
 109          * as our walker state (ttc_current may be the only kernel data
 110          * structure member added exclusively to make writing the mdb walker
 111          * a little easier).
 112          */
 113         for (i = 0; i < NCPU; i++) {
 114                 trap_trace_ctl_t *ttc = &ttcp[i];
 115 
 116                 if (ttc->ttc_first == NULL)
 117                         continue;
 118 
 119                 /*
 120                  * Assign ttc_current to be the last completed record.
 121                  * Note that the error checking (i.e. in the ttc_next ==
 122                  * ttc_first case) is performed in the step function.
 123                  */
 124                 ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t);
 125         }
 126 
 127         wsp->walk_data = ttcp;
 128         return (WALK_NEXT);
 129 }
 130 
 131 int
 132 ttrace_walk_step(mdb_walk_state_t *wsp)
 133 {
 134         trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc;
 135         trap_trace_rec_t rec;
 136         int rval, i, recsize = sizeof (trap_trace_rec_t);
 137         hrtime_t latest = 0;
 138 
 139         /*
 140          * Loop through the CPUs, looking for the latest trap trace record
 141          * (we want to walk through the trap trace records in reverse
 142          * chronological order).
 143          */
 144         for (i = 0; i < NCPU; i++) {
 145                 ttc = &ttcp[i];
 146 
 147                 if (ttc->ttc_current == NULL)
 148                         continue;
 149 
 150                 if (ttc->ttc_current < ttc->ttc_first)
 151                         ttc->ttc_current = ttc->ttc_limit - recsize;
 152 
 153                 if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
 154                         mdb_warn("couldn't read rec at %p", ttc->ttc_current);
 155                         return (WALK_ERR);
 156                 }
 157 
 158                 if (rec.ttr_stamp > latest) {
 159                         latest = rec.ttr_stamp;
 160                         latest_ttc = ttc;
 161                 }
 162         }
 163 
 164         if (latest == 0)
 165                 return (WALK_DONE);
 166 
 167         ttc = latest_ttc;
 168 
 169         if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
 170                 mdb_warn("couldn't read rec at %p", ttc->ttc_current);
 171                 return (WALK_ERR);
 172         }
 173 
 174         rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata);
 175 
 176         if (ttc->ttc_current == ttc->ttc_next)
 177                 ttc->ttc_current = NULL;
 178         else
 179                 ttc->ttc_current -= sizeof (trap_trace_rec_t);
 180 
 181         return (rval);
 182 }
 183 
 184 void
 185 ttrace_walk_fini(mdb_walk_state_t *wsp)
 186 {
 187         mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU);
 188 }
 189 
 190 static int
 191 ttrace_syscall(trap_trace_rec_t *rec)
 192 {
 193         GElf_Sym sym;
 194         int sysnum = rec->ttr_sysnum;
 195         uintptr_t addr;
 196         struct sysent sys;
 197 
 198         mdb_printf("%-3x", sysnum);
 199 
 200         if (rec->ttr_sysnum > NSYSCALL) {
 201                 mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum);
 202                 return (0);
 203         }
 204 
 205         if (mdb_lookup_by_name("sysent", &sym) == -1) {
 206                 mdb_warn("\ncouldn't find 'sysent'");
 207                 return (-1);
 208         }
 209 
 210         addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent);
 211 
 212         if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
 213                 mdb_warn("\nsysnum %d out-of-range\n", sysnum);
 214                 return (-1);
 215         }
 216 
 217         if (mdb_vread(&sys, sizeof (sys), addr) == -1) {
 218                 mdb_warn("\nfailed to read sysent at %p", addr);
 219                 return (-1);
 220         }
 221 
 222         mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc);
 223 
 224         return (0);
 225 }
 226 
 227 static int
 228 ttrace_interrupt(trap_trace_rec_t *rec)
 229 {
 230         GElf_Sym sym;
 231         uintptr_t addr;
 232         struct av_head hd;
 233         struct autovec av;
 234 
 235         switch (rec->ttr_regs.r_trapno) {
 236         case T_SOFTINT:
 237                 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
 238                 return (0);
 239         default:
 240                 break;
 241         }
 242 
 243         mdb_printf("%-3x ", rec->ttr_vector);
 244 
 245         if (mdb_lookup_by_name("autovect", &sym) == -1) {
 246                 mdb_warn("\ncouldn't find 'autovect'");
 247                 return (-1);
 248         }
 249 
 250         addr = (uintptr_t)sym.st_value +
 251             rec->ttr_vector * sizeof (struct av_head);
 252 
 253         if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
 254                 mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector);
 255                 return (-1);
 256         }
 257 
 258         if (mdb_vread(&hd, sizeof (hd), addr) == -1) {
 259                 mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector);
 260                 return (-1);
 261         }
 262 
 263         if (hd.avh_link == NULL) {
 264                 if (rec->ttr_ipl == XC_CPUPOKE_PIL)
 265                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
 266                 else
 267                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
 268         } else {
 269                 if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) {
 270                         mdb_warn("couldn't read autovec at %p",
 271                             (uintptr_t)hd.avh_link);
 272                 }
 273 
 274                 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
 275         }
 276 
 277         return (0);
 278 }
 279 
 280 static int
 281 ttrace_apix_interrupt(trap_trace_rec_t *rec)
 282 {
 283         struct autovec av;
 284         apix_impl_t apix;
 285         apix_vector_t apix_vector;
 286 
 287         switch (rec->ttr_regs.r_trapno) {
 288         case T_SOFTINT:
 289                 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
 290                 return (0);
 291         default:
 292                 break;
 293         }
 294 
 295         mdb_printf("%-3x ", rec->ttr_vector);
 296 
 297         /* Read the per CPU apix entry */
 298         if (mdb_vread(&apix, sizeof (apix_impl_t),
 299             (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) {
 300                 mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid);
 301                 return (-1);
 302         }
 303         if (mdb_vread(&apix_vector, sizeof (apix_vector_t),
 304             (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) {
 305                 mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector);
 306                 return (-1);
 307         }
 308         if (apix_vector.v_share == 0) {
 309                 if (rec->ttr_ipl == XC_CPUPOKE_PIL)
 310                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
 311                 else
 312                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
 313         } else {
 314                 if (mdb_vread(&av, sizeof (struct autovec),
 315                     (uintptr_t)(apix_vector.v_autovect)) == -1) {
 316                         mdb_warn("couldn't read autovec at %p",
 317                             (uintptr_t)apix_vector.v_autovect);
 318                 }
 319 
 320                 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
 321         }
 322 
 323         return (0);
 324 }
 325 
 326 
 327 static struct {
 328         int tt_trapno;
 329         char *tt_name;
 330 } ttrace_traps[] = {
 331         { T_ZERODIV,    "divide-error" },
 332         { T_SGLSTP,     "debug-exception" },
 333         { T_NMIFLT,     "nmi-interrupt" },
 334         { T_BPTFLT,     "breakpoint" },
 335         { T_OVFLW,      "into-overflow" },
 336         { T_BOUNDFLT,   "bound-exceeded" },
 337         { T_ILLINST,    "invalid-opcode" },
 338         { T_NOEXTFLT,   "device-not-avail" },
 339         { T_DBLFLT,     "double-fault" },
 340         { T_EXTOVRFLT,  "segment-overrun" },
 341         { T_TSSFLT,     "invalid-tss" },
 342         { T_SEGFLT,     "segment-not-pres" },
 343         { T_STKFLT,     "stack-fault" },
 344         { T_GPFLT,      "general-protectn" },
 345         { T_PGFLT,      "page-fault" },
 346         { T_EXTERRFLT,  "error-fault" },
 347         { T_ALIGNMENT,  "alignment-check" },
 348         { T_MCE,        "machine-check" },
 349         { T_SIMDFPE,    "sse-exception" },
 350 
 351         { T_DBGENTR,    "debug-enter" },
 352         { T_FASTTRAP,   "fasttrap-0xd2" },
 353         { T_SYSCALLINT, "syscall-0x91" },
 354         { T_DTRACE_RET, "dtrace-ret" },
 355         { T_SOFTINT,    "softint" },
 356         { T_INTERRUPT,  "interrupt" },
 357         { T_FAULT,      "fault" },
 358         { T_AST,        "ast" },
 359         { T_SYSCALL,    "syscall" },
 360 
 361         { 0,            NULL }
 362 };
 363 
 364 static int
 365 ttrace_trap(trap_trace_rec_t *rec)
 366 {
 367         int i;
 368 
 369         if (rec->ttr_regs.r_trapno == T_AST)
 370                 mdb_printf("%-3s ", "-");
 371         else
 372                 mdb_printf("%-3x ", rec->ttr_regs.r_trapno);
 373 
 374         for (i = 0; ttrace_traps[i].tt_name != NULL; i++) {
 375                 if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno)
 376                         break;
 377         }
 378 
 379         if (ttrace_traps[i].tt_name == NULL)
 380                 mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)");
 381         else
 382                 mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name);
 383 
 384         return (0);
 385 }
 386 
 387 static void
 388 ttrace_intr_detail(trap_trace_rec_t *rec)
 389 {
 390         mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector,
 391             rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl);
 392 }
 393 
 394 static struct {
 395         uchar_t t_marker;
 396         char *t_name;
 397         int (*t_hdlr)(trap_trace_rec_t *);
 398 } ttrace_hdlr[] = {
 399         { TT_SYSCALL, "sysc", ttrace_syscall },
 400         { TT_SYSENTER, "syse", ttrace_syscall },
 401         { TT_SYSC, "asys", ttrace_syscall },
 402         { TT_SYSC64, "sc64", ttrace_syscall },
 403         { TT_INTERRUPT, "intr", ttrace_interrupt },
 404         { TT_TRAP, "trap", ttrace_trap },
 405         { TT_EVENT, "evnt", ttrace_trap },
 406         { 0, NULL, NULL }
 407 };
 408 
/*
 * State shared between the ::ttrace dcmd and its walker callback,
 * ttrace_walk().
 */
typedef struct ttrace_dcmd {
        /* CPU whose records are shown, or -1 to show records of all CPUs */
        processorid_t ttd_cpu;
        /* set by the -x option; enables the extended per-record output */
        uint_t ttd_extended;
        /* copy of the target's trap_trace_ctl[], maps a record to its CPU */
        trap_trace_ctl_t ttd_ttc[NCPU];
} ttrace_dcmd_t;
 414 
 415 #if defined(__amd64)
 416 
 417 #define DUMP(reg) #reg, regs->r_##reg
 418 #define THREEREGS       "         %3s: %16lx %3s: %16lx %3s: %16lx\n"
 419 
 420 static void
 421 ttrace_dumpregs(trap_trace_rec_t *rec)
 422 {
 423         struct regs *regs = &rec->ttr_regs;
 424 
 425         mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx));
 426         mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9));
 427         mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp));
 428         mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12));
 429         mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15));
 430         mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs));
 431         mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err));
 432         mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl));
 433         mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2);
 434         mdb_printf("\n");
 435 }
 436 
 437 #else
 438 
 439 #define DUMP(reg) #reg, regs->r_##reg
 440 #define FOURREGS        "         %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n"
 441 
 442 static void
 443 ttrace_dumpregs(trap_trace_rec_t *rec)
 444 {
 445         struct regs *regs = &rec->ttr_regs;
 446 
 447         mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds));
 448         mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp));
 449         mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax));
 450         mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err),
 451             DUMP(pc), DUMP(cs));
 452         mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss),
 453             "cr2", rec->ttr_cr2);
 454         mdb_printf("\n");
 455 }
 456 
 457 #endif  /* __amd64 */
 458 
 459 int
 460 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd)
 461 {
 462         struct regs *regs = &rec->ttr_regs;
 463         processorid_t cpu = -1, i;
 464 
 465         for (i = 0; i < NCPU; i++) {
 466                 if (addr >= dcmd->ttd_ttc[i].ttc_first &&
 467                     addr < dcmd->ttd_ttc[i].ttc_limit) {
 468                         cpu = i;
 469                         break;
 470                 }
 471         }
 472 
 473         if (cpu == -1) {
 474                 mdb_warn("couldn't find %p in any trap trace ctl\n", addr);
 475                 return (WALK_ERR);
 476         }
 477 
 478         if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu)
 479                 return (WALK_NEXT);
 480 
 481         mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp);
 482 
 483         for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) {
 484                 if (rec->ttr_marker != ttrace_hdlr[i].t_marker)
 485                         continue;
 486                 mdb_printf("%4s ", ttrace_hdlr[i].t_name);
 487                 if (ttrace_hdlr[i].t_hdlr(rec) == -1)
 488                         return (WALK_ERR);
 489         }
 490 
 491         mdb_printf(" %a\n", regs->r_pc);
 492 
 493         if (dcmd->ttd_extended == FALSE)
 494                 return (WALK_NEXT);
 495 
 496         if (rec->ttr_marker == TT_INTERRUPT)
 497                 ttrace_intr_detail(rec);
 498         else
 499                 ttrace_dumpregs(rec);
 500 
 501         if (rec->ttr_sdepth > 0) {
 502                 for (i = 0; i < rec->ttr_sdepth; i++) {
 503                         if (i >= TTR_STACK_DEPTH) {
 504                                 mdb_printf("%17s*** invalid ttr_sdepth (is %d, "
 505                                     "should be <= %d)\n", " ", rec->ttr_sdepth,
 506                                     TTR_STACK_DEPTH);
 507                                 break;
 508                         }
 509 
 510                         mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]);
 511                 }
 512                 mdb_printf("\n");
 513         }
 514 
 515         return (WALK_NEXT);
 516 }
 517 
 518 int
 519 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 520 {
 521         ttrace_dcmd_t dcmd;
 522         trap_trace_ctl_t *ttc = dcmd.ttd_ttc;
 523         trap_trace_rec_t rec;
 524         size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
 525 
 526         if (!ttrace_ttr_size_check())
 527                 return (WALK_ERR);
 528 
 529         bzero(&dcmd, sizeof (dcmd));
 530         dcmd.ttd_cpu = -1;
 531         dcmd.ttd_extended = FALSE;
 532 
 533         if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) {
 534                 mdb_warn("symbol 'trap_trace_ctl' not found; "
 535                     "non-TRAPTRACE kernel?\n");
 536                 return (DCMD_ERR);
 537         }
 538 
 539         if (mdb_getopts(argc, argv,
 540             'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended, NULL) != argc)
 541                 return (DCMD_USAGE);
 542 
 543         if (DCMD_HDRSPEC(flags)) {
 544                 mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU",
 545                     "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER",
 546                     " EIP");
 547         }
 548 
 549         if (flags & DCMD_ADDRSPEC) {
 550                 if (addr >= NCPU) {
 551                         if (mdb_vread(&rec, sizeof (rec), addr) == -1) {
 552                                 mdb_warn("couldn't read trap trace record "
 553                                     "at %p", addr);
 554                                 return (DCMD_ERR);
 555                         }
 556 
 557                         if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR)
 558                                 return (DCMD_ERR);
 559 
 560                         return (DCMD_OK);
 561                 }
 562                 dcmd.ttd_cpu = addr;
 563         }
 564 
 565         if (mdb_readvar(&use_apix, "apix_enable") == -1) {
 566                 mdb_warn("failed to read apix_enable");
 567                 use_apix = 0;
 568         }
 569 
 570         if (use_apix) {
 571                 if (mdb_readvar(&d_apixs, "apixs") == -1) {
 572                         mdb_warn("\nfailed to read apixs.");
 573                         return (DCMD_ERR);
 574                 }
 575                 /* change to apix ttrace interrupt handler */
 576                 ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt;
 577         }
 578 
 579         if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) {
 580                 mdb_warn("couldn't walk 'ttrace'");
 581                 return (DCMD_ERR);
 582         }
 583 
 584         return (DCMD_OK);
 585 }
 586 
 587 /*ARGSUSED*/
 588 int
 589 mutex_owner_init(mdb_walk_state_t *wsp)
 590 {
 591         return (WALK_NEXT);
 592 }
 593 
 594 int
 595 mutex_owner_step(mdb_walk_state_t *wsp)
 596 {
 597         uintptr_t addr = wsp->walk_addr;
 598         mutex_impl_t mtx;
 599         uintptr_t owner;
 600         kthread_t thr;
 601 
 602         if (mdb_vread(&mtx, sizeof (mtx), addr) == -1)
 603                 return (WALK_ERR);
 604 
 605         if (!MUTEX_TYPE_ADAPTIVE(&mtx))
 606                 return (WALK_DONE);
 607 
 608         if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == NULL)
 609                 return (WALK_DONE);
 610 
 611         if (mdb_vread(&thr, sizeof (thr), owner) != -1)
 612                 (void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata);
 613 
 614         return (WALK_DONE);
 615 }
 616 
 617 static void
 618 gate_desc_dump(gate_desc_t *gate, const char *label, int header)
 619 {
 620         const char *lastnm;
 621         uint_t lastval;
 622         char type[4];
 623 
 624         switch (gate->sgd_type) {
 625         case SDT_SYSIGT:
 626                 strcpy(type, "int");
 627                 break;
 628         case SDT_SYSTGT:
 629                 strcpy(type, "trp");
 630                 break;
 631         case SDT_SYSTASKGT:
 632                 strcpy(type, "tsk");
 633                 break;
 634         default:
 635                 (void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type);
 636         }
 637 
 638 #if defined(__amd64)
 639         lastnm = "IST";
 640         lastval = gate->sgd_ist;
 641 #else
 642         lastnm = "STK";
 643         lastval = gate->sgd_stkcpy;
 644 #endif
 645 
 646         if (header) {
 647                 mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> "
 648                     "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label),
 649                     "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm);
 650         }
 651 
 652         mdb_printf("%s", label);
 653 
 654         if (gate->sgd_type == SDT_SYSTASKGT)
 655                 mdb_printf("%-30s ", "-");
 656         else
 657                 mdb_printf("%-30a ", GATESEG_GETOFFSET(gate));
 658 
 659         mdb_printf("%4x  %d  %c %3s %2x\n", gate->sgd_selector,
 660             gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval);
 661 }
 662 
 663 /*ARGSUSED*/
 664 static int
 665 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 666 {
 667         gate_desc_t gate;
 668 
 669         if (argc != 0 || !(flags & DCMD_ADDRSPEC))
 670                 return (DCMD_USAGE);
 671 
 672         if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
 673             sizeof (gate_desc_t)) {
 674                 mdb_warn("failed to read gate descriptor at %p\n", addr);
 675                 return (DCMD_ERR);
 676         }
 677 
 678         gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags));
 679 
 680         return (DCMD_OK);
 681 }
 682 
 683 /*ARGSUSED*/
 684 static int
 685 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 686 {
 687         int i;
 688 
 689         if (!(flags & DCMD_ADDRSPEC)) {
 690                 GElf_Sym idt0_va;
 691                 gate_desc_t *idt0;
 692 
 693                 if (mdb_lookup_by_name("idt0", &idt0_va) < 0) {
 694                         mdb_warn("failed to find VA of idt0");
 695                         return (DCMD_ERR);
 696                 }
 697 
 698                 addr = idt0_va.st_value;
 699                 if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) {
 700                         mdb_warn("failed to read idt0 at %p\n", addr);
 701                         return (DCMD_ERR);
 702                 }
 703 
 704                 addr = (uintptr_t)idt0;
 705         }
 706 
 707         for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) {
 708                 gate_desc_t gate;
 709                 char label[6];
 710 
 711                 if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
 712                     sizeof (gate_desc_t)) {
 713                         mdb_warn("failed to read gate descriptor at %p\n",
 714                             addr);
 715                         return (DCMD_ERR);
 716                 }
 717 
 718                 (void) mdb_snprintf(label, sizeof (label), "%3d: ", i);
 719                 gate_desc_dump(&gate, label, i == 0);
 720         }
 721 
 722         return (DCMD_OK);
 723 }
 724 
 725 static void
 726 htables_help(void)
 727 {
 728         mdb_printf(
 729             "Given a (hat_t *), generates the list of all (htable_t *)s\n"
 730             "that correspond to that address space\n");
 731 }
 732 
 733 static void
 734 report_maps_help(void)
 735 {
 736         mdb_printf(
 737             "Given a PFN, report HAT structures that map the page, or use\n"
 738             "the page as a pagetable.\n"
 739             "\n"
 740             "-m Interpret the PFN as an MFN (machine frame number)\n");
 741 }
 742 
 743 static void
 744 ptable_help(void)
 745 {
 746         mdb_printf(
 747             "Given a PFN holding a page table, print its contents, and\n"
 748             "the address of the corresponding htable structure.\n"
 749             "\n"
 750             "-m Interpret the PFN as an MFN (machine frame number)\n");
 751 }
 752 
 753 /*
 754  * NSEC_SHIFT is replicated here (it is not defined in a header file),
 755  * but for amusement, the reader is directed to the comment that explains
 756  * the rationale for this particular value on x86.  Spoiler:  the value is
 757  * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
 758  * in that comment sounds too familiar, it's because your author also wrote
 759  * that code -- some fifteen years prior to this writing in 2011...)
 760  */
 761 #define NSEC_SHIFT 5
 762 
 763 /*ARGSUSED*/
 764 static int
 765 scalehrtime_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 766 {
 767         uint32_t nsec_scale;
 768         hrtime_t tsc = addr, hrt;
 769         unsigned int *tscp = (unsigned int *)&tsc;
 770         uintptr_t scalehrtimef;
 771         uint64_t scale;
 772         GElf_Sym sym;
 773 
 774         if (!(flags & DCMD_ADDRSPEC)) {
 775                 if (argc != 1)
 776                         return (DCMD_USAGE);
 777 
 778                 switch (argv[0].a_type) {
 779                 case MDB_TYPE_STRING:
 780                         tsc = mdb_strtoull(argv[0].a_un.a_str);
 781                         break;
 782                 case MDB_TYPE_IMMEDIATE:
 783                         tsc = argv[0].a_un.a_val;
 784                         break;
 785                 default:
 786                         return (DCMD_USAGE);
 787                 }
 788         }
 789 
 790         if (mdb_readsym(&scalehrtimef,
 791             sizeof (scalehrtimef), "scalehrtimef") == -1) {
 792                 mdb_warn("couldn't read 'scalehrtimef'");
 793                 return (DCMD_ERR);
 794         }
 795 
 796         if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
 797                 mdb_warn("couldn't find 'tsc_scalehrtime'");
 798                 return (DCMD_ERR);
 799         }
 800 
 801         if (sym.st_value != scalehrtimef) {
 802                 mdb_warn("::scalehrtime requires that scalehrtimef "
 803                     "be set to tsc_scalehrtime\n");
 804                 return (DCMD_ERR);
 805         }
 806 
 807         if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
 808                 mdb_warn("couldn't read 'nsec_scale'");
 809                 return (DCMD_ERR);
 810         }
 811 
 812         scale = (uint64_t)nsec_scale;
 813 
 814         hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
 815         hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
 816 
 817         mdb_printf("0x%llx\n", hrt);
 818 
 819         return (DCMD_OK);
 820 }
 821 
 822 /*
 823  * The x86 feature set is implemented as a bitmap array. That bitmap array is
 824  * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
 825  * macro. We have the names for each of these features in unix's text segment
 826  * so we do not have to duplicate them and instead just look them up.
 827  */
 828 /*ARGSUSED*/
 829 static int
 830 x86_featureset_cmd(uintptr_t addr, uint_t flags, int argc,
 831     const mdb_arg_t *argv)
 832 {
 833         void *fset;
 834         GElf_Sym sym;
 835         uintptr_t nptr;
 836         char name[128];
 837         int ii;
 838 
 839         size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES);
 840 
 841         if (argc != 0)
 842                 return (DCMD_USAGE);
 843 
 844         if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) {
 845                 mdb_warn("couldn't find x86_feature_names");
 846                 return (DCMD_ERR);
 847         }
 848 
 849         fset = mdb_zalloc(sz, UM_NOSLEEP);
 850         if (fset == NULL) {
 851                 mdb_warn("failed to allocate memory for x86_featureset");
 852                 return (DCMD_ERR);
 853         }
 854 
 855         if (mdb_readvar(fset, "x86_featureset") != sz) {
 856                 mdb_warn("failed to read x86_featureset");
 857                 mdb_free(fset, sz);
 858                 return (DCMD_ERR);
 859         }
 860 
 861         for (ii = 0; ii < NUM_X86_FEATURES; ii++) {
 862                 if (!BT_TEST((ulong_t *)fset, ii))
 863                         continue;
 864 
 865                 if (mdb_vread(&nptr, sizeof (char *), sym.st_value +
 866                     sizeof (void *) * ii) != sizeof (char *)) {
 867                         mdb_warn("failed to read feature array %d", ii);
 868                         mdb_free(fset, sz);
 869                         return (DCMD_ERR);
 870                 }
 871 
 872                 if (mdb_readstr(name, sizeof (name), nptr) == -1) {
 873                         mdb_warn("failed to read feature %d", ii);
 874                         mdb_free(fset, sz);
 875                         return (DCMD_ERR);
 876                 }
 877                 mdb_printf("%s\n", name);
 878         }
 879 
 880         mdb_free(fset, sz);
 881         return (DCMD_OK);
 882 }
 883 
 884 #ifdef _KMDB
 885 /* ARGSUSED */
 886 static int
 887 crregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 888 {
 889         ulong_t cr0, cr4;
 890         static const mdb_bitmask_t cr0_flag_bits[] = {
 891                 { "PE",         CR0_PE,         CR0_PE },
 892                 { "MP",         CR0_MP,         CR0_MP },
 893                 { "EM",         CR0_EM,         CR0_EM },
 894                 { "TS",         CR0_TS,         CR0_TS },
 895                 { "ET",         CR0_ET,         CR0_ET },
 896                 { "NE",         CR0_NE,         CR0_NE },
 897                 { "WP",         CR0_WP,         CR0_WP },
 898                 { "AM",         CR0_AM,         CR0_AM },
 899                 { "NW",         CR0_NW,         CR0_NW },
 900                 { "CD",         CR0_CD,         CR0_CD },
 901                 { "PG",         CR0_PG,         CR0_PG },
 902                 { NULL,         0,              0 }
 903         };
 904 
 905         static const mdb_bitmask_t cr4_flag_bits[] = {
 906                 { "VME",        CR4_VME,        CR4_VME },
 907                 { "PVI",        CR4_PVI,        CR4_PVI },
 908                 { "TSD",        CR4_TSD,        CR4_TSD },
 909                 { "DE",         CR4_DE,         CR4_DE },
 910                 { "PSE",        CR4_PSE,        CR4_PSE },
 911                 { "PAE",        CR4_PAE,        CR4_PAE },
 912                 { "MCE",        CR4_MCE,        CR4_MCE },
 913                 { "PGE",        CR4_PGE,        CR4_PGE },
 914                 { "PCE",        CR4_PCE,        CR4_PCE },
 915                 { "OSFXSR",     CR4_OSFXSR,     CR4_OSFXSR },
 916                 { "OSXMMEXCPT", CR4_OSXMMEXCPT, CR4_OSXMMEXCPT },
 917                 { "VMXE",       CR4_VMXE,       CR4_VMXE },
 918                 { "SMXE",       CR4_SMXE,       CR4_SMXE },
 919                 { "OSXSAVE",    CR4_OSXSAVE,    CR4_OSXSAVE },
 920                 { "SMEP",       CR4_SMEP,       CR4_SMEP },
 921                 { "SMAP",       CR4_SMAP,       CR4_SMAP },
 922                 { NULL,         0,              0 }
 923         };
 924 
 925         cr0 = kmdb_unix_getcr0();
 926         cr4 = kmdb_unix_getcr4();
 927         mdb_printf("%%cr0 = 0x%08x <%b>\n", cr0, cr0, cr0_flag_bits);
 928         mdb_printf("%%cr4 = 0x%08x <%b>\n", cr4, cr4, cr4_flag_bits);
 929         return (DCMD_OK);
 930 }
 931 #endif
 932 
 933 static const mdb_dcmd_t dcmds[] = {
 934         { "gate_desc", ":", "dump a gate descriptor", gate_desc },
 935         { "idt", ":[-v]", "dump an IDT", idt },
 936         { "ttrace", "[-x]", "dump trap trace buffers", ttrace },
 937         { "vatopfn", ":[-a as]", "translate address to physical page",
 938             va2pfn_dcmd },
 939         { "report_maps", ":[-m]",
 940             "Given PFN, report mappings / page table usage",
 941             report_maps_dcmd, report_maps_help },
 942         { "htables", "", "Given hat_t *, lists all its htable_t * values",
 943             htables_dcmd, htables_help },
 944         { "ptable", ":[-m]", "Given PFN, dump contents of a page table",
 945             ptable_dcmd, ptable_help },
 946         { "pte", ":[-p XXXXX] [-l N]", "print human readable page table entry",
 947             pte_dcmd },
 948         { "pfntomfn", ":", "convert physical page to hypervisor machine page",
 949             pfntomfn_dcmd },
 950         { "mfntopfn", ":", "convert hypervisor machine page to physical page",
 951             mfntopfn_dcmd },
 952         { "memseg_list", ":", "show memseg list", memseg_list },
 953         { "scalehrtime", ":",
 954             "scale an unscaled high-res time", scalehrtime_cmd },
 955         { "x86_featureset", NULL, "dump the x86_featureset vector",
 956                 x86_featureset_cmd },
 957 #ifdef _KMDB
 958         { "crregs", NULL, "dump control registers", crregs_dcmd },
 959 #endif
 960         { NULL }
 961 };
 962 
 963 static const mdb_walker_t walkers[] = {
 964         { "ttrace", "walks trap trace buffers in reverse chronological order",
 965                 ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
 966         { "mutex_owner", "walks the owner of a mutex",
 967                 mutex_owner_init, mutex_owner_step },
 968         { "memseg", "walk the memseg structures",
 969                 memseg_walk_init, memseg_walk_step, memseg_walk_fini },
 970         { NULL }
 971 };
 972 
 973 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
 974 
 975 const mdb_modinfo_t *
 976 _mdb_init(void)
 977 {
 978         return (&modinfo);
 979 }
 980 
/*
 * Module teardown entry point.  Its only action is to call free_mmu().
 * NOTE(review): free_mmu() is defined outside this section; presumably it
 * releases state held by the i86mmu support code -- confirm.
 */
void
_mdb_fini(void)
{
        free_mmu();
}