1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2018 Joyent, Inc.
  24  */
  25 
  26 #include <mdb/mdb_modapi.h>
  27 #include <mdb/mdb_ctf.h>
  28 #include <sys/cpuvar.h>
  29 #include <sys/systm.h>
  30 #include <sys/traptrace.h>
  31 #include <sys/x_call.h>
  32 #include <sys/xc_levels.h>
  33 #include <sys/avintr.h>
  34 #include <sys/systm.h>
  35 #include <sys/trap.h>
  36 #include <sys/mutex.h>
  37 #include <sys/mutex_impl.h>
  38 #include "i86mmu.h"
  39 #include "unix_sup.h"
  40 #include <sys/apix.h>
  41 #include <sys/x86_archext.h>
  42 #include <sys/bitmap.h>
  43 #include <sys/controlregs.h>
  44 
/*
 * Width, in characters, of the HANDLER column in ::ttrace output.  It is
 * passed as the '*' field width to mdb_printf() by the per-record handlers.
 */
#define TT_HDLR_WIDTH   17


/*
 * apix only.  Both are filled in by the ::ttrace dcmd: use_apix from the
 * target's 'apix_enable' variable and d_apixs from the target's 'apixs'
 * variable.  ttrace_apix_interrupt() indexes d_apixs by a record's
 * ttr_cpuid to find the address of the apix_impl_t to read.
 */
static apix_impl_t *d_apixs[NCPU];
static int use_apix = 0;
  51 
  52 static int
  53 ttrace_ttr_size_check(void)
  54 {
  55         mdb_ctf_id_t ttrtid;
  56         ssize_t ttr_size;
  57 
  58         if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 ||
  59             mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) {
  60                 mdb_warn("failed to determine size of trap_trace_rec_t; "
  61                     "non-TRAPTRACE kernel?\n");
  62                 return (0);
  63         }
  64 
  65         if ((ttr_size = mdb_ctf_type_size(ttrtid)) !=
  66             sizeof (trap_trace_rec_t)) {
  67                 /*
  68                  * On Intel machines, this will happen when TTR_STACK_DEPTH
  69                  * is changed.  This code could be smarter, and could
  70                  * dynamically adapt to different depths, but not until a
  71                  * need for such adaptation is demonstrated.
  72                  */
  73                 mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't "
  74                     "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t));
  75                 return (0);
  76         }
  77 
  78         return (1);
  79 }
  80 
  81 int
  82 ttrace_walk_init(mdb_walk_state_t *wsp)
  83 {
  84         trap_trace_ctl_t *ttcp;
  85         size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
  86         int i;
  87 
  88         if (!ttrace_ttr_size_check())
  89                 return (WALK_ERR);
  90 
  91         ttcp = mdb_zalloc(ttc_size, UM_SLEEP);
  92 
  93         if (wsp->walk_addr != NULL) {
  94                 mdb_warn("ttrace only supports global walks\n");
  95                 return (WALK_ERR);
  96         }
  97 
  98         if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) {
  99                 mdb_warn("symbol 'trap_trace_ctl' not found; "
 100                     "non-TRAPTRACE kernel?\n");
 101                 mdb_free(ttcp, ttc_size);
 102                 return (WALK_ERR);
 103         }
 104 
 105         /*
 106          * We'll poach the ttc_current pointer (which isn't used for
 107          * anything) to store a pointer to our current TRAPTRACE record.
 108          * This allows us to only keep the array of trap_trace_ctl structures
 109          * as our walker state (ttc_current may be the only kernel data
 110          * structure member added exclusively to make writing the mdb walker
 111          * a little easier).
 112          */
 113         for (i = 0; i < NCPU; i++) {
 114                 trap_trace_ctl_t *ttc = &ttcp[i];
 115 
 116                 if (ttc->ttc_first == NULL)
 117                         continue;
 118 
 119                 /*
 120                  * Assign ttc_current to be the last completed record.
 121                  * Note that the error checking (i.e. in the ttc_next ==
 122                  * ttc_first case) is performed in the step function.
 123                  */
 124                 ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t);
 125         }
 126 
 127         wsp->walk_data = ttcp;
 128         return (WALK_NEXT);
 129 }
 130 
 131 int
 132 ttrace_walk_step(mdb_walk_state_t *wsp)
 133 {
 134         trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc;
 135         trap_trace_rec_t rec;
 136         int rval, i, recsize = sizeof (trap_trace_rec_t);
 137         hrtime_t latest = 0;
 138 
 139         /*
 140          * Loop through the CPUs, looking for the latest trap trace record
 141          * (we want to walk through the trap trace records in reverse
 142          * chronological order).
 143          */
 144         for (i = 0; i < NCPU; i++) {
 145                 ttc = &ttcp[i];
 146 
 147                 if (ttc->ttc_current == NULL)
 148                         continue;
 149 
 150                 if (ttc->ttc_current < ttc->ttc_first)
 151                         ttc->ttc_current = ttc->ttc_limit - recsize;
 152 
 153                 if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
 154                         mdb_warn("couldn't read rec at %p", ttc->ttc_current);
 155                         return (WALK_ERR);
 156                 }
 157 
 158                 if (rec.ttr_stamp > latest) {
 159                         latest = rec.ttr_stamp;
 160                         latest_ttc = ttc;
 161                 }
 162         }
 163 
 164         if (latest == 0)
 165                 return (WALK_DONE);
 166 
 167         ttc = latest_ttc;
 168 
 169         if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
 170                 mdb_warn("couldn't read rec at %p", ttc->ttc_current);
 171                 return (WALK_ERR);
 172         }
 173 
 174         rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata);
 175 
 176         if (ttc->ttc_current == ttc->ttc_next)
 177                 ttc->ttc_current = NULL;
 178         else
 179                 ttc->ttc_current -= sizeof (trap_trace_rec_t);
 180 
 181         return (rval);
 182 }
 183 
 184 void
 185 ttrace_walk_fini(mdb_walk_state_t *wsp)
 186 {
 187         mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU);
 188 }
 189 
 190 static int
 191 ttrace_syscall(trap_trace_rec_t *rec)
 192 {
 193         GElf_Sym sym;
 194         int sysnum = rec->ttr_sysnum;
 195         uintptr_t addr;
 196         struct sysent sys;
 197 
 198         mdb_printf("%-3x", sysnum);
 199 
 200         if (rec->ttr_sysnum > NSYSCALL) {
 201                 mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum);
 202                 return (0);
 203         }
 204 
 205         if (mdb_lookup_by_name("sysent", &sym) == -1) {
 206                 mdb_warn("\ncouldn't find 'sysent'");
 207                 return (-1);
 208         }
 209 
 210         addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent);
 211 
 212         if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
 213                 mdb_warn("\nsysnum %d out-of-range\n", sysnum);
 214                 return (-1);
 215         }
 216 
 217         if (mdb_vread(&sys, sizeof (sys), addr) == -1) {
 218                 mdb_warn("\nfailed to read sysent at %p", addr);
 219                 return (-1);
 220         }
 221 
 222         mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc);
 223 
 224         return (0);
 225 }
 226 
 227 static int
 228 ttrace_interrupt(trap_trace_rec_t *rec)
 229 {
 230         GElf_Sym sym;
 231         uintptr_t addr;
 232         struct av_head hd;
 233         struct autovec av;
 234 
 235         switch (rec->ttr_regs.r_trapno) {
 236         case T_SOFTINT:
 237                 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
 238                 return (0);
 239         default:
 240                 break;
 241         }
 242 
 243         mdb_printf("%-3x ", rec->ttr_vector);
 244 
 245         if (mdb_lookup_by_name("autovect", &sym) == -1) {
 246                 mdb_warn("\ncouldn't find 'autovect'");
 247                 return (-1);
 248         }
 249 
 250         addr = (uintptr_t)sym.st_value +
 251             rec->ttr_vector * sizeof (struct av_head);
 252 
 253         if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
 254                 mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector);
 255                 return (-1);
 256         }
 257 
 258         if (mdb_vread(&hd, sizeof (hd), addr) == -1) {
 259                 mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector);
 260                 return (-1);
 261         }
 262 
 263         if (hd.avh_link == NULL) {
 264                 if (rec->ttr_ipl == XC_CPUPOKE_PIL)
 265                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
 266                 else
 267                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
 268         } else {
 269                 if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) {
 270                         mdb_warn("couldn't read autovec at %p",
 271                             (uintptr_t)hd.avh_link);
 272                 }
 273 
 274                 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
 275         }
 276 
 277         return (0);
 278 }
 279 
 280 static int
 281 ttrace_apix_interrupt(trap_trace_rec_t *rec)
 282 {
 283         struct autovec av;
 284         apix_impl_t apix;
 285         apix_vector_t apix_vector;
 286 
 287         switch (rec->ttr_regs.r_trapno) {
 288         case T_SOFTINT:
 289                 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
 290                 return (0);
 291         default:
 292                 break;
 293         }
 294 
 295         mdb_printf("%-3x ", rec->ttr_vector);
 296 
 297         /* Read the per CPU apix entry */
 298         if (mdb_vread(&apix, sizeof (apix_impl_t),
 299             (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) {
 300                 mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid);
 301                 return (-1);
 302         }
 303         if (mdb_vread(&apix_vector, sizeof (apix_vector_t),
 304             (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) {
 305                 mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector);
 306                 return (-1);
 307         }
 308         if (apix_vector.v_share == 0) {
 309                 if (rec->ttr_ipl == XC_CPUPOKE_PIL)
 310                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
 311                 else
 312                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
 313         } else {
 314                 if (mdb_vread(&av, sizeof (struct autovec),
 315                     (uintptr_t)(apix_vector.v_autovect)) == -1) {
 316                         mdb_warn("couldn't read autovec at %p",
 317                             (uintptr_t)apix_vector.v_autovect);
 318                 }
 319 
 320                 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
 321         }
 322 
 323         return (0);
 324 }
 325 
 326 
/*
 * Trap number to display name table, searched linearly by ttrace_trap().
 * The entry whose tt_name is NULL terminates the table and must stay last.
 */
static struct {
        int tt_trapno;
        char *tt_name;
} ttrace_traps[] = {
        { T_ZERODIV,    "divide-error" },
        { T_SGLSTP,     "debug-exception" },
        { T_NMIFLT,     "nmi-interrupt" },
        { T_BPTFLT,     "breakpoint" },
        { T_OVFLW,      "into-overflow" },
        { T_BOUNDFLT,   "bound-exceeded" },
        { T_ILLINST,    "invalid-opcode" },
        { T_NOEXTFLT,   "device-not-avail" },
        { T_DBLFLT,     "double-fault" },
        { T_EXTOVRFLT,  "segment-overrun" },
        { T_TSSFLT,     "invalid-tss" },
        { T_SEGFLT,     "segment-not-pres" },
        { T_STKFLT,     "stack-fault" },
        { T_GPFLT,      "general-protectn" },
        { T_PGFLT,      "page-fault" },
        { T_EXTERRFLT,  "error-fault" },
        { T_ALIGNMENT,  "alignment-check" },
        { T_MCE,        "machine-check" },
        { T_SIMDFPE,    "sse-exception" },

        { T_DBGENTR,    "debug-enter" },
        { T_FASTTRAP,   "fasttrap-0xd2" },
        { T_SYSCALLINT, "syscall-0x91" },
        { T_DTRACE_RET, "dtrace-ret" },
        { T_SOFTINT,    "softint" },
        { T_INTERRUPT,  "interrupt" },
        { T_FAULT,      "fault" },
        { T_AST,        "ast" },
        { T_SYSCALL,    "syscall" },

        /* Terminator: ttrace_trap() stops at the first NULL tt_name. */
        { 0,            NULL }
};
 363 
 364 static int
 365 ttrace_trap(trap_trace_rec_t *rec)
 366 {
 367         int i;
 368 
 369         if (rec->ttr_regs.r_trapno == T_AST)
 370                 mdb_printf("%-3s ", "-");
 371         else
 372                 mdb_printf("%-3x ", rec->ttr_regs.r_trapno);
 373 
 374         for (i = 0; ttrace_traps[i].tt_name != NULL; i++) {
 375                 if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno)
 376                         break;
 377         }
 378 
 379         if (ttrace_traps[i].tt_name == NULL)
 380                 mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)");
 381         else
 382                 mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name);
 383 
 384         return (0);
 385 }
 386 
/*
 * Print the extended detail line for an interrupt record: its vector (as
 * "irq", in hex) followed by the ttr_ipl, ttr_pri and ttr_spl fields, which
 * are labelled "ipl", "oldpri" and "basepri" respectively.
 */
static void
ttrace_intr_detail(trap_trace_rec_t *rec)
{
        mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector,
            rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl);
}
 393 
/*
 * Dispatch table used by ttrace_walk(): for each record marker, the four
 * character name shown in the TYPE column and the function that prints the
 * vector and handler columns.  Handlers return -1 to fail the walk.
 *
 * The entry whose t_hdlr is NULL terminates the table.  The table is not
 * const because ttrace() rewrites the t_hdlr of the TT_INTERRUPT entry at
 * run time when the target uses apix.
 */
static struct {
        uchar_t t_marker;
        char *t_name;
        int (*t_hdlr)(trap_trace_rec_t *);
} ttrace_hdlr[] = {
        { TT_SYSCALL, "sysc", ttrace_syscall },
        { TT_SYSENTER, "syse", ttrace_syscall },
        { TT_SYSC, "asys", ttrace_syscall },
        { TT_SYSC64, "sc64", ttrace_syscall },
        { TT_INTERRUPT, "intr", ttrace_interrupt },
        { TT_TRAP, "trap", ttrace_trap },
        { TT_EVENT, "evnt", ttrace_trap },
        { 0, NULL, NULL }
};
 408 
/*
 * State shared between the ::ttrace dcmd and its walk callback,
 * ttrace_walk().
 */
typedef struct ttrace_dcmd {
        /* CPU to restrict output to, or -1 for every CPU. */
        processorid_t ttd_cpu;
        /* Non-zero when -x was given: print per-record detail. */
        uint_t ttd_extended;
        /* When non-zero (-t), only show records whose ttr_curthread matches. */
        uintptr_t ttd_kthread;
        /* Copy of the target's trap_trace_ctl array; maps records to CPUs. */
        trap_trace_ctl_t ttd_ttc[NCPU];
} ttrace_dcmd_t;
 415 
#if defined(__amd64)

/*
 * DUMP(reg) expands to two printf arguments: the register's name as a string
 * and the value of regs->r_<reg>.  It relies on a local variable named
 * 'regs' being in scope.  THREEREGS formats three such name/value pairs on
 * one indented line.
 */
#define DUMP(reg) #reg, regs->r_##reg
#define THREEREGS       "         %3s: %16lx %3s: %16lx %3s: %16lx\n"

/*
 * Print the saved register set of a trap trace record (amd64 layout), along
 * with the record's ttr_cr2 and the fs/gs base values, followed by a blank
 * line.
 */
static void
ttrace_dumpregs(trap_trace_rec_t *rec)
{
        struct regs *regs = &rec->ttr_regs;

        mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx));
        mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9));
        mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp));
        mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12));
        mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15));
        mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs));
        /* r_trapno is labelled "trp" so that it fits the 3-column name. */
        mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err));
        mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl));
        /* cr2 comes from the trace record itself, not from struct regs. */
        mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2);
        mdb_printf("         %3s: %16lx %3s: %16lx\n",
            "fsb", regs->__r_fsbase,
            "gsb", regs->__r_gsbase);
        mdb_printf("\n");
}

#else

/*
 * DUMP(reg) expands to two printf arguments: the register's name as a string
 * and the value of regs->r_<reg>.  It relies on a local variable named
 * 'regs' being in scope.  FOURREGS formats four such name/value pairs on one
 * indented line.
 */
#define DUMP(reg) #reg, regs->r_##reg
#define FOURREGS        "         %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n"

/*
 * Print the saved register set of a trap trace record (non-amd64 layout),
 * along with the record's ttr_cr2, followed by a blank line.
 */
static void
ttrace_dumpregs(trap_trace_rec_t *rec)
{
        struct regs *regs = &rec->ttr_regs;

        mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds));
        mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp));
        mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax));
        /* r_trapno is labelled "trp" so that it fits the 3-column name. */
        mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err),
            DUMP(pc), DUMP(cs));
        /* cr2 comes from the trace record itself, not from struct regs. */
        mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss),
            "cr2", rec->ttr_cr2);
        mdb_printf("\n");
}

#endif  /* __amd64 */
 462 
 463 int
 464 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd)
 465 {
 466         struct regs *regs = &rec->ttr_regs;
 467         processorid_t cpu = -1, i;
 468 
 469         for (i = 0; i < NCPU; i++) {
 470                 if (addr >= dcmd->ttd_ttc[i].ttc_first &&
 471                     addr < dcmd->ttd_ttc[i].ttc_limit) {
 472                         cpu = i;
 473                         break;
 474                 }
 475         }
 476 
 477         if (cpu == -1) {
 478                 mdb_warn("couldn't find %p in any trap trace ctl\n", addr);
 479                 return (WALK_ERR);
 480         }
 481 
 482         if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu)
 483                 return (WALK_NEXT);
 484 
 485         if (dcmd->ttd_kthread != 0 &&
 486             dcmd->ttd_kthread != rec->ttr_curthread)
 487                 return (WALK_NEXT);
 488 
 489         mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp);
 490 
 491         for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) {
 492                 if (rec->ttr_marker != ttrace_hdlr[i].t_marker)
 493                         continue;
 494                 mdb_printf("%4s ", ttrace_hdlr[i].t_name);
 495                 if (ttrace_hdlr[i].t_hdlr(rec) == -1)
 496                         return (WALK_ERR);
 497         }
 498 
 499         mdb_printf(" %a\n", regs->r_pc);
 500 
 501         if (dcmd->ttd_extended == FALSE)
 502                 return (WALK_NEXT);
 503 
 504         if (rec->ttr_marker == TT_INTERRUPT)
 505                 ttrace_intr_detail(rec);
 506         else
 507                 ttrace_dumpregs(rec);
 508 
 509         if (rec->ttr_sdepth > 0) {
 510                 for (i = 0; i < rec->ttr_sdepth; i++) {
 511                         if (i >= TTR_STACK_DEPTH) {
 512                                 mdb_printf("%17s*** invalid ttr_sdepth (is %d, "
 513                                     "should be <= %d)\n", " ", rec->ttr_sdepth,
 514                                     TTR_STACK_DEPTH);
 515                                 break;
 516                         }
 517 
 518                         mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]);
 519                 }
 520                 mdb_printf("\n");
 521         }
 522 
 523         return (WALK_NEXT);
 524 }
 525 
 526 int
 527 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 528 {
 529         ttrace_dcmd_t dcmd;
 530         trap_trace_ctl_t *ttc = dcmd.ttd_ttc;
 531         trap_trace_rec_t rec;
 532         size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
 533 
 534         if (!ttrace_ttr_size_check())
 535                 return (WALK_ERR);
 536 
 537         bzero(&dcmd, sizeof (dcmd));
 538         dcmd.ttd_cpu = -1;
 539         dcmd.ttd_extended = FALSE;
 540 
 541         if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) {
 542                 mdb_warn("symbol 'trap_trace_ctl' not found; "
 543                     "non-TRAPTRACE kernel?\n");
 544                 return (DCMD_ERR);
 545         }
 546 
 547         if (mdb_getopts(argc, argv,
 548             'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended,
 549             't', MDB_OPT_UINTPTR, &dcmd.ttd_kthread, NULL) != argc)
 550                 return (DCMD_USAGE);
 551 
 552         if (DCMD_HDRSPEC(flags)) {
 553                 mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU",
 554                     "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER",
 555                     " EIP");
 556         }
 557 
 558         if (flags & DCMD_ADDRSPEC) {
 559                 if (addr >= NCPU) {
 560                         if (mdb_vread(&rec, sizeof (rec), addr) == -1) {
 561                                 mdb_warn("couldn't read trap trace record "
 562                                     "at %p", addr);
 563                                 return (DCMD_ERR);
 564                         }
 565 
 566                         if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR)
 567                                 return (DCMD_ERR);
 568 
 569                         return (DCMD_OK);
 570                 }
 571                 dcmd.ttd_cpu = addr;
 572         }
 573 
 574         if (mdb_readvar(&use_apix, "apix_enable") == -1) {
 575                 mdb_warn("failed to read apix_enable");
 576                 use_apix = 0;
 577         }
 578 
 579         if (use_apix) {
 580                 if (mdb_readvar(&d_apixs, "apixs") == -1) {
 581                         mdb_warn("\nfailed to read apixs.");
 582                         return (DCMD_ERR);
 583                 }
 584                 /* change to apix ttrace interrupt handler */
 585                 ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt;
 586         }
 587 
 588         if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) {
 589                 mdb_warn("couldn't walk 'ttrace'");
 590                 return (DCMD_ERR);
 591         }
 592 
 593         return (DCMD_OK);
 594 }
 595 
/*
 * Walker init for the mutex owner walker.  No state is needed: the step
 * function works directly from wsp->walk_addr, so this just lets the walk
 * begin.
 */
/*ARGSUSED*/
int
mutex_owner_init(mdb_walk_state_t *wsp)
{
        return (WALK_NEXT);
}
 602 
 603 int
 604 mutex_owner_step(mdb_walk_state_t *wsp)
 605 {
 606         uintptr_t addr = wsp->walk_addr;
 607         mutex_impl_t mtx;
 608         uintptr_t owner;
 609         kthread_t thr;
 610 
 611         if (mdb_vread(&mtx, sizeof (mtx), addr) == -1)
 612                 return (WALK_ERR);
 613 
 614         if (!MUTEX_TYPE_ADAPTIVE(&mtx))
 615                 return (WALK_DONE);
 616 
 617         if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == NULL)
 618                 return (WALK_DONE);
 619 
 620         if (mdb_vread(&thr, sizeof (thr), owner) != -1)
 621                 (void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata);
 622 
 623         return (WALK_DONE);
 624 }
 625 
 626 static void
 627 gate_desc_dump(gate_desc_t *gate, const char *label, int header)
 628 {
 629         const char *lastnm;
 630         uint_t lastval;
 631         char type[4];
 632 
 633         switch (gate->sgd_type) {
 634         case SDT_SYSIGT:
 635                 strcpy(type, "int");
 636                 break;
 637         case SDT_SYSTGT:
 638                 strcpy(type, "trp");
 639                 break;
 640         case SDT_SYSTASKGT:
 641                 strcpy(type, "tsk");
 642                 break;
 643         default:
 644                 (void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type);
 645         }
 646 
 647 #if defined(__amd64)
 648         lastnm = "IST";
 649         lastval = gate->sgd_ist;
 650 #else
 651         lastnm = "STK";
 652         lastval = gate->sgd_stkcpy;
 653 #endif
 654 
 655         if (header) {
 656                 mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> "
 657                     "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label),
 658                     "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm);
 659         }
 660 
 661         mdb_printf("%s", label);
 662 
 663         if (gate->sgd_type == SDT_SYSTASKGT)
 664                 mdb_printf("%-30s ", "-");
 665         else
 666                 mdb_printf("%-30a ", GATESEG_GETOFFSET(gate));
 667 
 668         mdb_printf("%4x  %d  %c %3s %2x\n", gate->sgd_selector,
 669             gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval);
 670 }
 671 
 672 /*ARGSUSED*/
 673 static int
 674 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 675 {
 676         gate_desc_t gate;
 677 
 678         if (argc != 0 || !(flags & DCMD_ADDRSPEC))
 679                 return (DCMD_USAGE);
 680 
 681         if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
 682             sizeof (gate_desc_t)) {
 683                 mdb_warn("failed to read gate descriptor at %p\n", addr);
 684                 return (DCMD_ERR);
 685         }
 686 
 687         gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags));
 688 
 689         return (DCMD_OK);
 690 }
 691 
 692 /*ARGSUSED*/
 693 static int
 694 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 695 {
 696         int i;
 697 
 698         if (!(flags & DCMD_ADDRSPEC)) {
 699                 GElf_Sym idt0_va;
 700                 gate_desc_t *idt0;
 701 
 702                 if (mdb_lookup_by_name("idt0", &idt0_va) < 0) {
 703                         mdb_warn("failed to find VA of idt0");
 704                         return (DCMD_ERR);
 705                 }
 706 
 707                 addr = idt0_va.st_value;
 708                 if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) {
 709                         mdb_warn("failed to read idt0 at %p\n", addr);
 710                         return (DCMD_ERR);
 711                 }
 712 
 713                 addr = (uintptr_t)idt0;
 714         }
 715 
 716         for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) {
 717                 gate_desc_t gate;
 718                 char label[6];
 719 
 720                 if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
 721                     sizeof (gate_desc_t)) {
 722                         mdb_warn("failed to read gate descriptor at %p\n",
 723                             addr);
 724                         return (DCMD_ERR);
 725                 }
 726 
 727                 (void) mdb_snprintf(label, sizeof (label), "%3d: ", i);
 728                 gate_desc_dump(&gate, label, i == 0);
 729         }
 730 
 731         return (DCMD_OK);
 732 }
 733 
 734 static void
 735 htables_help(void)
 736 {
 737         mdb_printf(
 738             "Given a (hat_t *), generates the list of all (htable_t *)s\n"
 739             "that correspond to that address space\n");
 740 }
 741 
 742 static void
 743 report_maps_help(void)
 744 {
 745         mdb_printf(
 746             "Given a PFN, report HAT structures that map the page, or use\n"
 747             "the page as a pagetable.\n"
 748             "\n"
 749             "-m Interpret the PFN as an MFN (machine frame number)\n");
 750 }
 751 
 752 static void
 753 ptable_help(void)
 754 {
 755         mdb_printf(
 756             "Given a PFN holding a page table, print its contents, and\n"
 757             "the address of the corresponding htable structure.\n"
 758             "\n"
 759             "-m Interpret the PFN as an MFN (machine frame number)\n"
 760             "-l force page table level (3 is top)\n");
 761 }
 762 
 763 static void
 764 ptmap_help(void)
 765 {
 766         mdb_printf(
 767             "Report all mappings represented by the page table hierarchy\n"
 768             "rooted at the given cr3 value / physical address.\n"
 769             "\n"
 770             "-w run ::whatis on mapping start addresses\n");
 771 }
 772 
 773 /*
 774  * NSEC_SHIFT is replicated here (it is not defined in a header file),
 775  * but for amusement, the reader is directed to the comment that explains
 776  * the rationale for this particular value on x86.  Spoiler:  the value is
 777  * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
 778  * in that comment sounds too familiar, it's because your author also wrote
 779  * that code -- some fifteen years prior to this writing in 2011...)
 780  */
 781 #define NSEC_SHIFT 5
 782 
 783 /*ARGSUSED*/
 784 static int
 785 scalehrtime_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 786 {
 787         uint32_t nsec_scale;
 788         hrtime_t tsc = addr, hrt;
 789         unsigned int *tscp = (unsigned int *)&tsc;
 790         uintptr_t scalehrtimef;
 791         uint64_t scale;
 792         GElf_Sym sym;
 793 
 794         if (!(flags & DCMD_ADDRSPEC)) {
 795                 if (argc != 1)
 796                         return (DCMD_USAGE);
 797 
 798                 switch (argv[0].a_type) {
 799                 case MDB_TYPE_STRING:
 800                         tsc = mdb_strtoull(argv[0].a_un.a_str);
 801                         break;
 802                 case MDB_TYPE_IMMEDIATE:
 803                         tsc = argv[0].a_un.a_val;
 804                         break;
 805                 default:
 806                         return (DCMD_USAGE);
 807                 }
 808         }
 809 
 810         if (mdb_readsym(&scalehrtimef,
 811             sizeof (scalehrtimef), "scalehrtimef") == -1) {
 812                 mdb_warn("couldn't read 'scalehrtimef'");
 813                 return (DCMD_ERR);
 814         }
 815 
 816         if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
 817                 mdb_warn("couldn't find 'tsc_scalehrtime'");
 818                 return (DCMD_ERR);
 819         }
 820 
 821         if (sym.st_value != scalehrtimef) {
 822                 mdb_warn("::scalehrtime requires that scalehrtimef "
 823                     "be set to tsc_scalehrtime\n");
 824                 return (DCMD_ERR);
 825         }
 826 
 827         if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
 828                 mdb_warn("couldn't read 'nsec_scale'");
 829                 return (DCMD_ERR);
 830         }
 831 
 832         scale = (uint64_t)nsec_scale;
 833 
 834         hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
 835         hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
 836 
 837         mdb_printf("0x%llx\n", hrt);
 838 
 839         return (DCMD_OK);
 840 }
 841 
 842 /*
 843  * The x86 feature set is implemented as a bitmap array. That bitmap array is
 844  * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
 845  * macro. We have the names for each of these features in unix's text segment
 846  * so we do not have to duplicate them and instead just look them up.
 847  */
 848 /*ARGSUSED*/
 849 static int
 850 x86_featureset_cmd(uintptr_t addr, uint_t flags, int argc,
 851     const mdb_arg_t *argv)
 852 {
 853         void *fset;
 854         GElf_Sym sym;
 855         uintptr_t nptr;
 856         char name[128];
 857         int ii;
 858 
 859         size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES);
 860 
 861         if (argc != 0)
 862                 return (DCMD_USAGE);
 863 
 864         if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) {
 865                 mdb_warn("couldn't find x86_feature_names");
 866                 return (DCMD_ERR);
 867         }
 868 
 869         fset = mdb_zalloc(sz, UM_NOSLEEP);
 870         if (fset == NULL) {
 871                 mdb_warn("failed to allocate memory for x86_featureset");
 872                 return (DCMD_ERR);
 873         }
 874 
 875         if (mdb_readvar(fset, "x86_featureset") != sz) {
 876                 mdb_warn("failed to read x86_featureset");
 877                 mdb_free(fset, sz);
 878                 return (DCMD_ERR);
 879         }
 880 
 881         for (ii = 0; ii < NUM_X86_FEATURES; ii++) {
 882                 if (!BT_TEST((ulong_t *)fset, ii))
 883                         continue;
 884 
 885                 if (mdb_vread(&nptr, sizeof (char *), sym.st_value +
 886                     sizeof (void *) * ii) != sizeof (char *)) {
 887                         mdb_warn("failed to read feature array %d", ii);
 888                         mdb_free(fset, sz);
 889                         return (DCMD_ERR);
 890                 }
 891 
 892                 if (mdb_readstr(name, sizeof (name), nptr) == -1) {
 893                         mdb_warn("failed to read feature %d", ii);
 894                         mdb_free(fset, sz);
 895                         return (DCMD_ERR);
 896                 }
 897                 mdb_printf("%s\n", name);
 898         }
 899 
 900         mdb_free(fset, sz);
 901         return (DCMD_OK);
 902 }
 903 
 904 #ifdef _KMDB
 905 /* ARGSUSED */
 906 static int
 907 sysregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 908 {
 909         ulong_t cr0, cr2, cr3, cr4;
 910         desctbr_t gdtr;
 911 
 912         static const mdb_bitmask_t cr0_flag_bits[] = {
 913                 { "PE",         CR0_PE,         CR0_PE },
 914                 { "MP",         CR0_MP,         CR0_MP },
 915                 { "EM",         CR0_EM,         CR0_EM },
 916                 { "TS",         CR0_TS,         CR0_TS },
 917                 { "ET",         CR0_ET,         CR0_ET },
 918                 { "NE",         CR0_NE,         CR0_NE },
 919                 { "WP",         CR0_WP,         CR0_WP },
 920                 { "AM",         CR0_AM,         CR0_AM },
 921                 { "NW",         CR0_NW,         CR0_NW },
 922                 { "CD",         CR0_CD,         CR0_CD },
 923                 { "PG",         CR0_PG,         CR0_PG },
 924                 { NULL,         0,              0 }
 925         };
 926 
 927         static const mdb_bitmask_t cr3_flag_bits[] = {
 928                 { "PCD",        CR3_PCD,        CR3_PCD },
 929                 { "PWT",        CR3_PWT,        CR3_PWT },
 930                 { NULL,         0,              0, }
 931         };
 932 
 933         static const mdb_bitmask_t cr4_flag_bits[] = {
 934                 { "VME",        CR4_VME,        CR4_VME },
 935                 { "PVI",        CR4_PVI,        CR4_PVI },
 936                 { "TSD",        CR4_TSD,        CR4_TSD },
 937                 { "DE",         CR4_DE,         CR4_DE },
 938                 { "PSE",        CR4_PSE,        CR4_PSE },
 939                 { "PAE",        CR4_PAE,        CR4_PAE },
 940                 { "MCE",        CR4_MCE,        CR4_MCE },
 941                 { "PGE",        CR4_PGE,        CR4_PGE },
 942                 { "PCE",        CR4_PCE,        CR4_PCE },
 943                 { "OSFXSR",     CR4_OSFXSR,     CR4_OSFXSR },
 944                 { "OSXMMEXCPT", CR4_OSXMMEXCPT, CR4_OSXMMEXCPT },
 945                 { "VMXE",       CR4_VMXE,       CR4_VMXE },
 946                 { "SMXE",       CR4_SMXE,       CR4_SMXE },
 947                 { "PCIDE",      CR4_PCIDE,      CR4_PCIDE },
 948                 { "OSXSAVE",    CR4_OSXSAVE,    CR4_OSXSAVE },
 949                 { "SMEP",       CR4_SMEP,       CR4_SMEP },
 950                 { "SMAP",       CR4_SMAP,       CR4_SMAP },
 951                 { NULL,         0,              0 }
 952         };
 953 
 954         cr0 = kmdb_unix_getcr0();
 955         cr2 = kmdb_unix_getcr2();
 956         cr3 = kmdb_unix_getcr3();
 957         cr4 = kmdb_unix_getcr4();
 958 
 959         kmdb_unix_getgdtr(&gdtr);
 960 
 961         mdb_printf("%%cr0 = 0x%lx <%b>\n", cr0, cr0, cr0_flag_bits);
 962         mdb_printf("%%cr2 = 0x%lx <%a>\n", cr2, cr2);
 963 
 964         if ((cr4 & CR4_PCIDE)) {
 965                 mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx pcid:%lu>\n", cr3,
 966                     cr3 >> MMU_PAGESHIFT, cr3 & MMU_PAGEOFFSET);
 967         } else {
 968                 mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx flags:%b>\n", cr3,
 969                     cr3 >> MMU_PAGESHIFT, cr3, cr3_flag_bits);
 970         }
 971 
 972         mdb_printf("%%cr4 = 0x%lx <%b>\n", cr4, cr4, cr4_flag_bits);
 973 
 974         mdb_printf("%%gdtr.base = 0x%lx, %%gdtr.limit = 0x%hx\n",
 975             gdtr.dtr_base, gdtr.dtr_limit);
 976 
 977         return (DCMD_OK);
 978 }
 979 #endif
 980 
 981 static const mdb_dcmd_t dcmds[] = {
 982         { "gate_desc", ":", "dump a gate descriptor", gate_desc },
 983         { "idt", ":[-v]", "dump an IDT", idt },
 984         { "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace },
 985         { "vatopfn", ":[-a as]", "translate address to physical page",
 986             va2pfn_dcmd },
 987         { "report_maps", ":[-m]",
 988             "Given PFN, report mappings / page table usage",
 989             report_maps_dcmd, report_maps_help },
 990         { "htables", "", "Given hat_t *, lists all its htable_t * values",
 991             htables_dcmd, htables_help },
 992         { "ptable", ":[-lm]", "Given PFN, dump contents of a page table",
 993             ptable_dcmd, ptable_help },
 994         { "ptmap", ":", "Given a cr3 value, dump all mappings",
 995             ptmap_dcmd, ptmap_help },
 996         { "pte", ":[-l N]", "print human readable page table entry",
 997             pte_dcmd },
 998         { "pfntomfn", ":", "convert physical page to hypervisor machine page",
 999             pfntomfn_dcmd },
1000         { "mfntopfn", ":", "convert hypervisor machine page to physical page",
1001             mfntopfn_dcmd },
1002         { "memseg_list", ":", "show memseg list", memseg_list },
1003         { "scalehrtime", ":",
1004             "scale an unscaled high-res time", scalehrtime_cmd },
1005         { "x86_featureset", NULL, "dump the x86_featureset vector",
1006                 x86_featureset_cmd },
1007 #ifdef _KMDB
1008         { "sysregs", NULL, "dump system registers", sysregs_dcmd },
1009 #endif
1010         { NULL }
1011 };
1012 
1013 static const mdb_walker_t walkers[] = {
1014         { "ttrace", "walks trap trace buffers in reverse chronological order",
1015                 ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
1016         { "mutex_owner", "walks the owner of a mutex",
1017                 mutex_owner_init, mutex_owner_step },
1018         { "memseg", "walk the memseg structures",
1019                 memseg_walk_init, memseg_walk_step, memseg_walk_fini },
1020         { NULL }
1021 };
1022 
1023 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1024 
1025 const mdb_modinfo_t *
1026 _mdb_init(void)
1027 {
1028         return (&modinfo);
1029 }
1030 
/*
 * Module teardown entry point.  The only work done here is a call to
 * free_mmu(), which is defined outside this file; presumably it releases
 * the MMU-related state held by the module (confirm against its definition).
 */
void
_mdb_fini(void)
{
        free_mmu();
}