1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2018 Joyent, Inc.
  24  */
  25 
  26 #include <mdb/mdb_modapi.h>
  27 #include <mdb/mdb_ctf.h>
  28 #include <sys/cpuvar.h>
  29 #include <sys/systm.h>
  30 #include <sys/traptrace.h>
  31 #include <sys/x_call.h>
  32 #include <sys/xc_levels.h>
  33 #include <sys/avintr.h>
  34 #include <sys/systm.h>
  35 #include <sys/trap.h>
  36 #include <sys/mutex.h>
  37 #include <sys/mutex_impl.h>
  38 #include "i86mmu.h"
  39 #include "unix_sup.h"
  40 #include <sys/apix.h>
  41 #include <sys/x86_archext.h>
  42 #include <sys/bitmap.h>
  43 #include <sys/controlregs.h>
  44 
/* Field width used when printing the HANDLER column of ::ttrace output. */
#define TT_HDLR_WIDTH   17


/* apix only */
/*
 * d_apixs is filled from the kernel's 'apixs' symbol and use_apix from the
 * kernel's 'apix_enable' variable; both are read by the ::ttrace dcmd.
 */
static apix_impl_t *d_apixs[NCPU];
static int use_apix = 0;
  51 
  52 static int
  53 ttrace_ttr_size_check(void)
  54 {
  55         mdb_ctf_id_t ttrtid;
  56         ssize_t ttr_size;
  57 
  58         if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 ||
  59             mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) {
  60                 mdb_warn("failed to determine size of trap_trace_rec_t; "
  61                     "non-TRAPTRACE kernel?\n");
  62                 return (0);
  63         }
  64 
  65         if ((ttr_size = mdb_ctf_type_size(ttrtid)) !=
  66             sizeof (trap_trace_rec_t)) {
  67                 /*
  68                  * On Intel machines, this will happen when TTR_STACK_DEPTH
  69                  * is changed.  This code could be smarter, and could
  70                  * dynamically adapt to different depths, but not until a
  71                  * need for such adaptation is demonstrated.
  72                  */
  73                 mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't "
  74                     "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t));
  75                 return (0);
  76         }
  77 
  78         return (1);
  79 }
  80 
  81 int
  82 ttrace_walk_init(mdb_walk_state_t *wsp)
  83 {
  84         trap_trace_ctl_t *ttcp;
  85         size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
  86         int i;
  87 
  88         if (!ttrace_ttr_size_check())
  89                 return (WALK_ERR);
  90 
  91         ttcp = mdb_zalloc(ttc_size, UM_SLEEP);
  92 
  93         if (wsp->walk_addr != 0) {
  94                 mdb_warn("ttrace only supports global walks\n");
  95                 return (WALK_ERR);
  96         }
  97 
  98         if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) {
  99                 mdb_warn("symbol 'trap_trace_ctl' not found; "
 100                     "non-TRAPTRACE kernel?\n");
 101                 mdb_free(ttcp, ttc_size);
 102                 return (WALK_ERR);
 103         }
 104 
 105         /*
 106          * We'll poach the ttc_current pointer (which isn't used for
 107          * anything) to store a pointer to our current TRAPTRACE record.
 108          * This allows us to only keep the array of trap_trace_ctl structures
 109          * as our walker state (ttc_current may be the only kernel data
 110          * structure member added exclusively to make writing the mdb walker
 111          * a little easier).
 112          */
 113         for (i = 0; i < NCPU; i++) {
 114                 trap_trace_ctl_t *ttc = &ttcp[i];
 115 
 116                 if (ttc->ttc_first == 0)
 117                         continue;
 118 
 119                 /*
 120                  * Assign ttc_current to be the last completed record.
 121                  * Note that the error checking (i.e. in the ttc_next ==
 122                  * ttc_first case) is performed in the step function.
 123                  */
 124                 ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t);
 125         }
 126 
 127         wsp->walk_data = ttcp;
 128         return (WALK_NEXT);
 129 }
 130 
 131 int
 132 ttrace_walk_step(mdb_walk_state_t *wsp)
 133 {
 134         trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc;
 135         trap_trace_rec_t rec;
 136         int rval, i, recsize = sizeof (trap_trace_rec_t);
 137         hrtime_t latest = 0;
 138 
 139         /*
 140          * Loop through the CPUs, looking for the latest trap trace record
 141          * (we want to walk through the trap trace records in reverse
 142          * chronological order).
 143          */
 144         for (i = 0; i < NCPU; i++) {
 145                 ttc = &ttcp[i];
 146 
 147                 if (ttc->ttc_current == 0)
 148                         continue;
 149 
 150                 if (ttc->ttc_current < ttc->ttc_first)
 151                         ttc->ttc_current = ttc->ttc_limit - recsize;
 152 
 153                 if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
 154                         mdb_warn("couldn't read rec at %p", ttc->ttc_current);
 155                         return (WALK_ERR);
 156                 }
 157 
 158                 if (rec.ttr_stamp > latest) {
 159                         latest = rec.ttr_stamp;
 160                         latest_ttc = ttc;
 161                 }
 162         }
 163 
 164         if (latest == 0)
 165                 return (WALK_DONE);
 166 
 167         ttc = latest_ttc;
 168 
 169         if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
 170                 mdb_warn("couldn't read rec at %p", ttc->ttc_current);
 171                 return (WALK_ERR);
 172         }
 173 
 174         rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata);
 175 
 176         if (ttc->ttc_current == ttc->ttc_next)
 177                 ttc->ttc_current = 0;
 178         else
 179                 ttc->ttc_current -= sizeof (trap_trace_rec_t);
 180 
 181         return (rval);
 182 }
 183 
 184 void
 185 ttrace_walk_fini(mdb_walk_state_t *wsp)
 186 {
 187         mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU);
 188 }
 189 
 190 static int
 191 ttrace_syscall(trap_trace_rec_t *rec)
 192 {
 193         GElf_Sym sym;
 194         int sysnum = rec->ttr_sysnum;
 195         uintptr_t addr;
 196         struct sysent sys;
 197 
 198         mdb_printf("%-3x", sysnum);
 199 
 200         if (rec->ttr_sysnum > NSYSCALL) {
 201                 mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum);
 202                 return (0);
 203         }
 204 
 205         if (mdb_lookup_by_name("sysent", &sym) == -1) {
 206                 mdb_warn("\ncouldn't find 'sysent'");
 207                 return (-1);
 208         }
 209 
 210         addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent);
 211 
 212         if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
 213                 mdb_warn("\nsysnum %d out-of-range\n", sysnum);
 214                 return (-1);
 215         }
 216 
 217         if (mdb_vread(&sys, sizeof (sys), addr) == -1) {
 218                 mdb_warn("\nfailed to read sysent at %p", addr);
 219                 return (-1);
 220         }
 221 
 222         mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc);
 223 
 224         return (0);
 225 }
 226 
 227 static int
 228 ttrace_interrupt(trap_trace_rec_t *rec)
 229 {
 230         GElf_Sym sym;
 231         uintptr_t addr;
 232         struct av_head hd;
 233         struct autovec av;
 234 
 235         switch (rec->ttr_regs.r_trapno) {
 236         case T_SOFTINT:
 237                 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
 238                 return (0);
 239         default:
 240                 break;
 241         }
 242 
 243         mdb_printf("%-3x ", rec->ttr_vector);
 244 
 245         if (mdb_lookup_by_name("autovect", &sym) == -1) {
 246                 mdb_warn("\ncouldn't find 'autovect'");
 247                 return (-1);
 248         }
 249 
 250         addr = (uintptr_t)sym.st_value +
 251             rec->ttr_vector * sizeof (struct av_head);
 252 
 253         if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
 254                 mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector);
 255                 return (-1);
 256         }
 257 
 258         if (mdb_vread(&hd, sizeof (hd), addr) == -1) {
 259                 mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector);
 260                 return (-1);
 261         }
 262 
 263         if (hd.avh_link == NULL) {
 264                 if (rec->ttr_ipl == XC_CPUPOKE_PIL)
 265                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
 266                 else
 267                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
 268         } else {
 269                 if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) {
 270                         mdb_warn("couldn't read autovec at %p",
 271                             (uintptr_t)hd.avh_link);
 272                 }
 273 
 274                 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
 275         }
 276 
 277         return (0);
 278 }
 279 
 280 static int
 281 ttrace_apix_interrupt(trap_trace_rec_t *rec)
 282 {
 283         struct autovec av;
 284         apix_impl_t apix;
 285         apix_vector_t apix_vector;
 286 
 287         switch (rec->ttr_regs.r_trapno) {
 288         case T_SOFTINT:
 289                 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
 290                 return (0);
 291         default:
 292                 break;
 293         }
 294 
 295         mdb_printf("%-3x ", rec->ttr_vector);
 296 
 297         /* Read the per CPU apix entry */
 298         if (mdb_vread(&apix, sizeof (apix_impl_t),
 299             (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) {
 300                 mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid);
 301                 return (-1);
 302         }
 303         if (mdb_vread(&apix_vector, sizeof (apix_vector_t),
 304             (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) {
 305                 mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector);
 306                 return (-1);
 307         }
 308         if (apix_vector.v_share == 0) {
 309                 if (rec->ttr_ipl == XC_CPUPOKE_PIL)
 310                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
 311                 else
 312                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
 313         } else {
 314                 if (mdb_vread(&av, sizeof (struct autovec),
 315                     (uintptr_t)(apix_vector.v_autovect)) == -1) {
 316                         mdb_warn("couldn't read autovec at %p",
 317                             (uintptr_t)apix_vector.v_autovect);
 318                 }
 319 
 320                 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
 321         }
 322 
 323         return (0);
 324 }
 325 
 326 
/*
 * Table mapping trap numbers (the r_trapno value saved in a record) to the
 * short names that ttrace_trap() prints in the HANDLER column.  The table
 * is terminated by an entry whose tt_name is NULL; lookups scan it linearly
 * and stop at the first match.
 */
static struct {
        int tt_trapno;
        char *tt_name;
} ttrace_traps[] = {
        { T_ZERODIV,    "divide-error" },
        { T_SGLSTP,     "debug-exception" },
        { T_NMIFLT,     "nmi-interrupt" },
        { T_BPTFLT,     "breakpoint" },
        { T_OVFLW,      "into-overflow" },
        { T_BOUNDFLT,   "bound-exceeded" },
        { T_ILLINST,    "invalid-opcode" },
        { T_NOEXTFLT,   "device-not-avail" },
        { T_DBLFLT,     "double-fault" },
        { T_EXTOVRFLT,  "segment-overrun" },
        { T_TSSFLT,     "invalid-tss" },
        { T_SEGFLT,     "segment-not-pres" },
        { T_STKFLT,     "stack-fault" },
        { T_GPFLT,      "general-protectn" },
        { T_PGFLT,      "page-fault" },
        { T_EXTERRFLT,  "error-fault" },
        { T_ALIGNMENT,  "alignment-check" },
        { T_MCE,        "machine-check" },
        { T_SIMDFPE,    "sse-exception" },

        { T_DBGENTR,    "debug-enter" },
        { T_FASTTRAP,   "fasttrap-0xd2" },
        { T_SYSCALLINT, "syscall-0x91" },
        { T_DTRACE_RET, "dtrace-ret" },
        { T_SOFTINT,    "softint" },
        { T_INTERRUPT,  "interrupt" },
        { T_FAULT,      "fault" },
        { T_AST,        "ast" },
        { T_SYSCALL,    "syscall" },

        /* Sentinel: tt_name == NULL ends the table. */
        { 0,            NULL }
};
 363 
 364 static int
 365 ttrace_trap(trap_trace_rec_t *rec)
 366 {
 367         int i;
 368 
 369         if (rec->ttr_regs.r_trapno == T_AST)
 370                 mdb_printf("%-3s ", "-");
 371         else
 372                 mdb_printf("%-3x ", rec->ttr_regs.r_trapno);
 373 
 374         for (i = 0; ttrace_traps[i].tt_name != NULL; i++) {
 375                 if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno)
 376                         break;
 377         }
 378 
 379         if (ttrace_traps[i].tt_name == NULL)
 380                 mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)");
 381         else
 382                 mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name);
 383 
 384         return (0);
 385 }
 386 
 387 static void
 388 ttrace_intr_detail(trap_trace_rec_t *rec)
 389 {
 390         mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector,
 391             rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl);
 392 }
 393 
/*
 * Dispatch table used by ttrace_walk(): a record whose ttr_marker equals
 * t_marker is labelled with t_name in the TYPE column and passed to t_hdlr,
 * which prints the vector and handler columns and returns -1 on failure.
 * The table is terminated by an entry whose t_hdlr is NULL.
 *
 * The table is deliberately not const: the ::ttrace dcmd replaces the
 * handler of the TT_INTERRUPT entry with ttrace_apix_interrupt when the
 * target has apix enabled.
 */
static struct {
        uchar_t t_marker;
        char *t_name;
        int (*t_hdlr)(trap_trace_rec_t *);
} ttrace_hdlr[] = {
        { TT_SYSCALL, "sysc", ttrace_syscall },
        { TT_SYSENTER, "syse", ttrace_syscall },
        { TT_SYSC, "asys", ttrace_syscall },
        { TT_SYSC64, "sc64", ttrace_syscall },
        { TT_INTERRUPT, "intr", ttrace_interrupt },
        { TT_TRAP, "trap", ttrace_trap },
        { TT_EVENT, "evnt", ttrace_trap },
        /* Sentinel: t_hdlr == NULL ends the table. */
        { 0, NULL, NULL }
};
 408 
/*
 * State shared between the ::ttrace dcmd and its per-record callback,
 * ttrace_walk().
 */
typedef struct ttrace_dcmd {
        /* CPU to restrict output to, or -1 for all CPUs. */
        processorid_t ttd_cpu;
        /* Non-zero when extended output (-x) was requested. */
        uint_t ttd_extended;
        /* Thread address to restrict output to (-t), or 0 for no filter. */
        uintptr_t ttd_kthread;
        /* Copy of the kernel's trap_trace_ctl array, indexed by CPU. */
        trap_trace_ctl_t ttd_ttc[NCPU];
} ttrace_dcmd_t;
 415 
#if defined(__amd64)

/*
 * DUMP(reg) expands to two printf arguments: the register name as a string
 * and the value of regs->r_<reg>.  It relies on a local named 'regs'.
 * THREEREGS is the format for one output row of three name/value pairs.
 */
#define DUMP(reg) #reg, regs->r_##reg
#define THREEREGS       "         %3s: %16lx %3s: %16lx %3s: %16lx\n"

/*
 * Extended (-x) output for a non-interrupt record (amd64): print the saved
 * register set three registers per row, with the trap number and the
 * record's %cr2 mixed in, then fsbase/gsbase and a blank line.
 */
static void
ttrace_dumpregs(trap_trace_rec_t *rec)
{
        struct regs *regs = &rec->ttr_regs;

        mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx));
        mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9));
        mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp));
        mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12));
        mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15));
        mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs));
        mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err));
        mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl));
        mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2);
        mdb_printf("         %3s: %16lx %3s: %16lx\n",
            "fsb", regs->__r_fsbase,
            "gsb", regs->__r_gsbase);
        mdb_printf("\n");
}

#else

/*
 * DUMP(reg) expands to two printf arguments: the register name as a string
 * and the value of regs->r_<reg>.  It relies on a local named 'regs'.
 * FOURREGS is the format for one output row of four name/value pairs.
 */
#define DUMP(reg) #reg, regs->r_##reg
#define FOURREGS        "         %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n"

/*
 * Extended (-x) output for a non-interrupt record (non-amd64): print the
 * saved register set four registers per row, with the trap number and the
 * record's %cr2 mixed in, then a blank line.
 */
static void
ttrace_dumpregs(trap_trace_rec_t *rec)
{
        struct regs *regs = &rec->ttr_regs;

        mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds));
        mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp));
        mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax));
        mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err),
            DUMP(pc), DUMP(cs));
        mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss),
            "cr2", rec->ttr_cr2);
        mdb_printf("\n");
}

#endif  /* __amd64 */
 462 
 463 int
 464 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd)
 465 {
 466         struct regs *regs = &rec->ttr_regs;
 467         processorid_t cpu = -1, i;
 468 
 469         for (i = 0; i < NCPU; i++) {
 470                 if (addr >= dcmd->ttd_ttc[i].ttc_first &&
 471                     addr < dcmd->ttd_ttc[i].ttc_limit) {
 472                         cpu = i;
 473                         break;
 474                 }
 475         }
 476 
 477         if (cpu == -1) {
 478                 mdb_warn("couldn't find %p in any trap trace ctl\n", addr);
 479                 return (WALK_ERR);
 480         }
 481 
 482         if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu)
 483                 return (WALK_NEXT);
 484 
 485         if (dcmd->ttd_kthread != 0 &&
 486             dcmd->ttd_kthread != rec->ttr_curthread)
 487                 return (WALK_NEXT);
 488 
 489         mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp);
 490 
 491         for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) {
 492                 if (rec->ttr_marker != ttrace_hdlr[i].t_marker)
 493                         continue;
 494                 mdb_printf("%4s ", ttrace_hdlr[i].t_name);
 495                 if (ttrace_hdlr[i].t_hdlr(rec) == -1)
 496                         return (WALK_ERR);
 497         }
 498 
 499         mdb_printf(" %a\n", regs->r_pc);
 500 
 501         if (dcmd->ttd_extended == FALSE)
 502                 return (WALK_NEXT);
 503 
 504         if (rec->ttr_marker == TT_INTERRUPT)
 505                 ttrace_intr_detail(rec);
 506         else
 507                 ttrace_dumpregs(rec);
 508 
 509         if (rec->ttr_sdepth > 0) {
 510                 for (i = 0; i < rec->ttr_sdepth; i++) {
 511                         if (i >= TTR_STACK_DEPTH) {
 512                                 mdb_printf("%17s*** invalid ttr_sdepth (is %d, "
 513                                     "should be <= %d)\n", " ", rec->ttr_sdepth,
 514                                     TTR_STACK_DEPTH);
 515                                 break;
 516                         }
 517 
 518                         mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]);
 519                 }
 520                 mdb_printf("\n");
 521         }
 522 
 523         return (WALK_NEXT);
 524 }
 525 
 526 int
 527 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 528 {
 529         ttrace_dcmd_t dcmd;
 530         trap_trace_ctl_t *ttc = dcmd.ttd_ttc;
 531         trap_trace_rec_t rec;
 532         size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
 533 
 534         if (!ttrace_ttr_size_check())
 535                 return (WALK_ERR);
 536 
 537         bzero(&dcmd, sizeof (dcmd));
 538         dcmd.ttd_cpu = -1;
 539         dcmd.ttd_extended = FALSE;
 540 
 541         if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) {
 542                 mdb_warn("symbol 'trap_trace_ctl' not found; "
 543                     "non-TRAPTRACE kernel?\n");
 544                 return (DCMD_ERR);
 545         }
 546 
 547         if (mdb_getopts(argc, argv,
 548             'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended,
 549             't', MDB_OPT_UINTPTR, &dcmd.ttd_kthread, NULL) != argc)
 550                 return (DCMD_USAGE);
 551 
 552         if (DCMD_HDRSPEC(flags)) {
 553                 mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU",
 554                     "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER",
 555                     " EIP");
 556         }
 557 
 558         if (flags & DCMD_ADDRSPEC) {
 559                 if (addr >= NCPU) {
 560                         if (mdb_vread(&rec, sizeof (rec), addr) == -1) {
 561                                 mdb_warn("couldn't read trap trace record "
 562                                     "at %p", addr);
 563                                 return (DCMD_ERR);
 564                         }
 565 
 566                         if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR)
 567                                 return (DCMD_ERR);
 568 
 569                         return (DCMD_OK);
 570                 }
 571                 dcmd.ttd_cpu = addr;
 572         }
 573 
 574         if (mdb_readvar(&use_apix, "apix_enable") == -1) {
 575                 mdb_warn("failed to read apix_enable");
 576                 use_apix = 0;
 577         }
 578 
 579         if (use_apix) {
 580                 if (mdb_readvar(&d_apixs, "apixs") == -1) {
 581                         mdb_warn("\nfailed to read apixs.");
 582                         return (DCMD_ERR);
 583                 }
 584                 /* change to apix ttrace interrupt handler */
 585                 ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt;
 586         }
 587 
 588         if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) {
 589                 mdb_warn("couldn't walk 'ttrace'");
 590                 return (DCMD_ERR);
 591         }
 592 
 593         return (DCMD_OK);
 594 }
 595 
/*
 * Walker init for the mutex owner walk.  No state is set up here; the step
 * function works directly from wsp->walk_addr.  Always returns WALK_NEXT.
 */
/*ARGSUSED*/
int
mutex_owner_init(mdb_walk_state_t *wsp)
{
        return (WALK_NEXT);
}
 602 
 603 int
 604 mutex_owner_step(mdb_walk_state_t *wsp)
 605 {
 606         uintptr_t addr = wsp->walk_addr;
 607         mutex_impl_t mtx;
 608         uintptr_t owner;
 609         kthread_t thr;
 610 
 611         if (mdb_vread(&mtx, sizeof (mtx), addr) == -1)
 612                 return (WALK_ERR);
 613 
 614         if (!MUTEX_TYPE_ADAPTIVE(&mtx))
 615                 return (WALK_DONE);
 616 
 617         if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == 0)
 618                 return (WALK_DONE);
 619 
 620         if (mdb_vread(&thr, sizeof (thr), owner) != -1)
 621                 (void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata);
 622 
 623         return (WALK_DONE);
 624 }
 625 
 626 static void
 627 gate_desc_dump(gate_desc_t *gate, const char *label, int header)
 628 {
 629         const char *lastnm;
 630         uint_t lastval;
 631         char type[4];
 632 
 633         switch (gate->sgd_type) {
 634         case SDT_SYSIGT:
 635                 strcpy(type, "int");
 636                 break;
 637         case SDT_SYSTGT:
 638                 strcpy(type, "trp");
 639                 break;
 640         case SDT_SYSTASKGT:
 641                 strcpy(type, "tsk");
 642                 break;
 643         default:
 644                 (void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type);
 645         }
 646 
 647 #if defined(__amd64)
 648         lastnm = "IST";
 649         lastval = gate->sgd_ist;
 650 #else
 651         lastnm = "STK";
 652         lastval = gate->sgd_stkcpy;
 653 #endif
 654 
 655         if (header) {
 656                 mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> "
 657                     "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label),
 658                     "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm);
 659         }
 660 
 661         mdb_printf("%s", label);
 662 
 663         if (gate->sgd_type == SDT_SYSTASKGT)
 664                 mdb_printf("%-30s ", "-");
 665         else
 666                 mdb_printf("%-30a ", GATESEG_GETOFFSET(gate));
 667 
 668         mdb_printf("%4x  %d  %c %3s %2x\n", gate->sgd_selector,
 669             gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval);
 670 }
 671 
 672 /*ARGSUSED*/
 673 static int
 674 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 675 {
 676         gate_desc_t gate;
 677 
 678         if (argc != 0 || !(flags & DCMD_ADDRSPEC))
 679                 return (DCMD_USAGE);
 680 
 681         if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
 682             sizeof (gate_desc_t)) {
 683                 mdb_warn("failed to read gate descriptor at %p\n", addr);
 684                 return (DCMD_ERR);
 685         }
 686 
 687         gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags));
 688 
 689         return (DCMD_OK);
 690 }
 691 
 692 /*ARGSUSED*/
 693 static int
 694 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 695 {
 696         int i;
 697 
 698         if (!(flags & DCMD_ADDRSPEC)) {
 699                 GElf_Sym idt0_va;
 700                 gate_desc_t *idt0;
 701 
 702                 if (mdb_lookup_by_name("idt0", &idt0_va) < 0) {
 703                         mdb_warn("failed to find VA of idt0");
 704                         return (DCMD_ERR);
 705                 }
 706 
 707                 addr = idt0_va.st_value;
 708                 if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) {
 709                         mdb_warn("failed to read idt0 at %p\n", addr);
 710                         return (DCMD_ERR);
 711                 }
 712 
 713                 addr = (uintptr_t)idt0;
 714         }
 715 
 716         for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) {
 717                 gate_desc_t gate;
 718                 char label[6];
 719 
 720                 if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
 721                     sizeof (gate_desc_t)) {
 722                         mdb_warn("failed to read gate descriptor at %p\n",
 723                             addr);
 724                         return (DCMD_ERR);
 725                 }
 726 
 727                 (void) mdb_snprintf(label, sizeof (label), "%3d: ", i);
 728                 gate_desc_dump(&gate, label, i == 0);
 729         }
 730 
 731         return (DCMD_OK);
 732 }
 733 
 734 static void
 735 htables_help(void)
 736 {
 737         mdb_printf(
 738             "Given a (hat_t *), generates the list of all (htable_t *)s\n"
 739             "that correspond to that address space\n");
 740 }
 741 
 742 static void
 743 report_maps_help(void)
 744 {
 745         mdb_printf(
 746             "Given a PFN, report HAT structures that map the page, or use\n"
 747             "the page as a pagetable.\n"
 748             "\n"
 749             "-m Interpret the PFN as an MFN (machine frame number)\n");
 750 }
 751 
 752 static void
 753 ptable_help(void)
 754 {
 755         mdb_printf(
 756             "Given a PFN holding a page table, print its contents, and\n"
 757             "the address of the corresponding htable structure.\n"
 758             "\n"
 759             "-m Interpret the PFN as an MFN (machine frame number)\n"
 760             "-l force page table level (3 is top)\n");
 761 }
 762 
 763 static void
 764 ptmap_help(void)
 765 {
 766         mdb_printf(
 767             "Report all mappings represented by the page table hierarchy\n"
 768             "rooted at the given cr3 value / physical address.\n"
 769             "\n"
 770             "-w run ::whatis on mapping start addresses\n");
 771 }
 772 
/*
 * Help text for ::scalehrtime, kept as a single string so that it can be
 * printed through a "%s" format.
 */
static const char *const scalehrtime_desc =
        "Scales a timestamp from ticks to nanoseconds. Unscaled timestamps\n"
        "are used as both a quick way of accumulating relative time (as for\n"
        "usage) and as a quick way of getting the absolute current time.\n"
        "These uses require slightly different scaling algorithms. By\n"
        "default, if a specified time is greater than half of the unscaled\n"
        "time at the last tick (that is, if the unscaled time represents\n"
        "more than half the time since boot), the timestamp is assumed to\n"
        "be absolute, and the scaling algorithm used mimics that which the\n"
        "kernel uses in gethrtime(). Otherwise, the timestamp is assumed to\n"
        "be relative, and the algorithm mimics scalehrtime(). This behavior\n"
        "can be overridden by forcing the unscaled time to be interpreted\n"
        "as relative (via -r) or absolute (via -a).\n";

/* Print the help text for the ::scalehrtime dcmd. */
static void
scalehrtime_help(void)
{
        mdb_printf("%s", scalehrtime_desc);
}
 792 
 793 /*
 794  * NSEC_SHIFT is replicated here (it is not defined in a header file),
 795  * but for amusement, the reader is directed to the comment that explains
 796  * the rationale for this particular value on x86.  Spoiler:  the value is
 797  * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
 798  * in that comment sounds too familiar, it's because your author also wrote
 799  * that code -- some fifteen years prior to this writing in 2011...)
 800  */
 801 #define NSEC_SHIFT 5
 802 
 803 /*ARGSUSED*/
 804 static int
 805 scalehrtime_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 806 {
 807         uint32_t nsec_scale;
 808         hrtime_t tsc = addr, hrt, tsc_last, base, mult = 1;
 809         unsigned int *tscp = (unsigned int *)&tsc;
 810         uintptr_t scalehrtimef;
 811         uint64_t scale;
 812         GElf_Sym sym;
 813         int expected = !(flags & DCMD_ADDRSPEC);
 814         uint_t absolute = FALSE, relative = FALSE;
 815 
 816         if (mdb_getopts(argc, argv,
 817             'a', MDB_OPT_SETBITS, TRUE, &absolute,
 818             'r', MDB_OPT_SETBITS, TRUE, &relative, NULL) != argc - expected)
 819                 return (DCMD_USAGE);
 820 
 821         if (absolute && relative) {
 822                 mdb_warn("can't specify both -a and -r\n");
 823                 return (DCMD_USAGE);
 824         }
 825 
 826         if (expected == 1) {
 827                 switch (argv[argc - 1].a_type) {
 828                 case MDB_TYPE_STRING:
 829                         tsc = mdb_strtoull(argv[argc - 1].a_un.a_str);
 830                         break;
 831                 case MDB_TYPE_IMMEDIATE:
 832                         tsc = argv[argc - 1].a_un.a_val;
 833                         break;
 834                 default:
 835                         return (DCMD_USAGE);
 836                 }
 837         }
 838 
 839         if (mdb_readsym(&scalehrtimef,
 840             sizeof (scalehrtimef), "scalehrtimef") == -1) {
 841                 mdb_warn("couldn't read 'scalehrtimef'");
 842                 return (DCMD_ERR);
 843         }
 844 
 845         if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
 846                 mdb_warn("couldn't find 'tsc_scalehrtime'");
 847                 return (DCMD_ERR);
 848         }
 849 
 850         if (sym.st_value != scalehrtimef) {
 851                 mdb_warn("::scalehrtime requires that scalehrtimef "
 852                     "be set to tsc_scalehrtime\n");
 853                 return (DCMD_ERR);
 854         }
 855 
 856         if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
 857                 mdb_warn("couldn't read 'nsec_scale'");
 858                 return (DCMD_ERR);
 859         }
 860 
 861         if (mdb_readsym(&tsc_last, sizeof (tsc_last), "tsc_last") == -1) {
 862                 mdb_warn("couldn't read 'tsc_last'");
 863                 return (DCMD_ERR);
 864         }
 865 
 866         if (mdb_readsym(&base, sizeof (base), "tsc_hrtime_base") == -1) {
 867                 mdb_warn("couldn't read 'tsc_hrtime_base'");
 868                 return (DCMD_ERR);
 869         }
 870 
 871         /*
 872          * If our time is greater than half of tsc_last, we will take our
 873          * delta against tsc_last, convert it, and add that to (or subtract it
 874          * from) tsc_hrtime_base.  This mimics what the kernel actually does
 875          * in gethrtime() (modulo the tsc_sync_tick_delta) and gets us a much
 876          * higher precision result than trying to convert a large tsc value.
 877          */
 878         if (absolute || (tsc > (tsc_last >> 1) && !relative)) {
 879                 if (tsc > tsc_last) {
 880                         tsc = tsc - tsc_last;
 881                 } else {
 882                         tsc = tsc_last - tsc;
 883                         mult = -1;
 884                 }
 885         } else {
 886                 base = 0;
 887         }
 888 
 889         scale = (uint64_t)nsec_scale;
 890 
 891         hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
 892         hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
 893 
 894         mdb_printf("0x%llx\n", base + (hrt * mult));
 895 
 896         return (DCMD_OK);
 897 }
 898 
 899 /*
 900  * The x86 feature set is implemented as a bitmap array. That bitmap array is
 901  * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
 902  * macro. We have the names for each of these features in unix's text segment
 903  * so we do not have to duplicate them and instead just look them up.
 904  */
 905 /*ARGSUSED*/
 906 static int
 907 x86_featureset_dcmd(uintptr_t addr, uint_t flags, int argc,
 908     const mdb_arg_t *argv)
 909 {
 910         void *fset;
 911         GElf_Sym sym;
 912         uintptr_t nptr;
 913         char name[128];
 914         int ii;
 915 
 916         size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES);
 917 
 918         if (argc != 0)
 919                 return (DCMD_USAGE);
 920 
 921         if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) {
 922                 mdb_warn("couldn't find x86_feature_names");
 923                 return (DCMD_ERR);
 924         }
 925 
 926         fset = mdb_zalloc(sz, UM_NOSLEEP);
 927         if (fset == NULL) {
 928                 mdb_warn("failed to allocate memory for x86_featureset");
 929                 return (DCMD_ERR);
 930         }
 931 
 932         if (mdb_readvar(fset, "x86_featureset") != sz) {
 933                 mdb_warn("failed to read x86_featureset");
 934                 mdb_free(fset, sz);
 935                 return (DCMD_ERR);
 936         }
 937 
 938         for (ii = 0; ii < NUM_X86_FEATURES; ii++) {
 939                 if (!BT_TEST((ulong_t *)fset, ii))
 940                         continue;
 941 
 942                 if (mdb_vread(&nptr, sizeof (char *), sym.st_value +
 943                     sizeof (void *) * ii) != sizeof (char *)) {
 944                         mdb_warn("failed to read feature array %d", ii);
 945                         mdb_free(fset, sz);
 946                         return (DCMD_ERR);
 947                 }
 948 
 949                 if (mdb_readstr(name, sizeof (name), nptr) == -1) {
 950                         mdb_warn("failed to read feature %d", ii);
 951                         mdb_free(fset, sz);
 952                         return (DCMD_ERR);
 953                 }
 954                 mdb_printf("%s\n", name);
 955         }
 956 
 957         mdb_free(fset, sz);
 958         return (DCMD_OK);
 959 }
 960 
 961 #ifdef _KMDB
 962 /* ARGSUSED */
 963 static int
 964 sysregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 965 {
 966         ulong_t cr0, cr2, cr3, cr4;
 967         desctbr_t gdtr;
 968 
 969         static const mdb_bitmask_t cr0_flag_bits[] = {
 970                 { "PE",         CR0_PE,         CR0_PE },
 971                 { "MP",         CR0_MP,         CR0_MP },
 972                 { "EM",         CR0_EM,         CR0_EM },
 973                 { "TS",         CR0_TS,         CR0_TS },
 974                 { "ET",         CR0_ET,         CR0_ET },
 975                 { "NE",         CR0_NE,         CR0_NE },
 976                 { "WP",         CR0_WP,         CR0_WP },
 977                 { "AM",         CR0_AM,         CR0_AM },
 978                 { "NW",         CR0_NW,         CR0_NW },
 979                 { "CD",         CR0_CD,         CR0_CD },
 980                 { "PG",         CR0_PG,         CR0_PG },
 981                 { NULL,         0,              0 }
 982         };
 983 
 984         static const mdb_bitmask_t cr3_flag_bits[] = {
 985                 { "PCD",        CR3_PCD,        CR3_PCD },
 986                 { "PWT",        CR3_PWT,        CR3_PWT },
 987                 { NULL,         0,              0, }
 988         };
 989 
 990         static const mdb_bitmask_t cr4_flag_bits[] = {
 991                 { "VME",        CR4_VME,        CR4_VME },
 992                 { "PVI",        CR4_PVI,        CR4_PVI },
 993                 { "TSD",        CR4_TSD,        CR4_TSD },
 994                 { "DE",         CR4_DE,         CR4_DE },
 995                 { "PSE",        CR4_PSE,        CR4_PSE },
 996                 { "PAE",        CR4_PAE,        CR4_PAE },
 997                 { "MCE",        CR4_MCE,        CR4_MCE },
 998                 { "PGE",        CR4_PGE,        CR4_PGE },
 999                 { "PCE",        CR4_PCE,        CR4_PCE },
1000                 { "OSFXSR",     CR4_OSFXSR,     CR4_OSFXSR },
1001                 { "OSXMMEXCPT", CR4_OSXMMEXCPT, CR4_OSXMMEXCPT },
1002                 { "VMXE",       CR4_VMXE,       CR4_VMXE },
1003                 { "SMXE",       CR4_SMXE,       CR4_SMXE },
1004                 { "PCIDE",      CR4_PCIDE,      CR4_PCIDE },
1005                 { "OSXSAVE",    CR4_OSXSAVE,    CR4_OSXSAVE },
1006                 { "SMEP",       CR4_SMEP,       CR4_SMEP },
1007                 { "SMAP",       CR4_SMAP,       CR4_SMAP },
1008                 { NULL,         0,              0 }
1009         };
1010 
1011         cr0 = kmdb_unix_getcr0();
1012         cr2 = kmdb_unix_getcr2();
1013         cr3 = kmdb_unix_getcr3();
1014         cr4 = kmdb_unix_getcr4();
1015 
1016         kmdb_unix_getgdtr(&gdtr);
1017 
1018         mdb_printf("%%cr0 = 0x%lx <%b>\n", cr0, cr0, cr0_flag_bits);
1019         mdb_printf("%%cr2 = 0x%lx <%a>\n", cr2, cr2);
1020 
1021         if ((cr4 & CR4_PCIDE)) {
1022                 mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx pcid:%lu>\n", cr3,
1023                     cr3 >> MMU_PAGESHIFT, cr3 & MMU_PAGEOFFSET);
1024         } else {
1025                 mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx flags:%b>\n", cr3,
1026                     cr3 >> MMU_PAGESHIFT, cr3, cr3_flag_bits);
1027         }
1028 
1029         mdb_printf("%%cr4 = 0x%lx <%b>\n", cr4, cr4, cr4_flag_bits);
1030 
1031         mdb_printf("%%gdtr.base = 0x%lx, %%gdtr.limit = 0x%hx\n",
1032             gdtr.dtr_base, gdtr.dtr_limit);
1033 
1034         return (DCMD_OK);
1035 }
1036 #endif
1037 
/*
 * Help routine and handler for the "xcall" entry in the dcmds table below.
 * Their definitions are not in this part of the file (presumably they live
 * in a separate source file of this module -- confirm).
 */
extern void xcall_help(void);
extern int xcall_dcmd(uintptr_t, uint_t, int, const mdb_arg_t *);
1040 
/*
 * Table of the dcmds provided by this module, handed to the debugger through
 * modinfo.  Each entry lists, in order: the dcmd name, its usage string, a
 * one-line description, the handler function and, for some entries, a help
 * function.  The "sysregs" entry is only present when _KMDB is defined,
 * matching the guard around sysregs_dcmd().  The list ends with a { NULL }
 * sentinel entry.
 */
static const mdb_dcmd_t dcmds[] = {
        { "gate_desc", ":", "dump a gate descriptor", gate_desc },
        { "idt", ":[-v]", "dump an IDT", idt },
        { "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace },
        { "vatopfn", ":[-a as]", "translate address to physical page",
            va2pfn_dcmd },
        { "report_maps", ":[-m]",
            "Given PFN, report mappings / page table usage",
            report_maps_dcmd, report_maps_help },
        { "htables", "", "Given hat_t *, lists all its htable_t * values",
            htables_dcmd, htables_help },
        { "ptable", ":[-lm]", "Given PFN, dump contents of a page table",
            ptable_dcmd, ptable_help },
        { "ptmap", ":", "Given a cr3 value, dump all mappings",
            ptmap_dcmd, ptmap_help },
        { "pte", ":[-l N]", "print human readable page table entry",
            pte_dcmd },
        { "pfntomfn", ":", "convert physical page to hypervisor machine page",
            pfntomfn_dcmd },
        { "mfntopfn", ":", "convert hypervisor machine page to physical page",
            mfntopfn_dcmd },
        { "memseg_list", ":", "show memseg list", memseg_list },
        { "scalehrtime", ":[-a|-r]", "scale an unscaled high-res time",
            scalehrtime_dcmd, scalehrtime_help },
        { "x86_featureset", NULL, "dump the x86_featureset vector",
                x86_featureset_dcmd },
        { "xcall", ":", "print CPU cross-call state", xcall_dcmd, xcall_help },
#ifdef _KMDB
        /* Only available when built with _KMDB; see sysregs_dcmd(). */
        { "sysregs", NULL, "dump system registers", sysregs_dcmd },
#endif
        { NULL }
};
1073 
/*
 * Table of the walkers provided by this module, handed to the debugger
 * through modinfo.  Each entry lists, in order: the walker name, a one-line
 * description, and its init, step and (where one is supplied) fini routines.
 * "mutex_owner" supplies no fini routine.  The list ends with a { NULL }
 * sentinel entry.
 */
static const mdb_walker_t walkers[] = {
        { "ttrace", "walks trap trace buffers in reverse chronological order",
                ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
        { "mutex_owner", "walks the owner of a mutex",
                mutex_owner_init, mutex_owner_step },
        { "memseg", "walk the memseg structures",
                memseg_walk_init, memseg_walk_step, memseg_walk_fini },
        { NULL }
};
1083 
/*
 * Module description returned by _mdb_init(): the module API version this
 * file was compiled against, plus the dcmd and walker tables above.
 */
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1085 
/*
 * Module initialisation entry point.  Performs no setup of its own; it only
 * returns a pointer to the static modinfo structure describing this module's
 * dcmds and walkers.
 */
const mdb_modinfo_t *
_mdb_init(void)
{
        return (&modinfo);
}
1091 
/*
 * Module teardown entry point.  Its only action is to call free_mmu().
 * NOTE(review): free_mmu() is not defined in this part of the file;
 * presumably it is declared in "i86mmu.h" and releases state held by the
 * MMU-related dcmds -- confirm.
 */
void
_mdb_fini(void)
{
        free_mmu();
}