1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2018 Joyent, Inc.
  24  */
  25 
  26 #include <mdb/mdb_modapi.h>
  27 #include <mdb/mdb_ctf.h>
  28 #include <sys/cpuvar.h>
  29 #include <sys/systm.h>
  30 #include <sys/traptrace.h>
  31 #include <sys/x_call.h>
  32 #include <sys/xc_levels.h>
  33 #include <sys/avintr.h>
  34 #include <sys/systm.h>
  35 #include <sys/trap.h>
  36 #include <sys/mutex.h>
  37 #include <sys/mutex_impl.h>
  38 #include "i86mmu.h"
  39 #include "unix_sup.h"
  40 #include <sys/apix.h>
  41 #include <sys/x86_archext.h>
  42 #include <sys/bitmap.h>
  43 #include <sys/controlregs.h>
  44 
  45 #define TT_HDLR_WIDTH   17
  46 
  47 
  48 /* apix only */
  49 static apix_impl_t *d_apixs[NCPU];
  50 static int use_apix = 0;
  51 
  52 static int
  53 ttrace_ttr_size_check(void)
  54 {
  55         mdb_ctf_id_t ttrtid;
  56         ssize_t ttr_size;
  57 
  58         if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 ||
  59             mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) {
  60                 mdb_warn("failed to determine size of trap_trace_rec_t; "
  61                     "non-TRAPTRACE kernel?\n");
  62                 return (0);
  63         }
  64 
  65         if ((ttr_size = mdb_ctf_type_size(ttrtid)) !=
  66             sizeof (trap_trace_rec_t)) {
  67                 /*
  68                  * On Intel machines, this will happen when TTR_STACK_DEPTH
  69                  * is changed.  This code could be smarter, and could
  70                  * dynamically adapt to different depths, but not until a
  71                  * need for such adaptation is demonstrated.
  72                  */
  73                 mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't "
  74                     "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t));
  75                 return (0);
  76         }
  77 
  78         return (1);
  79 }
  80 
  81 int
  82 ttrace_walk_init(mdb_walk_state_t *wsp)
  83 {
  84         trap_trace_ctl_t *ttcp;
  85         size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
  86         int i;
  87 
  88         if (!ttrace_ttr_size_check())
  89                 return (WALK_ERR);
  90 
  91         ttcp = mdb_zalloc(ttc_size, UM_SLEEP);
  92 
  93         if (wsp->walk_addr != NULL) {
  94                 mdb_warn("ttrace only supports global walks\n");
  95                 return (WALK_ERR);
  96         }
  97 
  98         if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) {
  99                 mdb_warn("symbol 'trap_trace_ctl' not found; "
 100                     "non-TRAPTRACE kernel?\n");
 101                 mdb_free(ttcp, ttc_size);
 102                 return (WALK_ERR);
 103         }
 104 
 105         /*
 106          * We'll poach the ttc_current pointer (which isn't used for
 107          * anything) to store a pointer to our current TRAPTRACE record.
 108          * This allows us to only keep the array of trap_trace_ctl structures
 109          * as our walker state (ttc_current may be the only kernel data
 110          * structure member added exclusively to make writing the mdb walker
 111          * a little easier).
 112          */
 113         for (i = 0; i < NCPU; i++) {
 114                 trap_trace_ctl_t *ttc = &ttcp[i];
 115 
 116                 if (ttc->ttc_first == NULL)
 117                         continue;
 118 
 119                 /*
 120                  * Assign ttc_current to be the last completed record.
 121                  * Note that the error checking (i.e. in the ttc_next ==
 122                  * ttc_first case) is performed in the step function.
 123                  */
 124                 ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t);
 125         }
 126 
 127         wsp->walk_data = ttcp;
 128         return (WALK_NEXT);
 129 }
 130 
 131 int
 132 ttrace_walk_step(mdb_walk_state_t *wsp)
 133 {
 134         trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc;
 135         trap_trace_rec_t rec;
 136         int rval, i, recsize = sizeof (trap_trace_rec_t);
 137         hrtime_t latest = 0;
 138 
 139         /*
 140          * Loop through the CPUs, looking for the latest trap trace record
 141          * (we want to walk through the trap trace records in reverse
 142          * chronological order).
 143          */
 144         for (i = 0; i < NCPU; i++) {
 145                 ttc = &ttcp[i];
 146 
 147                 if (ttc->ttc_current == NULL)
 148                         continue;
 149 
 150                 if (ttc->ttc_current < ttc->ttc_first)
 151                         ttc->ttc_current = ttc->ttc_limit - recsize;
 152 
 153                 if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
 154                         mdb_warn("couldn't read rec at %p", ttc->ttc_current);
 155                         return (WALK_ERR);
 156                 }
 157 
 158                 if (rec.ttr_stamp > latest) {
 159                         latest = rec.ttr_stamp;
 160                         latest_ttc = ttc;
 161                 }
 162         }
 163 
 164         if (latest == 0)
 165                 return (WALK_DONE);
 166 
 167         ttc = latest_ttc;
 168 
 169         if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
 170                 mdb_warn("couldn't read rec at %p", ttc->ttc_current);
 171                 return (WALK_ERR);
 172         }
 173 
 174         rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata);
 175 
 176         if (ttc->ttc_current == ttc->ttc_next)
 177                 ttc->ttc_current = NULL;
 178         else
 179                 ttc->ttc_current -= sizeof (trap_trace_rec_t);
 180 
 181         return (rval);
 182 }
 183 
 184 void
 185 ttrace_walk_fini(mdb_walk_state_t *wsp)
 186 {
 187         mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU);
 188 }
 189 
 190 static int
 191 ttrace_syscall(trap_trace_rec_t *rec)
 192 {
 193         GElf_Sym sym;
 194         int sysnum = rec->ttr_sysnum;
 195         uintptr_t addr;
 196         struct sysent sys;
 197 
 198         mdb_printf("%-3x", sysnum);
 199 
 200         if (rec->ttr_sysnum > NSYSCALL) {
 201                 mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum);
 202                 return (0);
 203         }
 204 
 205         if (mdb_lookup_by_name("sysent", &sym) == -1) {
 206                 mdb_warn("\ncouldn't find 'sysent'");
 207                 return (-1);
 208         }
 209 
 210         addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent);
 211 
 212         if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
 213                 mdb_warn("\nsysnum %d out-of-range\n", sysnum);
 214                 return (-1);
 215         }
 216 
 217         if (mdb_vread(&sys, sizeof (sys), addr) == -1) {
 218                 mdb_warn("\nfailed to read sysent at %p", addr);
 219                 return (-1);
 220         }
 221 
 222         mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc);
 223 
 224         return (0);
 225 }
 226 
 227 static int
 228 ttrace_interrupt(trap_trace_rec_t *rec)
 229 {
 230         GElf_Sym sym;
 231         uintptr_t addr;
 232         struct av_head hd;
 233         struct autovec av;
 234 
 235         switch (rec->ttr_regs.r_trapno) {
 236         case T_SOFTINT:
 237                 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
 238                 return (0);
 239         default:
 240                 break;
 241         }
 242 
 243         mdb_printf("%-3x ", rec->ttr_vector);
 244 
 245         if (mdb_lookup_by_name("autovect", &sym) == -1) {
 246                 mdb_warn("\ncouldn't find 'autovect'");
 247                 return (-1);
 248         }
 249 
 250         addr = (uintptr_t)sym.st_value +
 251             rec->ttr_vector * sizeof (struct av_head);
 252 
 253         if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
 254                 mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector);
 255                 return (-1);
 256         }
 257 
 258         if (mdb_vread(&hd, sizeof (hd), addr) == -1) {
 259                 mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector);
 260                 return (-1);
 261         }
 262 
 263         if (hd.avh_link == NULL) {
 264                 if (rec->ttr_ipl == XC_CPUPOKE_PIL)
 265                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
 266                 else
 267                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
 268         } else {
 269                 if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) {
 270                         mdb_warn("couldn't read autovec at %p",
 271                             (uintptr_t)hd.avh_link);
 272                 }
 273 
 274                 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
 275         }
 276 
 277         return (0);
 278 }
 279 
 280 static int
 281 ttrace_apix_interrupt(trap_trace_rec_t *rec)
 282 {
 283         struct autovec av;
 284         apix_impl_t apix;
 285         apix_vector_t apix_vector;
 286 
 287         switch (rec->ttr_regs.r_trapno) {
 288         case T_SOFTINT:
 289                 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
 290                 return (0);
 291         default:
 292                 break;
 293         }
 294 
 295         mdb_printf("%-3x ", rec->ttr_vector);
 296 
 297         /* Read the per CPU apix entry */
 298         if (mdb_vread(&apix, sizeof (apix_impl_t),
 299             (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) {
 300                 mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid);
 301                 return (-1);
 302         }
 303         if (mdb_vread(&apix_vector, sizeof (apix_vector_t),
 304             (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) {
 305                 mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector);
 306                 return (-1);
 307         }
 308         if (apix_vector.v_share == 0) {
 309                 if (rec->ttr_ipl == XC_CPUPOKE_PIL)
 310                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
 311                 else
 312                         mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
 313         } else {
 314                 if (mdb_vread(&av, sizeof (struct autovec),
 315                     (uintptr_t)(apix_vector.v_autovect)) == -1) {
 316                         mdb_warn("couldn't read autovec at %p",
 317                             (uintptr_t)apix_vector.v_autovect);
 318                 }
 319 
 320                 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
 321         }
 322 
 323         return (0);
 324 }
 325 
 326 
 327 static struct {
 328         int tt_trapno;
 329         char *tt_name;
 330 } ttrace_traps[] = {
 331         { T_ZERODIV,    "divide-error" },
 332         { T_SGLSTP,     "debug-exception" },
 333         { T_NMIFLT,     "nmi-interrupt" },
 334         { T_BPTFLT,     "breakpoint" },
 335         { T_OVFLW,      "into-overflow" },
 336         { T_BOUNDFLT,   "bound-exceeded" },
 337         { T_ILLINST,    "invalid-opcode" },
 338         { T_NOEXTFLT,   "device-not-avail" },
 339         { T_DBLFLT,     "double-fault" },
 340         { T_EXTOVRFLT,  "segment-overrun" },
 341         { T_TSSFLT,     "invalid-tss" },
 342         { T_SEGFLT,     "segment-not-pres" },
 343         { T_STKFLT,     "stack-fault" },
 344         { T_GPFLT,      "general-protectn" },
 345         { T_PGFLT,      "page-fault" },
 346         { T_EXTERRFLT,  "error-fault" },
 347         { T_ALIGNMENT,  "alignment-check" },
 348         { T_MCE,        "machine-check" },
 349         { T_SIMDFPE,    "sse-exception" },
 350 
 351         { T_DBGENTR,    "debug-enter" },
 352         { T_FASTTRAP,   "fasttrap-0xd2" },
 353         { T_SYSCALLINT, "syscall-0x91" },
 354         { T_DTRACE_RET, "dtrace-ret" },
 355         { T_SOFTINT,    "softint" },
 356         { T_INTERRUPT,  "interrupt" },
 357         { T_FAULT,      "fault" },
 358         { T_AST,        "ast" },
 359         { T_SYSCALL,    "syscall" },
 360 
 361         { 0,            NULL }
 362 };
 363 
 364 static int
 365 ttrace_trap(trap_trace_rec_t *rec)
 366 {
 367         int i;
 368 
 369         if (rec->ttr_regs.r_trapno == T_AST)
 370                 mdb_printf("%-3s ", "-");
 371         else
 372                 mdb_printf("%-3x ", rec->ttr_regs.r_trapno);
 373 
 374         for (i = 0; ttrace_traps[i].tt_name != NULL; i++) {
 375                 if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno)
 376                         break;
 377         }
 378 
 379         if (ttrace_traps[i].tt_name == NULL)
 380                 mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)");
 381         else
 382                 mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name);
 383 
 384         return (0);
 385 }
 386 
 387 static void
 388 ttrace_intr_detail(trap_trace_rec_t *rec)
 389 {
 390         mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector,
 391             rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl);
 392 }
 393 
 394 static struct {
 395         uchar_t t_marker;
 396         char *t_name;
 397         int (*t_hdlr)(trap_trace_rec_t *);
 398 } ttrace_hdlr[] = {
 399         { TT_SYSCALL, "sysc", ttrace_syscall },
 400         { TT_SYSENTER, "syse", ttrace_syscall },
 401         { TT_SYSC, "asys", ttrace_syscall },
 402         { TT_SYSC64, "sc64", ttrace_syscall },
 403         { TT_INTERRUPT, "intr", ttrace_interrupt },
 404         { TT_TRAP, "trap", ttrace_trap },
 405         { TT_EVENT, "evnt", ttrace_trap },
 406         { 0, NULL, NULL }
 407 };
 408 
 409 typedef struct ttrace_dcmd {
 410         processorid_t ttd_cpu;
 411         uint_t ttd_extended;
 412         uintptr_t ttd_kthread;
 413         trap_trace_ctl_t ttd_ttc[NCPU];
 414 } ttrace_dcmd_t;
 415 
 416 #if defined(__amd64)
 417 
 418 #define DUMP(reg) #reg, regs->r_##reg
 419 #define THREEREGS       "         %3s: %16lx %3s: %16lx %3s: %16lx\n"
 420 
 421 static void
 422 ttrace_dumpregs(trap_trace_rec_t *rec)
 423 {
 424         struct regs *regs = &rec->ttr_regs;
 425 
 426         mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx));
 427         mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9));
 428         mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp));
 429         mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12));
 430         mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15));
 431         mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs));
 432         mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err));
 433         mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl));
 434         mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2);
 435         mdb_printf("\n");
 436 }
 437 
 438 #else
 439 
 440 #define DUMP(reg) #reg, regs->r_##reg
 441 #define FOURREGS        "         %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n"
 442 
 443 static void
 444 ttrace_dumpregs(trap_trace_rec_t *rec)
 445 {
 446         struct regs *regs = &rec->ttr_regs;
 447 
 448         mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds));
 449         mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp));
 450         mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax));
 451         mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err),
 452             DUMP(pc), DUMP(cs));
 453         mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss),
 454             "cr2", rec->ttr_cr2);
 455         mdb_printf("\n");
 456 }
 457 
 458 #endif  /* __amd64 */
 459 
 460 int
 461 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd)
 462 {
 463         struct regs *regs = &rec->ttr_regs;
 464         processorid_t cpu = -1, i;
 465 
 466         for (i = 0; i < NCPU; i++) {
 467                 if (addr >= dcmd->ttd_ttc[i].ttc_first &&
 468                     addr < dcmd->ttd_ttc[i].ttc_limit) {
 469                         cpu = i;
 470                         break;
 471                 }
 472         }
 473 
 474         if (cpu == -1) {
 475                 mdb_warn("couldn't find %p in any trap trace ctl\n", addr);
 476                 return (WALK_ERR);
 477         }
 478 
 479         if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu)
 480                 return (WALK_NEXT);
 481 
 482         if (dcmd->ttd_kthread != 0 &&
 483             dcmd->ttd_kthread != rec->ttr_curthread)
 484                 return (WALK_NEXT);
 485 
 486         mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp);
 487 
 488         for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) {
 489                 if (rec->ttr_marker != ttrace_hdlr[i].t_marker)
 490                         continue;
 491                 mdb_printf("%4s ", ttrace_hdlr[i].t_name);
 492                 if (ttrace_hdlr[i].t_hdlr(rec) == -1)
 493                         return (WALK_ERR);
 494         }
 495 
 496         mdb_printf(" %a\n", regs->r_pc);
 497 
 498         if (dcmd->ttd_extended == FALSE)
 499                 return (WALK_NEXT);
 500 
 501         if (rec->ttr_marker == TT_INTERRUPT)
 502                 ttrace_intr_detail(rec);
 503         else
 504                 ttrace_dumpregs(rec);
 505 
 506         if (rec->ttr_sdepth > 0) {
 507                 for (i = 0; i < rec->ttr_sdepth; i++) {
 508                         if (i >= TTR_STACK_DEPTH) {
 509                                 mdb_printf("%17s*** invalid ttr_sdepth (is %d, "
 510                                     "should be <= %d)\n", " ", rec->ttr_sdepth,
 511                                     TTR_STACK_DEPTH);
 512                                 break;
 513                         }
 514 
 515                         mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]);
 516                 }
 517                 mdb_printf("\n");
 518         }
 519 
 520         return (WALK_NEXT);
 521 }
 522 
 523 int
 524 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 525 {
 526         ttrace_dcmd_t dcmd;
 527         trap_trace_ctl_t *ttc = dcmd.ttd_ttc;
 528         trap_trace_rec_t rec;
 529         size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
 530 
 531         if (!ttrace_ttr_size_check())
 532                 return (WALK_ERR);
 533 
 534         bzero(&dcmd, sizeof (dcmd));
 535         dcmd.ttd_cpu = -1;
 536         dcmd.ttd_extended = FALSE;
 537 
 538         if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) {
 539                 mdb_warn("symbol 'trap_trace_ctl' not found; "
 540                     "non-TRAPTRACE kernel?\n");
 541                 return (DCMD_ERR);
 542         }
 543 
 544         if (mdb_getopts(argc, argv,
 545             'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended,
 546             't', MDB_OPT_UINTPTR, &dcmd.ttd_kthread, NULL) != argc)
 547                 return (DCMD_USAGE);
 548 
 549         if (DCMD_HDRSPEC(flags)) {
 550                 mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU",
 551                     "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER",
 552                     " EIP");
 553         }
 554 
 555         if (flags & DCMD_ADDRSPEC) {
 556                 if (addr >= NCPU) {
 557                         if (mdb_vread(&rec, sizeof (rec), addr) == -1) {
 558                                 mdb_warn("couldn't read trap trace record "
 559                                     "at %p", addr);
 560                                 return (DCMD_ERR);
 561                         }
 562 
 563                         if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR)
 564                                 return (DCMD_ERR);
 565 
 566                         return (DCMD_OK);
 567                 }
 568                 dcmd.ttd_cpu = addr;
 569         }
 570 
 571         if (mdb_readvar(&use_apix, "apix_enable") == -1) {
 572                 mdb_warn("failed to read apix_enable");
 573                 use_apix = 0;
 574         }
 575 
 576         if (use_apix) {
 577                 if (mdb_readvar(&d_apixs, "apixs") == -1) {
 578                         mdb_warn("\nfailed to read apixs.");
 579                         return (DCMD_ERR);
 580                 }
 581                 /* change to apix ttrace interrupt handler */
 582                 ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt;
 583         }
 584 
 585         if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) {
 586                 mdb_warn("couldn't walk 'ttrace'");
 587                 return (DCMD_ERR);
 588         }
 589 
 590         return (DCMD_OK);
 591 }
 592 
 593 /*ARGSUSED*/
 594 int
 595 mutex_owner_init(mdb_walk_state_t *wsp)
 596 {
 597         return (WALK_NEXT);
 598 }
 599 
 600 int
 601 mutex_owner_step(mdb_walk_state_t *wsp)
 602 {
 603         uintptr_t addr = wsp->walk_addr;
 604         mutex_impl_t mtx;
 605         uintptr_t owner;
 606         kthread_t thr;
 607 
 608         if (mdb_vread(&mtx, sizeof (mtx), addr) == -1)
 609                 return (WALK_ERR);
 610 
 611         if (!MUTEX_TYPE_ADAPTIVE(&mtx))
 612                 return (WALK_DONE);
 613 
 614         if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == NULL)
 615                 return (WALK_DONE);
 616 
 617         if (mdb_vread(&thr, sizeof (thr), owner) != -1)
 618                 (void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata);
 619 
 620         return (WALK_DONE);
 621 }
 622 
 623 static void
 624 gate_desc_dump(gate_desc_t *gate, const char *label, int header)
 625 {
 626         const char *lastnm;
 627         uint_t lastval;
 628         char type[4];
 629 
 630         switch (gate->sgd_type) {
 631         case SDT_SYSIGT:
 632                 strcpy(type, "int");
 633                 break;
 634         case SDT_SYSTGT:
 635                 strcpy(type, "trp");
 636                 break;
 637         case SDT_SYSTASKGT:
 638                 strcpy(type, "tsk");
 639                 break;
 640         default:
 641                 (void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type);
 642         }
 643 
 644 #if defined(__amd64)
 645         lastnm = "IST";
 646         lastval = gate->sgd_ist;
 647 #else
 648         lastnm = "STK";
 649         lastval = gate->sgd_stkcpy;
 650 #endif
 651 
 652         if (header) {
 653                 mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> "
 654                     "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label),
 655                     "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm);
 656         }
 657 
 658         mdb_printf("%s", label);
 659 
 660         if (gate->sgd_type == SDT_SYSTASKGT)
 661                 mdb_printf("%-30s ", "-");
 662         else
 663                 mdb_printf("%-30a ", GATESEG_GETOFFSET(gate));
 664 
 665         mdb_printf("%4x  %d  %c %3s %2x\n", gate->sgd_selector,
 666             gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval);
 667 }
 668 
 669 /*ARGSUSED*/
 670 static int
 671 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 672 {
 673         gate_desc_t gate;
 674 
 675         if (argc != 0 || !(flags & DCMD_ADDRSPEC))
 676                 return (DCMD_USAGE);
 677 
 678         if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
 679             sizeof (gate_desc_t)) {
 680                 mdb_warn("failed to read gate descriptor at %p\n", addr);
 681                 return (DCMD_ERR);
 682         }
 683 
 684         gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags));
 685 
 686         return (DCMD_OK);
 687 }
 688 
 689 /*ARGSUSED*/
 690 static int
 691 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 692 {
 693         int i;
 694 
 695         if (!(flags & DCMD_ADDRSPEC)) {
 696                 GElf_Sym idt0_va;
 697                 gate_desc_t *idt0;
 698 
 699                 if (mdb_lookup_by_name("idt0", &idt0_va) < 0) {
 700                         mdb_warn("failed to find VA of idt0");
 701                         return (DCMD_ERR);
 702                 }
 703 
 704                 addr = idt0_va.st_value;
 705                 if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) {
 706                         mdb_warn("failed to read idt0 at %p\n", addr);
 707                         return (DCMD_ERR);
 708                 }
 709 
 710                 addr = (uintptr_t)idt0;
 711         }
 712 
 713         for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) {
 714                 gate_desc_t gate;
 715                 char label[6];
 716 
 717                 if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
 718                     sizeof (gate_desc_t)) {
 719                         mdb_warn("failed to read gate descriptor at %p\n",
 720                             addr);
 721                         return (DCMD_ERR);
 722                 }
 723 
 724                 (void) mdb_snprintf(label, sizeof (label), "%3d: ", i);
 725                 gate_desc_dump(&gate, label, i == 0);
 726         }
 727 
 728         return (DCMD_OK);
 729 }
 730 
 731 static void
 732 htables_help(void)
 733 {
 734         mdb_printf(
 735             "Given a (hat_t *), generates the list of all (htable_t *)s\n"
 736             "that correspond to that address space\n");
 737 }
 738 
 739 static void
 740 report_maps_help(void)
 741 {
 742         mdb_printf(
 743             "Given a PFN, report HAT structures that map the page, or use\n"
 744             "the page as a pagetable.\n"
 745             "\n"
 746             "-m Interpret the PFN as an MFN (machine frame number)\n");
 747 }
 748 
 749 static void
 750 ptable_help(void)
 751 {
 752         mdb_printf(
 753             "Given a PFN holding a page table, print its contents, and\n"
 754             "the address of the corresponding htable structure.\n"
 755             "\n"
 756             "-m Interpret the PFN as an MFN (machine frame number)\n");
 757 }
 758 
 759 /*
 760  * NSEC_SHIFT is replicated here (it is not defined in a header file),
 761  * but for amusement, the reader is directed to the comment that explains
 762  * the rationale for this particular value on x86.  Spoiler:  the value is
 763  * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
 764  * in that comment sounds too familiar, it's because your author also wrote
 765  * that code -- some fifteen years prior to this writing in 2011...)
 766  */
 767 #define NSEC_SHIFT 5
 768 
 769 /*ARGSUSED*/
 770 static int
 771 scalehrtime_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 772 {
 773         uint32_t nsec_scale;
 774         hrtime_t tsc = addr, hrt;
 775         unsigned int *tscp = (unsigned int *)&tsc;
 776         uintptr_t scalehrtimef;
 777         uint64_t scale;
 778         GElf_Sym sym;
 779 
 780         if (!(flags & DCMD_ADDRSPEC)) {
 781                 if (argc != 1)
 782                         return (DCMD_USAGE);
 783 
 784                 switch (argv[0].a_type) {
 785                 case MDB_TYPE_STRING:
 786                         tsc = mdb_strtoull(argv[0].a_un.a_str);
 787                         break;
 788                 case MDB_TYPE_IMMEDIATE:
 789                         tsc = argv[0].a_un.a_val;
 790                         break;
 791                 default:
 792                         return (DCMD_USAGE);
 793                 }
 794         }
 795 
 796         if (mdb_readsym(&scalehrtimef,
 797             sizeof (scalehrtimef), "scalehrtimef") == -1) {
 798                 mdb_warn("couldn't read 'scalehrtimef'");
 799                 return (DCMD_ERR);
 800         }
 801 
 802         if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
 803                 mdb_warn("couldn't find 'tsc_scalehrtime'");
 804                 return (DCMD_ERR);
 805         }
 806 
 807         if (sym.st_value != scalehrtimef) {
 808                 mdb_warn("::scalehrtime requires that scalehrtimef "
 809                     "be set to tsc_scalehrtime\n");
 810                 return (DCMD_ERR);
 811         }
 812 
 813         if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
 814                 mdb_warn("couldn't read 'nsec_scale'");
 815                 return (DCMD_ERR);
 816         }
 817 
 818         scale = (uint64_t)nsec_scale;
 819 
 820         hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
 821         hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
 822 
 823         mdb_printf("0x%llx\n", hrt);
 824 
 825         return (DCMD_OK);
 826 }
 827 
 828 /*
 829  * The x86 feature set is implemented as a bitmap array. That bitmap array is
 830  * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
 831  * macro. We have the names for each of these features in unix's text segment
 832  * so we do not have to duplicate them and instead just look them up.
 833  */
 834 /*ARGSUSED*/
 835 static int
 836 x86_featureset_cmd(uintptr_t addr, uint_t flags, int argc,
 837     const mdb_arg_t *argv)
 838 {
 839         void *fset;
 840         GElf_Sym sym;
 841         uintptr_t nptr;
 842         char name[128];
 843         int ii;
 844 
 845         size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES);
 846 
 847         if (argc != 0)
 848                 return (DCMD_USAGE);
 849 
 850         if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) {
 851                 mdb_warn("couldn't find x86_feature_names");
 852                 return (DCMD_ERR);
 853         }
 854 
 855         fset = mdb_zalloc(sz, UM_NOSLEEP);
 856         if (fset == NULL) {
 857                 mdb_warn("failed to allocate memory for x86_featureset");
 858                 return (DCMD_ERR);
 859         }
 860 
 861         if (mdb_readvar(fset, "x86_featureset") != sz) {
 862                 mdb_warn("failed to read x86_featureset");
 863                 mdb_free(fset, sz);
 864                 return (DCMD_ERR);
 865         }
 866 
 867         for (ii = 0; ii < NUM_X86_FEATURES; ii++) {
 868                 if (!BT_TEST((ulong_t *)fset, ii))
 869                         continue;
 870 
 871                 if (mdb_vread(&nptr, sizeof (char *), sym.st_value +
 872                     sizeof (void *) * ii) != sizeof (char *)) {
 873                         mdb_warn("failed to read feature array %d", ii);
 874                         mdb_free(fset, sz);
 875                         return (DCMD_ERR);
 876                 }
 877 
 878                 if (mdb_readstr(name, sizeof (name), nptr) == -1) {
 879                         mdb_warn("failed to read feature %d", ii);
 880                         mdb_free(fset, sz);
 881                         return (DCMD_ERR);
 882                 }
 883                 mdb_printf("%s\n", name);
 884         }
 885 
 886         mdb_free(fset, sz);
 887         return (DCMD_OK);
 888 }
 889 
 890 #ifdef _KMDB
 891 /* ARGSUSED */
 892 static int
 893 crregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 894 {
 895         ulong_t cr0, cr2, cr3, cr4;
 896         static const mdb_bitmask_t cr0_flag_bits[] = {
 897                 { "PE",         CR0_PE,         CR0_PE },
 898                 { "MP",         CR0_MP,         CR0_MP },
 899                 { "EM",         CR0_EM,         CR0_EM },
 900                 { "TS",         CR0_TS,         CR0_TS },
 901                 { "ET",         CR0_ET,         CR0_ET },
 902                 { "NE",         CR0_NE,         CR0_NE },
 903                 { "WP",         CR0_WP,         CR0_WP },
 904                 { "AM",         CR0_AM,         CR0_AM },
 905                 { "NW",         CR0_NW,         CR0_NW },
 906                 { "CD",         CR0_CD,         CR0_CD },
 907                 { "PG",         CR0_PG,         CR0_PG },
 908                 { NULL,         0,              0 }
 909         };
 910 
 911         static const mdb_bitmask_t cr3_flag_bits[] = {
 912                 { "PCD",        CR3_PCD,        CR3_PCD },
 913                 { "PWT",        CR3_PWT,        CR3_PWT },
 914                 { NULL,         0,              0, }
 915         };
 916 
 917         static const mdb_bitmask_t cr4_flag_bits[] = {
 918                 { "VME",        CR4_VME,        CR4_VME },
 919                 { "PVI",        CR4_PVI,        CR4_PVI },
 920                 { "TSD",        CR4_TSD,        CR4_TSD },
 921                 { "DE",         CR4_DE,         CR4_DE },
 922                 { "PSE",        CR4_PSE,        CR4_PSE },
 923                 { "PAE",        CR4_PAE,        CR4_PAE },
 924                 { "MCE",        CR4_MCE,        CR4_MCE },
 925                 { "PGE",        CR4_PGE,        CR4_PGE },
 926                 { "PCE",        CR4_PCE,        CR4_PCE },
 927                 { "OSFXSR",     CR4_OSFXSR,     CR4_OSFXSR },
 928                 { "OSXMMEXCPT", CR4_OSXMMEXCPT, CR4_OSXMMEXCPT },
 929                 { "VMXE",       CR4_VMXE,       CR4_VMXE },
 930                 { "SMXE",       CR4_SMXE,       CR4_SMXE },
 931                 { "PCIDE",      CR4_PCIDE,      CR4_PCIDE },
 932                 { "OSXSAVE",    CR4_OSXSAVE,    CR4_OSXSAVE },
 933                 { "SMEP",       CR4_SMEP,       CR4_SMEP },
 934                 { "SMAP",       CR4_SMAP,       CR4_SMAP },
 935                 { NULL,         0,              0 }
 936         };
 937 
 938         cr0 = kmdb_unix_getcr0();
 939         cr2 = kmdb_unix_getcr2();
 940         cr3 = kmdb_unix_getcr3();
 941         cr4 = kmdb_unix_getcr4();
 942         mdb_printf("%%cr0 = 0x%08x <%b>\n", cr0, cr0, cr0_flag_bits);
 943         mdb_printf("%%cr2 = 0x%08x <%a>\n", cr2, cr2);
 944 
 945         if ((cr4 & CR4_PCIDE)) {
 946                 mdb_printf("%%cr3 = 0x%08x <pfn:%lu pcid:%u>\n",
 947                     cr3 >> MMU_PAGESHIFT, cr3 & MMU_PAGEOFFSET);
 948         } else {
 949                 mdb_printf("%%cr3 = 0x%08x <pfn:%lu flags:%b>\n", cr3,
 950                     cr3 >> MMU_PAGESHIFT, cr3, cr3_flag_bits);
 951         }
 952 
 953         mdb_printf("%%cr4 = 0x%08x <%b>\n", cr4, cr4, cr4_flag_bits);
 954 
 955         return (DCMD_OK);
 956 }
 957 #endif
 958 
 959 static const mdb_dcmd_t dcmds[] = {
 960         { "gate_desc", ":", "dump a gate descriptor", gate_desc },
 961         { "idt", ":[-v]", "dump an IDT", idt },
 962         { "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace },
 963         { "vatopfn", ":[-a as]", "translate address to physical page",
 964             va2pfn_dcmd },
 965         { "report_maps", ":[-m]",
 966             "Given PFN, report mappings / page table usage",
 967             report_maps_dcmd, report_maps_help },
 968         { "htables", "", "Given hat_t *, lists all its htable_t * values",
 969             htables_dcmd, htables_help },
 970         { "ptable", ":[-m]", "Given PFN, dump contents of a page table",
 971             ptable_dcmd, ptable_help },
 972         { "pte", ":[-p XXXXX] [-l N]", "print human readable page table entry",
 973             pte_dcmd },
 974         { "pfntomfn", ":", "convert physical page to hypervisor machine page",
 975             pfntomfn_dcmd },
 976         { "mfntopfn", ":", "convert hypervisor machine page to physical page",
 977             mfntopfn_dcmd },
 978         { "memseg_list", ":", "show memseg list", memseg_list },
 979         { "scalehrtime", ":",
 980             "scale an unscaled high-res time", scalehrtime_cmd },
 981         { "x86_featureset", NULL, "dump the x86_featureset vector",
 982                 x86_featureset_cmd },
 983 #ifdef _KMDB
 984         { "crregs", NULL, "dump control registers", crregs_dcmd },
 985 #endif
 986         { NULL }
 987 };
 988 
 989 static const mdb_walker_t walkers[] = {
 990         { "ttrace", "walks trap trace buffers in reverse chronological order",
 991                 ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
 992         { "mutex_owner", "walks the owner of a mutex",
 993                 mutex_owner_init, mutex_owner_step },
 994         { "memseg", "walk the memseg structures",
 995                 memseg_walk_init, memseg_walk_step, memseg_walk_fini },
 996         { NULL }
 997 };
 998 
 999 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1000 
1001 const mdb_modinfo_t *
1002 _mdb_init(void)
1003 {
1004         return (&modinfo);
1005 }
1006 
/*
 * Module teardown hook.  Its only action is to call free_mmu().
 */
void
_mdb_fini(void)
{
	free_mmu();
}